Skip to content

[mlir][LLVMIR] Translation to LLVM fails in LLVM. #70160

Closed
@MaheshRavishankar

Description

@MaheshRavishankar

PR #69292 appears to expose a bug in LLVM (the PR itself may not be the root cause, but it triggers the failure).

After that PR, the following example fails during translation from MLIR to LLVM IR:

// Reproducer module for the translation failure reported in this issue.
// It holds three globals, an external declaration of @__nv_floorf, and one
// function that uses LLVM-dialect and NVVM ops.
module {
  // Globals placed in address space 3 (addr_space = 3). Only
  // @__dynamic_shared_memory__ is referenced by the function below; the two
  // private globals are not used anywhere in this module.
  llvm.mlir.global external @__dynamic_shared_memory__() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
  llvm.mlir.global private @__shared_memory___0() {addr_space = 3 : i32, alignment = 64 : i64} : !llvm.array<1 x array<1 x f32>>
  llvm.mlir.global private @__shared_memory__() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<8 x f32>
  // External declaration (no body); called once, in ^bb9.
  llvm.func @__nv_floorf(f32) -> f32
  // The function being translated. %arg0 is only loaded from; stores into
  // address space 1 go through %53, which is derived from %arg1. %arg2 and
  // %arg3 are unused; %arg4..%arg7 are combined into two i64 values below.
  llvm.func @_softmax_dynamic_dispatch_0_generic_12x128xD_f32(%arg0: !llvm.ptr<1> {llvm.align = 16 : i32, llvm.noalias, llvm.readonly}, %arg1: !llvm.ptr<1> {llvm.align = 16 : i32, llvm.noalias}, %arg2: i32, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32, %arg7: i32) {
    // Constants used throughout the body.
    %0 = llvm.mlir.constant(-1 : i32) : i32
    %1 = llvm.mlir.constant(0 : i64) : i64
    %2 = llvm.mlir.constant(1 : i64) : i64
    %3 = llvm.mlir.constant(63 : index) : i64
    %4 = llvm.mlir.constant(16384 : index) : i64
    %5 = llvm.mlir.constant(32 : i64) : i64
    %6 = llvm.mlir.constant(256 : index) : i64
    %7 = llvm.mlir.constant(-3.40282347E+38 : f32) : f32
    %8 = llvm.mlir.constant(0 : i32) : i32
    %9 = llvm.mlir.constant(7 : index) : i64
    %10 = llvm.mlir.constant(32 : index) : i64
    %11 = llvm.mlir.constant(16 : i32) : i32
    %12 = llvm.mlir.constant(8 : i32) : i32
    %13 = llvm.mlir.constant(4 : i32) : i32
    %14 = llvm.mlir.constant(2 : i32) : i32
    %15 = llvm.mlir.constant(32 : i32) : i32
    %16 = llvm.mlir.constant(1 : i32) : i32
    // 0xFF800000 is the IEEE-754 single-precision bit pattern of -infinity;
    // %17 is the initial accumulator of the first loop (^bb1/^bb2).
    %17 = llvm.mlir.constant(dense<0xFF800000> : vector<1x1xf32>) : !llvm.array<1 x vector<1xf32>>
    %18 = llvm.mlir.constant(1 : index) : i64
    %19 = llvm.mlir.constant(128 : index) : i64
    %20 = llvm.mlir.constant(0 : index) : i64
    %21 = llvm.mlir.constant(127 : i32) : i32
    %22 = llvm.mlir.constant(23 : i32) : i32
    %23 = llvm.mlir.constant(1.270000e+02 : f32) : f32
    %24 = llvm.mlir.constant(-1.270000e+02 : f32) : f32
    %25 = llvm.mlir.constant(8.880000e+01 : f32) : f32
    %26 = llvm.mlir.constant(-8.780000e+01 : f32) : f32
    %27 = llvm.mlir.constant(5.000000e-01 : f32) : f32
    %28 = llvm.mlir.constant(0.166666657 : f32) : f32
    %29 = llvm.mlir.constant(0.0416657962 : f32) : f32
    %30 = llvm.mlir.constant(0.00833345205 : f32) : f32
    %31 = llvm.mlir.constant(0.00139819994 : f32) : f32
    %32 = llvm.mlir.constant(1.98756912E-4 : f32) : f32
    %33 = llvm.mlir.constant(2.12194442E-4 : f32) : f32
    %34 = llvm.mlir.constant(-0.693359375 : f32) : f32
    %35 = llvm.mlir.constant(1.44269502 : f32) : f32
    %36 = llvm.mlir.constant(1.000000e+00 : f32) : f32
    %37 = llvm.mlir.constant(dense<true> : vector<1xi1>) : vector<1xi1>
    %38 = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vector<1xf32>>
    %39 = llvm.mlir.constant(true) : i1
    // Constant GEPs rooted at the global @__dynamic_shared_memory__.
    // NOTE(review): the stack trace in this report aborts in
    // llvm::cast<llvm::ArrayType> under ConstantFoldGetElementPtr, reached
    // from IRBuilderBase::CreateGEP, so one of these constant-foldable GEPs
    // is presumably the trigger. %41 and %43 declare element type !llvm.ptr
    // yet supply two indices, which looks inconsistent with the global's
    // !llvm.array<0 x i8> type -- confirm against the GEP verifier.
    %40 = llvm.mlir.addressof @__dynamic_shared_memory__ : !llvm.ptr<3>
    %41 = llvm.getelementptr %40[0, 0] : (!llvm.ptr<3>) -> !llvm.ptr<3>, !llvm.ptr
    %42 = llvm.getelementptr %41[0, 0, 0] : (!llvm.ptr<3>) -> !llvm.ptr<3>, !llvm.array<1 x array<1 x f32>>
    %43 = llvm.getelementptr %40[0, 4] : (!llvm.ptr<3>) -> !llvm.ptr<3>, !llvm.ptr
    %44 = llvm.getelementptr %43[0, 0] : (!llvm.ptr<3>) -> !llvm.ptr<3>, !llvm.array<8 x f32>
    // Combine pairs of i32 arguments into i64 values:
    // %48 = (zext(%arg5) << 32) | zext(%arg4), %52 = (zext(%arg7) << 32) | zext(%arg6).
    %45 = llvm.zext %arg4 : i32 to i64
    %46 = llvm.zext %arg5 : i32 to i64
    %47 = llvm.shl %46, %5  : i64
    %48 = llvm.or %45, %47  : i64
    %49 = llvm.zext %arg6 : i32 to i64
    %50 = llvm.zext %arg7 : i32 to i64
    %51 = llvm.shl %50, %5  : i64
    %52 = llvm.or %49, %51  : i64
    // %53 = %arg1 advanced by 196608 f32 elements; assume its address has the
    // low 6 bits clear (address & 63 == 0).
    %53 = llvm.getelementptr %arg1[196608] : (!llvm.ptr<1>) -> !llvm.ptr<1>, f32
    %54 = llvm.ptrtoint %53 : !llvm.ptr<1> to i64
    %55 = llvm.and %54, %3  : i64
    %56 = llvm.icmp "eq" %55, %20 : i64
    "llvm.intr.assume"(%56) : (i1) -> ()
    // Multipliers used for input indexing: %57 = %52 * 1, %58 = %57 * %48.
    %57 = llvm.mul %52, %2  : i64
    %58 = llvm.mul %57, %48  : i64
    // Same (address & 63 == 0) assumption for %arg0.
    %59 = llvm.ptrtoint %arg0 : !llvm.ptr<1> to i64
    %60 = llvm.and %59, %3  : i64
    %61 = llvm.icmp "eq" %60, %20 : i64
    "llvm.intr.assume"(%61) : (i1) -> ()
    // Read ctaid.x, ctaid.y and tid.x and sign-extend each to i64.
    %62 = nvvm.read.ptx.sreg.ctaid.x : i32
    %63 = llvm.sext %62 : i32 to i64
    %64 = nvvm.read.ptx.sreg.ctaid.y : i32
    %65 = llvm.sext %64 : i32 to i64
    %66 = nvvm.read.ptx.sreg.tid.x : i32
    %67 = llvm.sext %66 : i32 to i64
    // Enter the first loop with induction value tid.x and accumulator %17.
    llvm.br ^bb1(%67, %17 : i64, !llvm.array<1 x vector<1xf32>>)
  // Loop header: %68 is the induction variable (advanced by 256 in ^bb2) and
  // %69 carries the accumulator; iterates while %68 < %52 (signed compare).
  ^bb1(%68: i64, %69: !llvm.array<1 x vector<1xf32>>):  // 2 preds: ^bb0, ^bb2
    %70 = llvm.icmp "slt" %68, %52 : i64
    llvm.cond_br %70, ^bb2, ^bb3
  ^bb2:  // pred: ^bb1
    // Load one f32 from %arg0 at index ctaid.x * %58 + ctaid.y * %57 + %68
    // and place it in a vector<1xf32>.
    %71 = llvm.mul %63, %58  : i64
    %72 = llvm.mul %65, %57  : i64
    %73 = llvm.add %71, %72  : i64
    %74 = llvm.add %73, %68  : i64
    %75 = llvm.getelementptr %arg0[%74] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f32
    %76 = llvm.load %75 : !llvm.ptr<1> -> f32
    %77 = llvm.mlir.undef : vector<1xf32>
    %78 = llvm.insertelement %76, %77[%8 : i32] : vector<1xf32>
    %79 = llvm.shufflevector %78, %77 [0] : vector<1xf32> 
    // Select the new value %79 when it is NaN ("uno" self-compare) or when the
    // accumulator %80 is not unordered-or-greater than it; otherwise keep %80.
    %80 = llvm.extractvalue %69[0] : !llvm.array<1 x vector<1xf32>> 
    %81 = llvm.fcmp "ugt" %80, %79 : vector<1xf32>
    %82 = llvm.fcmp "uno" %79, %79 : vector<1xf32>
    %83 = llvm.xor %81, %37  : vector<1xi1>
    %84 = llvm.or %82, %83  : vector<1xi1>
    %85 = llvm.select %84, %79, %80 : vector<1xi1>, vector<1xf32>
    // The result is inserted into the constant %38 rather than %69; the array
    // has a single element, so that element is fully overwritten.
    %86 = llvm.insertvalue %85, %38[0] : !llvm.array<1 x vector<1xf32>> 
    %87 = llvm.add %68, %6  : i64
    llvm.br ^bb1(%87, %86 : i64, !llvm.array<1 x vector<1xf32>>)
  ^bb3:  // pred: ^bb1
    nvvm.barrier0
    // Extract the scalar accumulator from the loop-carried value.
    %88 = llvm.extractvalue %69[0] : !llvm.array<1 x vector<1xf32>> 
    %89 = llvm.extractelement %88[%1 : i64] : vector<1xf32>
    // %91 = -1 logically shifted right by (32 - 32) and %92 = 32 - 1; they are
    // passed as the first and last operands of every nvvm.shfl.sync below.
    %90 = llvm.sub %15, %15  : i32
    %91 = llvm.lshr %0, %90  : i32
    %92 = llvm.sub %15, %16  : i32
    // Five bfly shuffle steps with offset operands 1, 2, 4, 8 and 16. After
    // each step the same NaN-or-not-greater select as in ^bb2 combines the
    // local value with the shuffled one (scalar form).
    %93 = nvvm.shfl.sync  bfly %91, %89, %16, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %94 = llvm.extractvalue %93[0] : !llvm.struct<(f32, i1)> 
    %95 = llvm.fcmp "ugt" %89, %94 : f32
    %96 = llvm.fcmp "uno" %94, %94 : f32
    %97 = llvm.xor %95, %39  : i1
    %98 = llvm.or %96, %97  : i1
    %99 = llvm.select %98, %94, %89 : i1, f32
    %100 = nvvm.shfl.sync  bfly %91, %99, %14, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %101 = llvm.extractvalue %100[0] : !llvm.struct<(f32, i1)> 
    %102 = llvm.fcmp "ugt" %99, %101 : f32
    %103 = llvm.fcmp "uno" %101, %101 : f32
    %104 = llvm.xor %102, %39  : i1
    %105 = llvm.or %103, %104  : i1
    %106 = llvm.select %105, %101, %99 : i1, f32
    %107 = nvvm.shfl.sync  bfly %91, %106, %13, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %108 = llvm.extractvalue %107[0] : !llvm.struct<(f32, i1)> 
    %109 = llvm.fcmp "ugt" %106, %108 : f32
    %110 = llvm.fcmp "uno" %108, %108 : f32
    %111 = llvm.xor %109, %39  : i1
    %112 = llvm.or %110, %111  : i1
    %113 = llvm.select %112, %108, %106 : i1, f32
    %114 = nvvm.shfl.sync  bfly %91, %113, %12, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %115 = llvm.extractvalue %114[0] : !llvm.struct<(f32, i1)> 
    %116 = llvm.fcmp "ugt" %113, %115 : f32
    %117 = llvm.fcmp "uno" %115, %115 : f32
    %118 = llvm.xor %116, %39  : i1
    %119 = llvm.or %117, %118  : i1
    %120 = llvm.select %119, %115, %113 : i1, f32
    %121 = nvvm.shfl.sync  bfly %91, %120, %11, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %122 = llvm.extractvalue %121[0] : !llvm.struct<(f32, i1)> 
    %123 = llvm.fcmp "ugt" %120, %122 : f32
    %124 = llvm.fcmp "uno" %122, %122 : f32
    %125 = llvm.xor %123, %39  : i1
    %126 = llvm.or %124, %125  : i1
    %127 = llvm.select %126, %122, %120 : i1, f32
    // %128 = tid.x / 32 and %129 = tid.x % 32 (unsigned ops on the
    // sign-extended id). Only the case %129 == 0 goes through ^bb4.
    %128 = llvm.udiv %67, %10  : i64
    %129 = llvm.urem %67, %10  : i64
    %130 = llvm.icmp "eq" %129, %20 : i64
    llvm.cond_br %130, ^bb4, ^bb5
  ^bb4:  // pred: ^bb3
    // Store the reduced value %127 at f32 index %128 relative to %44.
    %131 = llvm.getelementptr %44[%128] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
    llvm.store %127, %131 : f32, !llvm.ptr<3>
    llvm.br ^bb5
  ^bb5:  // 2 preds: ^bb3, ^bb4
    nvvm.barrier0
    // Load from %44 at f32 index umin(%129, 7), then run three bfly steps
    // (offset operands 1, 2, 4) with the same select pattern as above.
    %132 = llvm.intr.umin(%129, %9)  : (i64, i64) -> i64
    %133 = llvm.getelementptr %44[%132] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
    %134 = llvm.load %133 : !llvm.ptr<3> -> f32
    %135 = nvvm.shfl.sync  bfly %91, %134, %16, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %136 = llvm.extractvalue %135[0] : !llvm.struct<(f32, i1)> 
    %137 = llvm.fcmp "ugt" %134, %136 : f32
    %138 = llvm.fcmp "uno" %136, %136 : f32
    %139 = llvm.xor %137, %39  : i1
    %140 = llvm.or %138, %139  : i1
    %141 = llvm.select %140, %136, %134 : i1, f32
    %142 = nvvm.shfl.sync  bfly %91, %141, %14, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %143 = llvm.extractvalue %142[0] : !llvm.struct<(f32, i1)> 
    %144 = llvm.fcmp "ugt" %141, %143 : f32
    %145 = llvm.fcmp "uno" %143, %143 : f32
    %146 = llvm.xor %144, %39  : i1
    %147 = llvm.or %145, %146  : i1
    %148 = llvm.select %147, %143, %141 : i1, f32
    %149 = nvvm.shfl.sync  bfly %91, %148, %13, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %150 = llvm.extractvalue %149[0] : !llvm.struct<(f32, i1)> 
    %151 = llvm.fcmp "ugt" %148, %150 : f32
    %152 = llvm.fcmp "uno" %150, %150 : f32
    %153 = llvm.xor %151, %39  : i1
    %154 = llvm.or %152, %153  : i1
    %155 = llvm.select %154, %150, %148 : i1, f32
    // idx shuffle with operand 0, then keep %157 if it is unordered-or-greater
    // than %7 (-3.40282347E+38); otherwise use %7.
    %156 = nvvm.shfl.sync  idx %91, %155, %8, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
    %157 = llvm.extractvalue %156[0] : !llvm.struct<(f32, i1)> 
    %158 = llvm.fcmp "ugt" %157, %7 : f32
    %159 = llvm.select %158, %157, %7 : i1, f32
    // Only the case tid.x == 0 goes through ^bb6.
    %160 = llvm.icmp "eq" %67, %20 : i64
    llvm.cond_br %160, ^bb6, ^bb7
  ^bb6:  // pred: ^bb5
    // Store %159 at f32 index (0 + 0) relative to %42.
    %161 = llvm.add %20, %20  : i64
    %162 = llvm.getelementptr %42[%161] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
    llvm.store %159, %162 : f32, !llvm.ptr<3>
    llvm.br ^bb7
  ^bb7:  // 2 preds: ^bb5, ^bb6
    nvvm.barrier0
    // %167 = max(min(128 - tid.x, 1), 0) using signed compares; it is the
    // upper bound of the loop in ^bb8/^bb9.
    %163 = llvm.sub %19, %67  : i64
    %164 = llvm.icmp "slt" %163, %18 : i64
    %165 = llvm.select %164, %163, %18 : i1, i64
    %166 = llvm.icmp "slt" %165, %20 : i64
    %167 = llvm.select %166, %20, %165 : i1, i64
    llvm.br ^bb8(%20 : i64)
  // Loop header: %168 counts from 0 in steps of 1 while %168 < %167 (signed).
  ^bb8(%168: i64):  // 2 preds: ^bb7, ^bb9
    %169 = llvm.icmp "slt" %168, %167 : i64
    llvm.cond_br %169, ^bb9, ^bb10
  ^bb9:  // pred: ^bb8
    // %170 = tid.x + %168. Load the input f32 at
    // ctaid.x * %58 + ctaid.y * %57 + %170 from %arg0.
    %170 = llvm.add %67, %168  : i64
    %171 = llvm.mul %63, %58  : i64
    %172 = llvm.mul %65, %57  : i64
    %173 = llvm.add %171, %172  : i64
    %174 = llvm.add %173, %170  : i64
    %175 = llvm.getelementptr %arg0[%174] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f32
    %176 = llvm.load %175 : !llvm.ptr<1> -> f32
    // Load the f32 at index (0 + 0) relative to %42 (the location written in
    // ^bb6) and subtract it from the input.
    %177 = llvm.add %20, %20  : i64
    %178 = llvm.getelementptr %42[%177] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
    %179 = llvm.load %178 : !llvm.ptr<3> -> f32
    %180 = llvm.fsub %176, %179  : f32
    // Clamp the difference to [-87.8, 88.8] (%26, %25); the unordered
    // predicates let a NaN pass through both selects.
    %181 = llvm.fcmp "uge" %180, %26 : f32
    %182 = llvm.select %181, %180, %26 : i1, f32
    %183 = llvm.fcmp "ule" %182, %25 : f32
    %184 = llvm.select %183, %182, %25 : i1, f32
    // %186 = floorf(%184 * 1.44269502 + 0.5), then clamped to [-127, 127]
    // as %190.
    // NOTE(review): from here to %205 this looks like an expanded exp()
    // approximation -- confirm against the lowering that produced it.
    %185 = llvm.intr.fma(%184, %35, %27)  : (f32, f32, f32) -> f32
    %186 = llvm.call @__nv_floorf(%185) : (f32) -> f32
    %187 = llvm.fcmp "uge" %186, %24 : f32
    %188 = llvm.select %187, %186, %24 : i1, f32
    %189 = llvm.fcmp "ule" %188, %23 : f32
    %190 = llvm.select %189, %188, %23 : i1, f32
    // %192 = %184 + (-0.693359375 * %190) + (2.12194442E-4 * %190), as two fmas.
    %191 = llvm.intr.fma(%34, %190, %184)  : (f32, f32, f32) -> f32
    %192 = llvm.intr.fma(%33, %190, %191)  : (f32, f32, f32) -> f32
    // Horner-form fma chain in %192 with coefficients %32, %31, %30, %29, %28,
    // %27, then %200 = %197 * %192^2 + %192 + 1.0.
    %193 = llvm.intr.fma(%192, %32, %31)  : (f32, f32, f32) -> f32
    %194 = llvm.intr.fma(%193, %192, %30)  : (f32, f32, f32) -> f32
    %195 = llvm.intr.fma(%194, %192, %29)  : (f32, f32, f32) -> f32
    %196 = llvm.intr.fma(%195, %192, %28)  : (f32, f32, f32) -> f32
    %197 = llvm.intr.fma(%196, %192, %27)  : (f32, f32, f32) -> f32
    %198 = llvm.fmul %192, %192  : f32
    %199 = llvm.intr.fma(%197, %198, %192)  : (f32, f32, f32) -> f32
    %200 = llvm.fadd %199, %36  : f32
    // %204 = bitcast of ((fptosi(%190) + 127) << 23) to f32; %205 = %200 * %204.
    %201 = llvm.fptosi %190 : f32 to i32
    %202 = llvm.add %201, %21  : i32
    %203 = llvm.shl %202, %22  : i32
    %204 = llvm.bitcast %203 : i32 to f32
    %205 = llvm.fmul %200, %204  : f32
    // Store %205 at f32 index ctaid.x * 16384 + ctaid.y * 128 + %170
    // relative to %53.
    %206 = llvm.mul %63, %4  : i64
    %207 = llvm.mul %65, %19  : i64
    %208 = llvm.add %206, %207  : i64
    %209 = llvm.add %208, %170  : i64
    %210 = llvm.getelementptr %53[%209] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f32
    llvm.store %205, %210 : f32, !llvm.ptr<1>
    %211 = llvm.add %168, %18  : i64
    llvm.br ^bb8(%211 : i64)
  ^bb10:  // pred: ^bb8
    // Final barrier, then return.
    nvvm.barrier0
    llvm.return
  }
}

Repro command

mlir-translate -mlir-to-llvmir repro.mlir

Stack trace

0.	Program arguments: /home/mahesh/iree/build/Debug/llvm-project/bin/mlir-translate --mlir-to-llvmir repro.mlir
 #0 0x0000557a88e47f4d llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:11
 #1 0x0000557a88e4843b PrintStackTraceSignalHandler(void*) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1
 #2 0x0000557a88e46466 llvm::sys::RunSignalHandlers() /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/Support/Signals.cpp:105:5
 #3 0x0000557a88e48c55 SignalHandler(int) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #4 0x00007fb5d3a42520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #5 0x00007fb5d3a969fc __pthread_kill_implementation ./nptl/./nptl/pthread_kill.c:44:76
 #6 0x00007fb5d3a969fc __pthread_kill_internal ./nptl/./nptl/pthread_kill.c:78:10
 #7 0x00007fb5d3a969fc pthread_kill ./nptl/./nptl/pthread_kill.c:89:10
 #8 0x00007fb5d3a42476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #9 0x00007fb5d3a287f3 abort ./stdlib/./stdlib/abort.c:81:7
#10 0x00007fb5d3a2871b _nl_load_domain ./intl/./intl/loadmsgcat.c:1177:9
#11 0x00007fb5d3a39e96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
#12 0x0000557a8aae0828 decltype(auto) llvm::cast<llvm::ArrayType, llvm::Type>(llvm::Type*) /home/mahesh/iree/iree/third_party/llvm-project/llvm/include/llvm/Support/Casting.h:579:10
#13 0x0000557a8cad1865 llvm::ConstantFoldGetElementPtr(llvm::Type*, llvm::Constant*, bool, std::optional<unsigned int>, llvm::ArrayRef<llvm::Value*>) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/IR/ConstantFold.cpp:2076:11
#14 0x0000557a8caef100 llvm::ConstantExpr::getGetElementPtr(llvm::Type*, llvm::Constant*, llvm::ArrayRef<llvm::Value*>, bool, std::optional<unsigned int>, llvm::Type*) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/IR/Constants.cpp:2408:17
#15 0x0000557a8cc2a4cd llvm::ConstantFolder::FoldGEP(llvm::Type*, llvm::Value*, llvm::ArrayRef<llvm::Value*>, bool) const /home/mahesh/iree/iree/third_party/llvm-project/llvm/include/llvm/IR/ConstantFolder.h:119:9
#16 0x0000557a8aad535c llvm::IRBuilderBase::CreateGEP(llvm::Type*, llvm::Value*, llvm::ArrayRef<llvm::Value*>, llvm::Twine const&, bool) /home/mahesh/iree/iree/third_party/llvm-project/llvm/include/llvm/IR/IRBuilder.h:1864:15
#17 0x0000557a8aab4499 convertOperationImpl(mlir::Operation&, llvm::IRBuilderBase&, mlir::LLVM::ModuleTranslation&) /home/mahesh/iree/build/Debug/llvm-project/tools/mlir/include/mlir/Dialect/LLVMIR/LLVMConversions.inc:194:55
#18 0x0000557a8aab1b19 (anonymous namespace)::LLVMDialectLLVMIRTranslationInterface::convertOperation(mlir::Operation*, llvm::IRBuilderBase&, mlir::LLVM::ModuleTranslation&) const /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp:396:12
#19 0x0000557a89fb507e mlir::LLVM::ModuleTranslation::convertOperation(mlir::Operation&, llvm::IRBuilderBase&) /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:595:23
#20 0x0000557a89fb55e8 mlir::LLVM::ModuleTranslation::convertBlock(mlir::Block&, bool, llvm::IRBuilderBase&) /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:641:16
#21 0x0000557a89fb703e mlir::LLVM::ModuleTranslation::convertOneFunction(mlir::LLVM::LLVMFuncOp) /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:911:16
#22 0x0000557a89fb8bab mlir::LLVM::ModuleTranslation::convertFunctions() /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:1052:16
#23 0x0000557a89fba286 mlir::translateModuleToLLVMIR(mlir::Operation*, llvm::LLVMContext&, llvm::StringRef) /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:1378:25

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions