Closed
Description
#69292 seems to expose a bug in LLVM (the PR itself might not be the root cause, but it triggers the failure).
After that PR, the following example crashes during translation from the MLIR LLVM dialect to LLVM IR:
// Reproducer module: three globals in address space 3, an external declaration
// of @__nv_floorf, and a single kernel function written in the LLVM and NVVM
// dialects. Translating it with `mlir-translate -mlir-to-llvmir` aborts.
module {
// Only @__dynamic_shared_memory__ is referenced by the function below (via
// llvm.mlir.addressof); the two private globals are defined but unused here.
llvm.mlir.global external @__dynamic_shared_memory__() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
llvm.mlir.global private @__shared_memory___0() {addr_space = 3 : i32, alignment = 64 : i64} : !llvm.array<1 x array<1 x f32>>
llvm.mlir.global private @__shared_memory__() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<8 x f32>
// External declaration; called once in ^bb9.
llvm.func @__nv_floorf(f32) -> f32
// Kernel body. %arg0 is only loaded from and %arg1 (offset by 196608 f32
// elements) is only stored to. %arg4..%arg7 are combined pairwise into two i64
// values; %arg2 and %arg3 are not used.
llvm.func @_softmax_dynamic_dispatch_0_generic_12x128xD_f32(%arg0: !llvm.ptr<1> {llvm.align = 16 : i32, llvm.noalias, llvm.readonly}, %arg1: !llvm.ptr<1> {llvm.align = 16 : i32, llvm.noalias}, %arg2: i32, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32, %arg7: i32) {
%0 = llvm.mlir.constant(-1 : i32) : i32
%1 = llvm.mlir.constant(0 : i64) : i64
%2 = llvm.mlir.constant(1 : i64) : i64
%3 = llvm.mlir.constant(63 : index) : i64
%4 = llvm.mlir.constant(16384 : index) : i64
%5 = llvm.mlir.constant(32 : i64) : i64
%6 = llvm.mlir.constant(256 : index) : i64
// Most negative finite f32 value; used as a lower clamp in ^bb5.
%7 = llvm.mlir.constant(-3.40282347E+38 : f32) : f32
%8 = llvm.mlir.constant(0 : i32) : i32
%9 = llvm.mlir.constant(7 : index) : i64
%10 = llvm.mlir.constant(32 : index) : i64
%11 = llvm.mlir.constant(16 : i32) : i32
%12 = llvm.mlir.constant(8 : i32) : i32
%13 = llvm.mlir.constant(4 : i32) : i32
%14 = llvm.mlir.constant(2 : i32) : i32
%15 = llvm.mlir.constant(32 : i32) : i32
%16 = llvm.mlir.constant(1 : i32) : i32
// 0xFF800000 is the f32 bit pattern of -infinity; initial accumulator of the
// first loop.
%17 = llvm.mlir.constant(dense<0xFF800000> : vector<1x1xf32>) : !llvm.array<1 x vector<1xf32>>
%18 = llvm.mlir.constant(1 : index) : i64
%19 = llvm.mlir.constant(128 : index) : i64
%20 = llvm.mlir.constant(0 : index) : i64
%21 = llvm.mlir.constant(127 : i32) : i32
%22 = llvm.mlir.constant(23 : i32) : i32
%23 = llvm.mlir.constant(1.270000e+02 : f32) : f32
%24 = llvm.mlir.constant(-1.270000e+02 : f32) : f32
%25 = llvm.mlir.constant(8.880000e+01 : f32) : f32
%26 = llvm.mlir.constant(-8.780000e+01 : f32) : f32
%27 = llvm.mlir.constant(5.000000e-01 : f32) : f32
%28 = llvm.mlir.constant(0.166666657 : f32) : f32
%29 = llvm.mlir.constant(0.0416657962 : f32) : f32
%30 = llvm.mlir.constant(0.00833345205 : f32) : f32
%31 = llvm.mlir.constant(0.00139819994 : f32) : f32
%32 = llvm.mlir.constant(1.98756912E-4 : f32) : f32
%33 = llvm.mlir.constant(2.12194442E-4 : f32) : f32
%34 = llvm.mlir.constant(-0.693359375 : f32) : f32
%35 = llvm.mlir.constant(1.44269502 : f32) : f32
%36 = llvm.mlir.constant(1.000000e+00 : f32) : f32
%37 = llvm.mlir.constant(dense<true> : vector<1xi1>) : vector<1xi1>
%38 = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vector<1xf32>>
%39 = llvm.mlir.constant(true) : i1
%40 = llvm.mlir.addressof @__dynamic_shared_memory__ : !llvm.ptr<3>
// NOTE(review): %41 and %43 below are GEPs on a constant base with element
// type !llvm.ptr and two indices, so the second index steps into a
// non-aggregate type. The stack trace aborts in cast<ArrayType> while
// constant-folding a GEP created by IRBuilder::CreateGEP; presumably one of
// these two ops is the trigger -- confirm.
%41 = llvm.getelementptr %40[0, 0] : (!llvm.ptr<3>) -> !llvm.ptr<3>, !llvm.ptr
%42 = llvm.getelementptr %41[0, 0, 0] : (!llvm.ptr<3>) -> !llvm.ptr<3>, !llvm.array<1 x array<1 x f32>>
%43 = llvm.getelementptr %40[0, 4] : (!llvm.ptr<3>) -> !llvm.ptr<3>, !llvm.ptr
%44 = llvm.getelementptr %43[0, 0] : (!llvm.ptr<3>) -> !llvm.ptr<3>, !llvm.array<8 x f32>
// %48 = zext(%arg4) | (zext(%arg5) << 32): one i64 assembled from two i32 halves.
%45 = llvm.zext %arg4 : i32 to i64
%46 = llvm.zext %arg5 : i32 to i64
%47 = llvm.shl %46, %5 : i64
%48 = llvm.or %45, %47 : i64
// %52 is assembled the same way from %arg6 (low half) and %arg7 (high half);
// it is the upper bound of the first loop.
%49 = llvm.zext %arg6 : i32 to i64
%50 = llvm.zext %arg7 : i32 to i64
%51 = llvm.shl %50, %5 : i64
%52 = llvm.or %49, %51 : i64
// Output base pointer: %arg1 advanced by 196608 f32 elements.
%53 = llvm.getelementptr %arg1[196608] : (!llvm.ptr<1>) -> !llvm.ptr<1>, f32
// Assume (ptrtoint(%53) & 63) == 0.
%54 = llvm.ptrtoint %53 : !llvm.ptr<1> to i64
%55 = llvm.and %54, %3 : i64
%56 = llvm.icmp "eq" %55, %20 : i64
"llvm.intr.assume"(%56) : (i1) -> ()
// %57 = %52 * 1 and %58 = %57 * %48 are the multipliers applied to ctaid.y and
// ctaid.x respectively when indexing %arg0.
%57 = llvm.mul %52, %2 : i64
%58 = llvm.mul %57, %48 : i64
// Assume (ptrtoint(%arg0) & 63) == 0.
%59 = llvm.ptrtoint %arg0 : !llvm.ptr<1> to i64
%60 = llvm.and %59, %3 : i64
%61 = llvm.icmp "eq" %60, %20 : i64
"llvm.intr.assume"(%61) : (i1) -> ()
// %63 = ctaid.x, %65 = ctaid.y, %67 = tid.x, each sign-extended to i64.
%62 = nvvm.read.ptx.sreg.ctaid.x : i32
%63 = llvm.sext %62 : i32 to i64
%64 = nvvm.read.ptx.sreg.ctaid.y : i32
%65 = llvm.sext %64 : i32 to i64
%66 = nvvm.read.ptx.sreg.tid.x : i32
%67 = llvm.sext %66 : i32 to i64
// First loop: induction variable %68 starts at tid.x and advances by 256 while
// %68 < %52; %69 carries the running accumulator, starting from -infinity.
llvm.br ^bb1(%67, %17 : i64, !llvm.array<1 x vector<1xf32>>)
^bb1(%68: i64, %69: !llvm.array<1 x vector<1xf32>>): // 2 preds: ^bb0, ^bb2
%70 = llvm.icmp "slt" %68, %52 : i64
llvm.cond_br %70, ^bb2, ^bb3
^bb2: // pred: ^bb1
// Load %arg0[ctaid.x * %58 + ctaid.y * %57 + %68].
%71 = llvm.mul %63, %58 : i64
%72 = llvm.mul %65, %57 : i64
%73 = llvm.add %71, %72 : i64
%74 = llvm.add %73, %68 : i64
%75 = llvm.getelementptr %arg0[%74] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f32
%76 = llvm.load %75 : !llvm.ptr<1> -> f32
// Wrap the loaded scalar into a vector<1xf32>.
%77 = llvm.mlir.undef : vector<1xf32>
%78 = llvm.insertelement %76, %77[%8 : i32] : vector<1xf32>
%79 = llvm.shufflevector %78, %77 [0] : vector<1xf32>
%80 = llvm.extractvalue %69[0] : !llvm.array<1 x vector<1xf32>>
// Select the new value %79 when it is NaN or when the accumulator %80 is not
// (unordered-or-)greater than it; otherwise keep %80.
%81 = llvm.fcmp "ugt" %80, %79 : vector<1xf32>
%82 = llvm.fcmp "uno" %79, %79 : vector<1xf32>
%83 = llvm.xor %81, %37 : vector<1xi1>
%84 = llvm.or %82, %83 : vector<1xi1>
%85 = llvm.select %84, %79, %80 : vector<1xi1>, vector<1xf32>
%86 = llvm.insertvalue %85, %38[0] : !llvm.array<1 x vector<1xf32>>
%87 = llvm.add %68, %6 : i64
llvm.br ^bb1(%87, %86 : i64, !llvm.array<1 x vector<1xf32>>)
^bb3: // pred: ^bb1
nvvm.barrier0
%88 = llvm.extractvalue %69[0] : !llvm.array<1 x vector<1xf32>>
%89 = llvm.extractelement %88[%1 : i64] : vector<1xf32>
// %91 = -1 >> (32 - 32) = all-ones value passed as the first shuffle operand;
// %92 = 32 - 1 = 31 passed as the last shuffle operand.
%90 = llvm.sub %15, %15 : i32
%91 = llvm.lshr %0, %90 : i32
%92 = llvm.sub %15, %16 : i32
// Five butterfly shuffles with offsets 1, 2, 4, 8, 16; after each one the
// local value is combined with the shuffled value using the same select
// pattern as in ^bb2.
%93 = nvvm.shfl.sync bfly %91, %89, %16, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%94 = llvm.extractvalue %93[0] : !llvm.struct<(f32, i1)>
%95 = llvm.fcmp "ugt" %89, %94 : f32
%96 = llvm.fcmp "uno" %94, %94 : f32
%97 = llvm.xor %95, %39 : i1
%98 = llvm.or %96, %97 : i1
%99 = llvm.select %98, %94, %89 : i1, f32
%100 = nvvm.shfl.sync bfly %91, %99, %14, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%101 = llvm.extractvalue %100[0] : !llvm.struct<(f32, i1)>
%102 = llvm.fcmp "ugt" %99, %101 : f32
%103 = llvm.fcmp "uno" %101, %101 : f32
%104 = llvm.xor %102, %39 : i1
%105 = llvm.or %103, %104 : i1
%106 = llvm.select %105, %101, %99 : i1, f32
%107 = nvvm.shfl.sync bfly %91, %106, %13, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%108 = llvm.extractvalue %107[0] : !llvm.struct<(f32, i1)>
%109 = llvm.fcmp "ugt" %106, %108 : f32
%110 = llvm.fcmp "uno" %108, %108 : f32
%111 = llvm.xor %109, %39 : i1
%112 = llvm.or %110, %111 : i1
%113 = llvm.select %112, %108, %106 : i1, f32
%114 = nvvm.shfl.sync bfly %91, %113, %12, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%115 = llvm.extractvalue %114[0] : !llvm.struct<(f32, i1)>
%116 = llvm.fcmp "ugt" %113, %115 : f32
%117 = llvm.fcmp "uno" %115, %115 : f32
%118 = llvm.xor %116, %39 : i1
%119 = llvm.or %117, %118 : i1
%120 = llvm.select %119, %115, %113 : i1, f32
%121 = nvvm.shfl.sync bfly %91, %120, %11, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%122 = llvm.extractvalue %121[0] : !llvm.struct<(f32, i1)>
%123 = llvm.fcmp "ugt" %120, %122 : f32
%124 = llvm.fcmp "uno" %122, %122 : f32
%125 = llvm.xor %123, %39 : i1
%126 = llvm.or %124, %125 : i1
%127 = llvm.select %126, %122, %120 : i1, f32
// %128 = tid.x / 32, %129 = tid.x % 32 (unsigned). Only threads with
// %129 == 0 take ^bb4 and store their value.
%128 = llvm.udiv %67, %10 : i64
%129 = llvm.urem %67, %10 : i64
%130 = llvm.icmp "eq" %129, %20 : i64
llvm.cond_br %130, ^bb4, ^bb5
^bb4: // pred: ^bb3
// Store the combined value to element %128 of the f32 buffer at %44.
%131 = llvm.getelementptr %44[%128] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
llvm.store %127, %131 : f32, !llvm.ptr<3>
llvm.br ^bb5
^bb5: // 2 preds: ^bb3, ^bb4
nvvm.barrier0
// Every thread reloads element umin(tid.x % 32, 7) of the buffer at %44, then
// combines via butterfly shuffles with offsets 1, 2, 4.
%132 = llvm.intr.umin(%129, %9) : (i64, i64) -> i64
%133 = llvm.getelementptr %44[%132] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
%134 = llvm.load %133 : !llvm.ptr<3> -> f32
%135 = nvvm.shfl.sync bfly %91, %134, %16, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%136 = llvm.extractvalue %135[0] : !llvm.struct<(f32, i1)>
%137 = llvm.fcmp "ugt" %134, %136 : f32
%138 = llvm.fcmp "uno" %136, %136 : f32
%139 = llvm.xor %137, %39 : i1
%140 = llvm.or %138, %139 : i1
%141 = llvm.select %140, %136, %134 : i1, f32
%142 = nvvm.shfl.sync bfly %91, %141, %14, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%143 = llvm.extractvalue %142[0] : !llvm.struct<(f32, i1)>
%144 = llvm.fcmp "ugt" %141, %143 : f32
%145 = llvm.fcmp "uno" %143, %143 : f32
%146 = llvm.xor %144, %39 : i1
%147 = llvm.or %145, %146 : i1
%148 = llvm.select %147, %143, %141 : i1, f32
%149 = nvvm.shfl.sync bfly %91, %148, %13, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%150 = llvm.extractvalue %149[0] : !llvm.struct<(f32, i1)>
%151 = llvm.fcmp "ugt" %148, %150 : f32
%152 = llvm.fcmp "uno" %150, %150 : f32
%153 = llvm.xor %151, %39 : i1
%154 = llvm.or %152, %153 : i1
%155 = llvm.select %154, %150, %148 : i1, f32
// Indexed shuffle with index operand 0, followed by a lower clamp against %7.
%156 = nvvm.shfl.sync idx %91, %155, %8, %92 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
%157 = llvm.extractvalue %156[0] : !llvm.struct<(f32, i1)>
%158 = llvm.fcmp "ugt" %157, %7 : f32
%159 = llvm.select %158, %157, %7 : i1, f32
// Only the thread with tid.x == 0 takes ^bb6.
%160 = llvm.icmp "eq" %67, %20 : i64
llvm.cond_br %160, ^bb6, ^bb7
^bb6: // pred: ^bb5
// Store the reduced value to element 0 (0 + 0) of the buffer at %42.
%161 = llvm.add %20, %20 : i64
%162 = llvm.getelementptr %42[%161] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
llvm.store %159, %162 : f32, !llvm.ptr<3>
llvm.br ^bb7
^bb7: // 2 preds: ^bb5, ^bb6
nvvm.barrier0
// %167 = max(min(128 - tid.x, 1), 0) using signed compares: the trip count of
// the second loop, i.e. 0 or 1.
%163 = llvm.sub %19, %67 : i64
%164 = llvm.icmp "slt" %163, %18 : i64
%165 = llvm.select %164, %163, %18 : i1, i64
%166 = llvm.icmp "slt" %165, %20 : i64
%167 = llvm.select %166, %20, %165 : i1, i64
llvm.br ^bb8(%20 : i64)
^bb8(%168: i64): // 2 preds: ^bb7, ^bb9
%169 = llvm.icmp "slt" %168, %167 : i64
llvm.cond_br %169, ^bb9, ^bb10
^bb9: // pred: ^bb8
// Load %arg0[ctaid.x * %58 + ctaid.y * %57 + tid.x + %168].
%170 = llvm.add %67, %168 : i64
%171 = llvm.mul %63, %58 : i64
%172 = llvm.mul %65, %57 : i64
%173 = llvm.add %171, %172 : i64
%174 = llvm.add %173, %170 : i64
%175 = llvm.getelementptr %arg0[%174] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f32
%176 = llvm.load %175 : !llvm.ptr<1> -> f32
// Reload the value stored in ^bb6 and subtract it from the input element.
%177 = llvm.add %20, %20 : i64
%178 = llvm.getelementptr %42[%177] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
%179 = llvm.load %178 : !llvm.ptr<3> -> f32
%180 = llvm.fsub %176, %179 : f32
// Clamp the difference to [-87.8, 88.8].
%181 = llvm.fcmp "uge" %180, %26 : f32
%182 = llvm.select %181, %180, %26 : i1, f32
%183 = llvm.fcmp "ule" %182, %25 : f32
%184 = llvm.select %183, %182, %25 : i1, f32
// NOTE(review): the sequence below looks like an expanded polynomial
// approximation of exp(): floor(x * 1.44269502 + 0.5) clamped to [-127, 127],
// a two-constant reduction of x, an fma chain, and a final scale by a float
// whose bits are (n + 127) << 23 -- confirm against the originating lowering.
%185 = llvm.intr.fma(%184, %35, %27) : (f32, f32, f32) -> f32
%186 = llvm.call @__nv_floorf(%185) : (f32) -> f32
%187 = llvm.fcmp "uge" %186, %24 : f32
%188 = llvm.select %187, %186, %24 : i1, f32
%189 = llvm.fcmp "ule" %188, %23 : f32
%190 = llvm.select %189, %188, %23 : i1, f32
%191 = llvm.intr.fma(%34, %190, %184) : (f32, f32, f32) -> f32
%192 = llvm.intr.fma(%33, %190, %191) : (f32, f32, f32) -> f32
%193 = llvm.intr.fma(%192, %32, %31) : (f32, f32, f32) -> f32
%194 = llvm.intr.fma(%193, %192, %30) : (f32, f32, f32) -> f32
%195 = llvm.intr.fma(%194, %192, %29) : (f32, f32, f32) -> f32
%196 = llvm.intr.fma(%195, %192, %28) : (f32, f32, f32) -> f32
%197 = llvm.intr.fma(%196, %192, %27) : (f32, f32, f32) -> f32
%198 = llvm.fmul %192, %192 : f32
%199 = llvm.intr.fma(%197, %198, %192) : (f32, f32, f32) -> f32
%200 = llvm.fadd %199, %36 : f32
%201 = llvm.fptosi %190 : f32 to i32
%202 = llvm.add %201, %21 : i32
%203 = llvm.shl %202, %22 : i32
%204 = llvm.bitcast %203 : i32 to f32
%205 = llvm.fmul %200, %204 : f32
// Store the result to %53[ctaid.x * 16384 + ctaid.y * 128 + tid.x + %168].
%206 = llvm.mul %63, %4 : i64
%207 = llvm.mul %65, %19 : i64
%208 = llvm.add %206, %207 : i64
%209 = llvm.add %208, %170 : i64
%210 = llvm.getelementptr %53[%209] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f32
llvm.store %205, %210 : f32, !llvm.ptr<1>
%211 = llvm.add %168, %18 : i64
llvm.br ^bb8(%211 : i64)
^bb10: // pred: ^bb8
nvvm.barrier0
llvm.return
}
}
Repro command
mlir-translate -mlir-to-llvmir repro.mlir
Stack trace
0. Program arguments: /home/mahesh/iree/build/Debug/llvm-project/bin/mlir-translate --mlir-to-llvmir repro.mlir
#0 0x0000557a88e47f4d llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:11
#1 0x0000557a88e4843b PrintStackTraceSignalHandler(void*) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1
#2 0x0000557a88e46466 llvm::sys::RunSignalHandlers() /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/Support/Signals.cpp:105:5
#3 0x0000557a88e48c55 SignalHandler(int) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
#4 0x00007fb5d3a42520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
#5 0x00007fb5d3a969fc __pthread_kill_implementation ./nptl/./nptl/pthread_kill.c:44:76
#6 0x00007fb5d3a969fc __pthread_kill_internal ./nptl/./nptl/pthread_kill.c:78:10
#7 0x00007fb5d3a969fc pthread_kill ./nptl/./nptl/pthread_kill.c:89:10
#8 0x00007fb5d3a42476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
#9 0x00007fb5d3a287f3 abort ./stdlib/./stdlib/abort.c:81:7
#10 0x00007fb5d3a2871b _nl_load_domain ./intl/./intl/loadmsgcat.c:1177:9
#11 0x00007fb5d3a39e96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
#12 0x0000557a8aae0828 decltype(auto) llvm::cast<llvm::ArrayType, llvm::Type>(llvm::Type*) /home/mahesh/iree/iree/third_party/llvm-project/llvm/include/llvm/Support/Casting.h:579:10
#13 0x0000557a8cad1865 llvm::ConstantFoldGetElementPtr(llvm::Type*, llvm::Constant*, bool, std::optional<unsigned int>, llvm::ArrayRef<llvm::Value*>) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/IR/ConstantFold.cpp:2076:11
#14 0x0000557a8caef100 llvm::ConstantExpr::getGetElementPtr(llvm::Type*, llvm::Constant*, llvm::ArrayRef<llvm::Value*>, bool, std::optional<unsigned int>, llvm::Type*) /home/mahesh/iree/iree/third_party/llvm-project/llvm/lib/IR/Constants.cpp:2408:17
#15 0x0000557a8cc2a4cd llvm::ConstantFolder::FoldGEP(llvm::Type*, llvm::Value*, llvm::ArrayRef<llvm::Value*>, bool) const /home/mahesh/iree/iree/third_party/llvm-project/llvm/include/llvm/IR/ConstantFolder.h:119:9
#16 0x0000557a8aad535c llvm::IRBuilderBase::CreateGEP(llvm::Type*, llvm::Value*, llvm::ArrayRef<llvm::Value*>, llvm::Twine const&, bool) /home/mahesh/iree/iree/third_party/llvm-project/llvm/include/llvm/IR/IRBuilder.h:1864:15
#17 0x0000557a8aab4499 convertOperationImpl(mlir::Operation&, llvm::IRBuilderBase&, mlir::LLVM::ModuleTranslation&) /home/mahesh/iree/build/Debug/llvm-project/tools/mlir/include/mlir/Dialect/LLVMIR/LLVMConversions.inc:194:55
#18 0x0000557a8aab1b19 (anonymous namespace)::LLVMDialectLLVMIRTranslationInterface::convertOperation(mlir::Operation*, llvm::IRBuilderBase&, mlir::LLVM::ModuleTranslation&) const /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp:396:12
#19 0x0000557a89fb507e mlir::LLVM::ModuleTranslation::convertOperation(mlir::Operation&, llvm::IRBuilderBase&) /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:595:23
#20 0x0000557a89fb55e8 mlir::LLVM::ModuleTranslation::convertBlock(mlir::Block&, bool, llvm::IRBuilderBase&) /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:641:16
#21 0x0000557a89fb703e mlir::LLVM::ModuleTranslation::convertOneFunction(mlir::LLVM::LLVMFuncOp) /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:911:16
#22 0x0000557a89fb8bab mlir::LLVM::ModuleTranslation::convertFunctions() /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:1052:16
#23 0x0000557a89fba286 mlir::translateModuleToLLVMIR(mlir::Operation*, llvm::LLVMContext&, llvm::StringRef) /home/mahesh/iree/iree/third_party/llvm-project/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp:1378:25