diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e0c3cc5eddb82..341cf51173ccc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22142,6 +22142,10 @@ static SDValue performVectorShiftCombine(SDNode *N, if (DCI.DAG.ComputeNumSignBits(Op.getOperand(0)) > ShiftImm) return Op.getOperand(0); + // If the shift is exact, the shifted out bits matter. + if (N->getFlags().hasExact()) + return SDValue(); + APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm); APInt DemandedMask = ~ShiftedOutBits; diff --git a/llvm/test/CodeGen/AArch64/shr-exact-demanded-bits.ll b/llvm/test/CodeGen/AArch64/shr-exact-demanded-bits.ll new file mode 100644 index 0000000000000..9698626aea655 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/shr-exact-demanded-bits.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s | FileCheck %s +target triple = "aarch64-linux" + +define <2 x i32> @f(i8 %0, i8 %1) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.b[3], w0 +; CHECK-NEXT: mov v0.b[7], w1 +; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: ret + %3 = insertelement <2 x i8> poison, i8 %0, i64 0 + %4 = insertelement <2 x i8> %3, i8 %1, i64 1 + %5 = shufflevector <2 x i8> %4, <2 x i8> , <8 x i32> + %6 = bitcast <8 x i8> %5 to <2 x i32> + %7 = ashr exact <2 x i32> %6, + ret <2 x i32> %7 +} + +define <2 x i32> @g(i8 %0, i8 %1) { +; CHECK-LABEL: g: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.b[3], w0 +; CHECK-NEXT: mov v0.b[7], w1 +; CHECK-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-NEXT: ret + %3 = insertelement <2 x i8> poison, i8 %0, i64 0 + %4 = insertelement <2 x i8> %3, i8 %1, i64 1 + %5 = shufflevector <2 x i8> %4, <2 x i8> , <8 x i32> + %6 = bitcast <8 x i8> %5 to <2 x i32> + %7 = lshr exact <2 x i32> %6, + ret <2 x i32> %7 +}