diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index b00c8cc16fe0f..0397fb299d27b 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -2800,6 +2800,23 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return SelectInst::Create(NewSelector, Op1, Op0, "blendv"); } + // Peek through a one-use shuffle - VectorCombine should have simplified + // this for cases where we're splitting wider vectors to use blendv + // intrinsics. + Value *MaskSrc = nullptr; + ArrayRef ShuffleMask; + if (match(Mask, PatternMatch::m_OneUse(PatternMatch::m_Shuffle( + PatternMatch::m_Value(MaskSrc), PatternMatch::m_Undef(), + PatternMatch::m_Mask(ShuffleMask))))) { + // Bail if the shuffle was irregular or contains undefs. + int NumElts = cast(MaskSrc->getType())->getNumElements(); + if (NumElts < ShuffleMask.size() || !isPowerOf2_32(NumElts) || + any_of(ShuffleMask, + [NumElts](int M) { return M < 0 || M >= NumElts; })) + break; + Mask = MaskSrc; + } + // Convert to a vector select if we can bypass casts and find a boolean // vector condition value. Value *BoolVec; @@ -2809,11 +2826,26 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { BoolVec->getType()->getScalarSizeInBits() == 1) { auto *MaskTy = cast(Mask->getType()); auto *OpTy = cast(II.getType()); + unsigned NumMaskElts = MaskTy->getNumElements(); + unsigned NumOperandElts = OpTy->getNumElements(); + + // If we peeked through a shuffle, reapply the shuffle to the bool vector. + if (MaskSrc) { + unsigned NumMaskSrcElts = + cast(MaskSrc->getType())->getNumElements(); + NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts; + // Multiple mask bits maps to the same operand element - bail out. + if (NumMaskElts > NumOperandElts) + break; + SmallVector ScaledMask; + if (!llvm::scaleShuffleMaskElts(NumMaskElts, ShuffleMask, ScaledMask)) + break; + BoolVec = IC.Builder.CreateShuffleVector(BoolVec, ScaledMask); + MaskTy = FixedVectorType::get(MaskTy->getElementType(), NumMaskElts); + } assert(MaskTy->getPrimitiveSizeInBits() == OpTy->getPrimitiveSizeInBits() && "Not expecting mask and operands with different sizes"); - unsigned NumMaskElts = MaskTy->getNumElements(); - unsigned NumOperandElts = OpTy->getNumElements(); if (NumMaskElts == NumOperandElts) { return SelectInst::Create(BoolVec, Op1, Op0); diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll index bccd189d12a82..67c9c333987f6 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll @@ -4,7 +4,7 @@ ; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s ; -; TODO: PR58895 - replace shuffled _mm_blendv_epi8+icmp with select+icmp +; PR58895 - replace shuffled _mm_blendv_epi8+icmp with select+icmp ; ; @@ -13,21 +13,21 @@ define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) { ; CHECK-LABEL: @x86_pblendvb_v4f64_v2f64( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <4 x double> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <4 x double> [[B:%.*]] to <32 x i8> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <32 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_HI]], <16 x i8> [[B_HI]], <16 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <16 x i8> [[SEL_LO]], <16 x i8> [[SEL_HI]], <32 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x double> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x double> [[A:%.*]] to <4 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[B:%.*]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP5]], <2 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x double> [[A]] to <4 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[B]] to <4 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP12]], <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i64> [[TMP13]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[RES]] ; %a.bc = bitcast <4 x double> %a to <32 x i8> @@ -50,21 +50,21 @@ define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) { ; CHECK-LABEL: @x86_pblendvb_v8f32_v4f32( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x float> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x float> [[B:%.*]] to <32 x i8> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <32 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_HI]], <16 x i8> [[B_HI]], <16 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <16 x i8> [[SEL_LO]], <16 x i8> [[SEL_HI]], <32 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <8 x float> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x float> [[A:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[B:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP5]], <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x float> [[A]] to <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[B]] to <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP11]], <4 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP13]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[RES]] ; %a.bc = bitcast <8 x float> %a to <32 x i8> @@ -87,22 +87,9 @@ define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { ; CHECK-LABEL: @x86_pblendvb_v4i64_v2i64( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <32 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_HI]], <16 x i8> [[B_HI]], <16 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <16 x i8> [[SEL_LO]], <16 x i8> [[SEL_HI]], <32 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]] +; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> %b.bc = bitcast <4 x i64> %b to <32 x i8> @@ -124,23 +111,23 @@ define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { ; CHECK-LABEL: @x86_pblendvb_v8i32_v4i32( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <32 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_HI]], <16 x i8> [[B_HI]], <16 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <16 x i8> [[SEL_LO]], <16 x i8> [[SEL_HI]], <32 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP5]], <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP11]], <4 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP13]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[RES]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> @@ -165,23 +152,23 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64> define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { ; CHECK-LABEL: @x86_pblendvb_v16i16_v8i16( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <16 x i16> [[SEXT]] to <32 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <32 x i8> [[SEXT_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_HI]], <16 x i8> [[B_HI]], <16 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <16 x i8> [[SEL_LO]], <16 x i8> [[SEL_HI]], <32 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP5]], <8 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[A]] to <16 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP8]], <16 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[B]] to <16 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i16> [[TMP11]], <8 x i16> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> [[TMP12]], <16 x i32> +; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP13]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[RES]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> @@ -210,17 +197,8 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> ; CHECK-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <32 x i8> [[SEXT]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_HI]], <16 x i8> [[B_HI]], <16 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <16 x i8> [[SEL_LO]], <16 x i8> [[SEL_HI]], <32 x i32> +; CHECK-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]] ; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[RES]] ; @@ -249,21 +227,21 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) { ; CHECK-LABEL: @x86_pblendvb_v8f64_v4f64( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x double> [[A:%.*]] to <64 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x double> [[B:%.*]] to <64 x i8> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <8 x i64> [[SEXT]] to <64 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_LO]], <32 x i8> [[B_LO]], <32 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_HI]], <32 x i8> [[B_HI]], <32 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <32 x i8> [[SEL_LO]], <32 x i8> [[SEL_HI]], <64 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x double> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x double> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP5]], <4 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[A]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP8]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP12]], <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i64> [[TMP13]] to <8 x double> ; CHECK-NEXT: ret <8 x double> [[RES]] ; %a.bc = bitcast <8 x double> %a to <64 x i8> @@ -286,21 +264,21 @@ define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) { ; CHECK-LABEL: @x86_pblendvb_v16f32_v8f32( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <16 x float> [[A:%.*]] to <64 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <16 x float> [[B:%.*]] to <64 x i8> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <16 x i32> [[SEXT]] to <64 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_LO]], <32 x i8> [[B_LO]], <32 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_HI]], <32 x i8> [[B_HI]], <32 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <32 x i8> [[SEL_LO]], <32 x i8> [[SEL_HI]], <64 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <16 x float> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[A]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[B]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP11]], <8 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP12]], <16 x i32> +; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP13]] to <16 x float> ; CHECK-NEXT: ret <16 x float> [[RES]] ; %a.bc = bitcast <16 x float> %a to <64 x i8> @@ -323,22 +301,9 @@ define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { ; CHECK-LABEL: @x86_pblendvb_v8i64_v4i64( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <8 x i64> [[SEXT]] to <64 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_LO]], <32 x i8> [[B_LO]], <32 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_HI]], <32 x i8> [[B_HI]], <32 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <32 x i8> [[SEL_LO]], <32 x i8> [[SEL_HI]], <64 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64> -; CHECK-NEXT: ret <8 x i64> [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] +; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> %b.bc = bitcast <8 x i64> %b to <64 x i8> @@ -360,23 +325,23 @@ define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { ; CHECK-LABEL: @x86_pblendvb_v16i32_v8i32( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8> -; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[A]] to <16 x i32> -; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[B]] to <16 x i32> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> +; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <16 x i32> [[SEXT]] to <64 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_LO]], <32 x i8> [[B_LO]], <32 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_HI]], <32 x i8> [[B_HI]], <32 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <32 x i8> [[SEL_LO]], <32 x i8> [[SEL_HI]], <64 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[A]] to <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[B]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[A]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[B]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP11]], <8 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP12]], <16 x i32> +; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP13]] to <8 x i64> ; CHECK-NEXT: ret <8 x i64> [[RES]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> @@ -401,23 +366,23 @@ define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64 define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { ; CHECK-LABEL: @x86_pblendvb_v32i16_v16i16( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8> ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16> -; CHECK-NEXT: [[SEXT_BC:%.*]] = bitcast <32 x i16> [[SEXT]] to <64 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <64 x i8> [[SEXT_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_LO]], <32 x i8> [[B_LO]], <32 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_HI]], <32 x i8> [[B_HI]], <32 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <32 x i8> [[SEL_LO]], <32 x i8> [[SEL_HI]], <64 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP5]], <16 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[A]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[B]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <32 x i16> [[TMP10]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i16> [[TMP11]], <16 x i16> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[TMP12]], <32 x i32> +; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP13]] to <8 x i64> ; CHECK-NEXT: ret <8 x i64> [[RES]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> @@ -446,17 +411,8 @@ define <8 x i64> @x86_pblendvb_v64i8_v32i8(<8 x i64> %a, <8 x i64> %b, <8 x i64> ; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8> ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <64 x i8> [[A_BC]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <64 x i8> [[B_BC]], <64 x i8> poison, <32 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <64 x i8> [[SEXT]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = shufflevector <64 x i8> [[SEXT]], <64 x i8> poison, <32 x i32> -; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_LO]], <32 x i8> [[B_LO]], <32 x i8> [[SEXT_LO]]) -; CHECK-NEXT: [[SEL_HI:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A_HI]], <32 x i8> [[B_HI]], <32 x i8> [[SEXT_HI]]) -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <32 x i8> [[SEL_LO]], <32 x i8> [[SEL_HI]], <64 x i32> +; CHECK-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]] ; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64> ; CHECK-NEXT: ret <8 x i64> [[RES]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll index c7818e8254b2e..7b690341fb805 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll @@ -9,23 +9,26 @@ define <4 x i64> @PR67803(<4 x i64> %x, <4 x i64> %y, <4 x i64> %a, <4 x i64> %b ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[Y:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i8> [[TMP4]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i8> [[TMP6]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP8]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[TMP5]], <16 x i8> [[TMP7]], <16 x i8> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i64> [[A]] to <32 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <32 x i8> [[TMP11]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64> [[B]] to <32 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8> -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <32 x i8> [[TMP15]], <32 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[TMP12]], <16 x i8> [[TMP14]], <16 x i8> [[TMP16]]) -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP17]], <32 x i32> -; CHECK-NEXT: [[SHUFFLE_I23:%.*]] = bitcast <32 x i8> [[TMP18]] to <4 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP3]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <32 x i8> [[TMP5]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP9]], <4 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i64> [[A]] to <32 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i64> [[B]] to <32 x i8> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP14]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP13]] to <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP15]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[TMP18]], <4 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i32> [[TMP19]] to <16 x i8> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> [[TMP20]], <32 x i32> +; CHECK-NEXT: [[SHUFFLE_I23:%.*]] = bitcast <32 x i8> [[TMP21]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I23]] ; entry: