Skip to content

Commit ea95668

Browse files
committed
GlobalISel: Implement s32->s64 G_FPTOSI lowering
Port directly from DAG version. The lowering for G_FPTOUI used to fail on AMDGPU because it uses G_FPTOSI.
1 parent b21571f commit ea95668

File tree

5 files changed

+666
-1
lines changed

5 files changed

+666
-1
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ class LegalizerHelper {
252252
LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
253253
LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
254254
LegalizeResult lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
255+
LegalizeResult lowerFPTOSI(MachineInstr &MI);
255256
LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
256257
LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
257258
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2438,6 +2438,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
24382438
return lowerSITOFP(MI, TypeIdx, Ty);
24392439
case G_FPTOUI:
24402440
return lowerFPTOUI(MI, TypeIdx, Ty);
2441+
case G_FPTOSI:
2442+
return lowerFPTOSI(MI);
24412443
case G_SMIN:
24422444
case G_SMAX:
24432445
case G_UMIN:
@@ -4315,6 +4317,73 @@ LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
43154317
return Legalized;
43164318
}
43174319

4320+
/// Lower G_FPTOSI by expanding the conversion into integer bit operations on
/// the bits of the source value. Only a 32-bit source scalar type combined
/// with a 64-bit destination scalar type is handled; any other combination
/// returns UnableToLegalize before anything is built. On success the result
/// is written to the register in MI's operand 0, MI is erased, and Legalized
/// is returned.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
4321+
Register Dst = MI.getOperand(0).getReg();
4322+
Register Src = MI.getOperand(1).getReg();
4323+
LLT DstTy = MRI.getType(Dst);
4324+
LLT SrcTy = MRI.getType(Src);
4325+
const LLT S64 = LLT::scalar(64);
4326+
const LLT S32 = LLT::scalar(32);
4327+
4328+
// FIXME: Only f32 to i64 conversions are supported.
4329+
if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
4330+
return UnableToLegalize;
4331+
4332+
// Expand f32 -> i64 conversion
4333+
// This algorithm comes from compiler-rt's implementation of fixsfdi:
4334+
// https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
4335+
4336+
unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
4337+
4338+
// Isolate the exponent field: mask bits 23..30 of the source, then shift the
// field down so it starts at bit 0.
auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
4339+
auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
4340+
4341+
auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
4342+
auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
4343+
4344+
// Build Sign as either 0 or all-ones: keep only the top bit, arithmetic-shift
// it right by (SrcEltBits - 1), then sign-extend to the destination type.
auto SignMask = MIRBuilder.buildConstant(SrcTy,
4345+
APInt::getSignMask(SrcEltBits));
4346+
auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
4347+
auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
4348+
auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
4349+
Sign = MIRBuilder.buildSExt(DstTy, Sign);
4350+
4351+
// The low 23 bits are the stored mantissa; K sets bit 23 (the implicit
// leading one of the f32 significand) when OR'ed in below.
auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
4352+
auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
4353+
auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
4354+
4355+
auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
4356+
R = MIRBuilder.buildZExt(DstTy, R);
4357+
4358+
// Subtract the bias (127) from the exponent field, and precompute both shift
// amounts relative to the mantissa width of 23 bits.
auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
4359+
auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
4360+
auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
4361+
auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
4362+
4363+
// Both shifts are emitted unconditionally; the select below keeps the left
// shift when Exponent > 23 (signed) and the right shift otherwise.
auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
4364+
auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
4365+
4366+
// NOTE(review): the guard at the top compares scalar element types, so vector
// SrcTy/DstTy pass it, yet the compare result type used here is a plain s1.
// Presumably callers scalarize before lowering (the AMDGPU rule in this commit
// does .scalarize(0) before .lower()) -- confirm for other targets.
const LLT S1 = LLT::scalar(1);
4367+
auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
4368+
S1, Exponent, ExponentLoBit);
4369+
4370+
R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
4371+
4372+
// (R ^ Sign) - Sign negates R when Sign is all-ones and leaves R unchanged
// when Sign is zero.
auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
4373+
auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
4374+
4375+
auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
4376+
4377+
// A negative unbiased exponent selects a result of zero instead of Ret.
auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
4378+
S1, Exponent, ZeroSrcTy);
4379+
4380+
auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
4381+
MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
4382+
4383+
// The final select defines Dst, so the original instruction can be removed.
MI.eraseFromParent();
4384+
return Legalized;
4385+
}
4386+
43184387
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
43194388
switch (Opc) {
43204389
case TargetOpcode::G_SMIN:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
465465
FPToI.minScalar(1, S32);
466466

467467
FPToI.minScalar(0, S32)
468-
.scalarize(0);
468+
.scalarize(0)
469+
.lower();
469470

470471
getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
471472
.scalarize(0)

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,215 @@ body: |
354354
%1:_(<2 x s64>) = G_FPTOSI %0
355355
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
356356
...
357+
358+
---
359+
name: test_fptosi_s32_to_s64
360+
body: |
361+
bb.0:
362+
liveins: $vgpr0
363+
364+
; SI-LABEL: name: test_fptosi_s32_to_s64
365+
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
366+
; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2139095040
367+
; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
368+
; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
369+
; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32)
370+
; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
371+
; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
372+
; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
373+
; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[AND1]], [[C3]](s32)
374+
; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[ASHR]](s32)
375+
; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8388607
376+
; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C4]]
377+
; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8388608
378+
; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[C5]]
379+
; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
380+
; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
381+
; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[C6]]
382+
; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C1]]
383+
; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SUB]]
384+
; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[SUB1]](s32)
385+
; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[ZEXT]], [[SUB2]](s32)
386+
; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C1]]
387+
; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[LSHR1]]
388+
; SI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[SELECT]], [[SEXT]]
389+
; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
390+
; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
391+
; SI: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
392+
; SI: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
393+
; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
394+
; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
395+
; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C7]]
396+
; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
397+
; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C8]], [[MV]]
398+
; SI: $vgpr0_vgpr1 = COPY [[SELECT1]](s64)
399+
; VI-LABEL: name: test_fptosi_s32_to_s64
400+
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
401+
; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2139095040
402+
; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
403+
; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
404+
; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32)
405+
; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
406+
; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
407+
; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
408+
; VI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[AND1]], [[C3]](s32)
409+
; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[ASHR]](s32)
410+
; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8388607
411+
; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C4]]
412+
; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8388608
413+
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[C5]]
414+
; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
415+
; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
416+
; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[C6]]
417+
; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C1]]
418+
; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SUB]]
419+
; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[SUB1]](s32)
420+
; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[ZEXT]], [[SUB2]](s32)
421+
; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C1]]
422+
; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[LSHR1]]
423+
; VI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[SELECT]], [[SEXT]]
424+
; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
425+
; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
426+
; VI: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
427+
; VI: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
428+
; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
429+
; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
430+
; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C7]]
431+
; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
432+
; VI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C8]], [[MV]]
433+
; VI: $vgpr0_vgpr1 = COPY [[SELECT1]](s64)
434+
%0:_(s32) = COPY $vgpr0
435+
%1:_(s64) = G_FPTOSI %0
436+
$vgpr0_vgpr1 = COPY %1
437+
...
438+
439+
---
440+
name: test_fptosi_v2s32_to_v2s64
441+
body: |
442+
bb.0:
443+
liveins: $vgpr0_vgpr1
444+
445+
; SI-LABEL: name: test_fptosi_v2s32_to_v2s64
446+
; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
447+
; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
448+
; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2139095040
449+
; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
450+
; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
451+
; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32)
452+
; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
453+
; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
454+
; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
455+
; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[AND1]], [[C3]](s32)
456+
; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[ASHR]](s32)
457+
; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8388607
458+
; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
459+
; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8388608
460+
; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[C5]]
461+
; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
462+
; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
463+
; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[C6]]
464+
; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C1]]
465+
; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SUB]]
466+
; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[SUB1]](s32)
467+
; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[ZEXT]], [[SUB2]](s32)
468+
; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C1]]
469+
; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[LSHR1]]
470+
; SI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[SELECT]], [[SEXT]]
471+
; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
472+
; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
473+
; SI: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
474+
; SI: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
475+
; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
476+
; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
477+
; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C7]]
478+
; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
479+
; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C8]], [[MV]]
480+
; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
481+
; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
482+
; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
483+
; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[AND4]], [[C3]](s32)
484+
; SI: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[ASHR1]](s32)
485+
; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
486+
; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[C5]]
487+
; SI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR1]](s32)
488+
; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR2]], [[C6]]
489+
; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[C1]]
490+
; SI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SUB3]]
491+
; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[SUB4]](s32)
492+
; SI: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[ZEXT1]], [[SUB5]](s32)
493+
; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB3]](s32), [[C1]]
494+
; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL1]], [[LSHR3]]
495+
; SI: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[SELECT2]], [[SEXT1]]
496+
; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
497+
; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT1]](s64)
498+
; SI: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
499+
; SI: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO3]]
500+
; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
501+
; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB3]](s32), [[C7]]
502+
; SI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[C8]], [[MV1]]
503+
; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64)
504+
; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
505+
; VI-LABEL: name: test_fptosi_v2s32_to_v2s64
506+
; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
507+
; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
508+
; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2139095040
509+
; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
510+
; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
511+
; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32)
512+
; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
513+
; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
514+
; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
515+
; VI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[AND1]], [[C3]](s32)
516+
; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[ASHR]](s32)
517+
; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8388607
518+
; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
519+
; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8388608
520+
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[C5]]
521+
; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
522+
; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
523+
; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[C6]]
524+
; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C1]]
525+
; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SUB]]
526+
; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[SUB1]](s32)
527+
; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[ZEXT]], [[SUB2]](s32)
528+
; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C1]]
529+
; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[LSHR1]]
530+
; VI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[SELECT]], [[SEXT]]
531+
; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
532+
; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
533+
; VI: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
534+
; VI: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
535+
; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
536+
; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
537+
; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C7]]
538+
; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
539+
; VI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C8]], [[MV]]
540+
; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
541+
; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
542+
; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
543+
; VI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[AND4]], [[C3]](s32)
544+
; VI: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[ASHR1]](s32)
545+
; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
546+
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[C5]]
547+
; VI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR1]](s32)
548+
; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR2]], [[C6]]
549+
; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[C1]]
550+
; VI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SUB3]]
551+
; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[SUB4]](s32)
552+
; VI: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[ZEXT1]], [[SUB5]](s32)
553+
; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB3]](s32), [[C1]]
554+
; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL1]], [[LSHR3]]
555+
; VI: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[SELECT2]], [[SEXT1]]
556+
; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
557+
; VI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT1]](s64)
558+
; VI: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
559+
; VI: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO3]]
560+
; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
561+
; VI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB3]](s32), [[C7]]
562+
; VI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[C8]], [[MV1]]
563+
; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64)
564+
; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
565+
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
566+
%1:_(<2 x s64>) = G_FPTOSI %0
567+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
568+
...

0 commit comments

Comments
 (0)