@@ -21616,15 +21616,25 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
21616
21616
// Extend everything to 80 bits to force it to be done on x87.
21617
21617
// TODO: Are there any fast-math-flags to propagate here?
21618
21618
if (IsStrict) {
21619
- SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
21620
- {Chain, Fild, Fudge});
21619
+ unsigned Opc = ISD::STRICT_FADD;
21620
+ // Windows needs the precision control changed to 80 bits around this add.
21621
+ if (Subtarget.isOSWindows() && DstVT == MVT::f32)
21622
+ Opc = X86ISD::STRICT_FP80_ADD;
21623
+
21624
+ SDValue Add =
21625
+ DAG.getNode(Opc, dl, {MVT::f80, MVT::Other}, {Chain, Fild, Fudge});
21621
21626
// STRICT_FP_ROUND can't handle equal types.
21622
21627
if (DstVT == MVT::f80)
21623
21628
return Add;
21624
21629
return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
21625
21630
{Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
21626
21631
}
21627
- SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
21632
+ unsigned Opc = ISD::FADD;
21633
+ // Windows needs the precision control changed to 80 bits around this add.
21634
+ if (Subtarget.isOSWindows() && DstVT == MVT::f32)
21635
+ Opc = X86ISD::FP80_ADD;
21636
+
21637
+ SDValue Add = DAG.getNode(Opc, dl, MVT::f80, Fild, Fudge);
21628
21638
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
21629
21639
DAG.getIntPtrConstant(0, dl));
21630
21640
}
@@ -33830,6 +33840,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
33830
33840
NODE_NAME_CASE(AESENCWIDE256KL)
33831
33841
NODE_NAME_CASE(AESDECWIDE256KL)
33832
33842
NODE_NAME_CASE(TESTUI)
33843
+ NODE_NAME_CASE(FP80_ADD)
33844
+ NODE_NAME_CASE(STRICT_FP80_ADD)
33833
33845
}
33834
33846
return nullptr;
33835
33847
#undef NODE_NAME_CASE
@@ -36340,6 +36352,69 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
36340
36352
return BB;
36341
36353
}
36342
36354
36355
+ case X86::FP80_ADDr:
36356
+ case X86::FP80_ADDm32: {
36357
+ // Change the floating point control register to use double extended
36358
+ // precision when performing the addition.
36359
+ int OrigCWFrameIdx =
36360
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
36361
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)),
36362
+ OrigCWFrameIdx);
36363
+
36364
+ // Load the old value of the control word...
36365
+ Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
36366
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
36367
+ OrigCWFrameIdx);
36368
+
36369
+ // OR 0b11 into bits 8 and 9. 0b11 is the encoding for double extended
36370
+ // precision.
36371
+ Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
36372
+ BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
36373
+ .addReg(OldCW, RegState::Kill)
36374
+ .addImm(0x300);
36375
+
36376
+ // Extract to 16 bits.
36377
+ Register NewCW16 =
36378
+ MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
36379
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
36380
+ .addReg(NewCW, RegState::Kill, X86::sub_16bit);
36381
+
36382
+ // Prepare memory for FLDCW.
36383
+ int NewCWFrameIdx =
36384
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
36385
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
36386
+ NewCWFrameIdx)
36387
+ .addReg(NewCW16, RegState::Kill);
36388
+
36389
+ // Reload the modified control word now...
36390
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
36391
+ NewCWFrameIdx);
36392
+
36393
+ // Do the addition.
36394
+ if (MI.getOpcode() == X86::FP80_ADDr) {
36395
+ BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80))
36396
+ .add(MI.getOperand(0))
36397
+ .add(MI.getOperand(1))
36398
+ .add(MI.getOperand(2));
36399
+ } else {
36400
+ BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80m32))
36401
+ .add(MI.getOperand(0))
36402
+ .add(MI.getOperand(1))
36403
+ .add(MI.getOperand(2))
36404
+ .add(MI.getOperand(3))
36405
+ .add(MI.getOperand(4))
36406
+ .add(MI.getOperand(5))
36407
+ .add(MI.getOperand(6));
36408
+ }
36409
+
36410
+ // Reload the original control word now.
36411
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
36412
+ OrigCWFrameIdx);
36413
+
36414
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
36415
+ return BB;
36416
+ }
36417
+
36343
36418
case X86::FP32_TO_INT16_IN_MEM:
36344
36419
case X86::FP32_TO_INT32_IN_MEM:
36345
36420
case X86::FP32_TO_INT64_IN_MEM:
0 commit comments