Skip to content

Commit d8b63b6

Browse files
authored
AMDGPU: Don't fold clamp/omod modifiers without nofpexcept (#95950)
1 parent 773ee62 commit d8b63b6

File tree

3 files changed

+108
-5
lines changed

3 files changed

+108
-5
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1519,6 +1519,9 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
15191519
case AMDGPU::V_MAX_F64_e64:
15201520
case AMDGPU::V_MAX_NUM_F64_e64:
15211521
case AMDGPU::V_PK_MAX_F16: {
1522+
if (MI.mayRaiseFPException())
1523+
return nullptr;
1524+
15221525
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
15231526
return nullptr;
15241527

@@ -1565,6 +1568,9 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
15651568
if (TII->getClampMask(*Def) != TII->getClampMask(MI))
15661569
return false;
15671570

1571+
if (Def->mayRaiseFPException())
1572+
return false;
1573+
15681574
MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
15691575
if (!DefClamp)
15701576
return false;
@@ -1650,7 +1656,9 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
16501656
((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
16511657
Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64 ||
16521658
Op == AMDGPU::V_MUL_F16_fake16_e64) &&
1653-
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
1659+
MFI->getMode().FP64FP16Denormals.Output !=
1660+
DenormalMode::PreserveSign) ||
1661+
MI.mayRaiseFPException())
16541662
return std::pair(nullptr, SIOutMods::NONE);
16551663

16561664
const MachineOperand *RegOp = nullptr;
@@ -1725,6 +1733,9 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
17251733
if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
17261734
return false;
17271735

1736+
if (Def->mayRaiseFPException())
1737+
return false;
1738+
17281739
// Clamp is applied after omod. If the source already has clamp set, don't
17291740
// fold it.
17301741
if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3972,7 +3972,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39723972
.add(*Dst)
39733973
.add(*Src0)
39743974
.add(*Src1)
3975-
.addImm(Imm);
3975+
.addImm(Imm)
3976+
.setMIFlags(MI.getFlags());
39763977
updateLiveVariables(LV, MI, *MIB);
39773978
if (LIS)
39783979
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
@@ -3991,7 +3992,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39913992
.add(*Dst)
39923993
.add(*Src0)
39933994
.addImm(Imm)
3994-
.add(*Src2);
3995+
.add(*Src2)
3996+
.setMIFlags(MI.getFlags());
39953997
updateLiveVariables(LV, MI, *MIB);
39963998
if (LIS)
39973999
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
@@ -4012,7 +4014,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40124014
.add(*Dst)
40134015
.add(*Src1)
40144016
.addImm(Imm)
4015-
.add(*Src2);
4017+
.add(*Src2)
4018+
.setMIFlags(MI.getFlags());
40164019
updateLiveVariables(LV, MI, *MIB);
40174020
if (LIS)
40184021
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
@@ -4048,7 +4051,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40484051
.addImm(Src2Mods ? Src2Mods->getImm() : 0)
40494052
.add(*Src2)
40504053
.addImm(Clamp ? Clamp->getImm() : 0)
4051-
.addImm(Omod ? Omod->getImm() : 0);
4054+
.addImm(Omod ? Omod->getImm() : 0)
4055+
.setMIFlags(MI.getFlags());
40524056
if (AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel))
40534057
MIB.addImm(OpSel ? OpSel->getImm() : 0);
40544058
updateLiveVariables(LV, MI, *MIB);

llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -410,3 +410,91 @@ body: |
410410
%1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit $mode, implicit $exec
411411
412412
...
413+
414+
---
415+
# GCN-LABEL: name: clamp_missing_nofpexcept_0
416+
# GCN: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
417+
# GCN-NEXT: %3:vgpr_32 = V_MAX_F32_e64 0, killed %2, 0, killed %2, 1, 0, implicit $mode, implicit $exec
418+
name: clamp_missing_nofpexcept_0
419+
tracksRegLiveness: true
420+
machineFunctionInfo:
421+
mode:
422+
ieee: false
423+
fp32-input-denormals: false
424+
fp32-output-denormals: false
425+
426+
body: |
427+
bb.0:
428+
liveins: $vgpr0, $vgpr1
429+
430+
%0:vgpr_32 = COPY $vgpr0
431+
%1:vgpr_32 = COPY $vgpr1
432+
%2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
433+
%3:vgpr_32 = V_MAX_F32_e64 0, killed %2, 0, killed %2, 1, 0, implicit $mode, implicit $exec
434+
...
435+
436+
---
437+
# GCN-LABEL: name: clamp_missing_nofpexcept_1
438+
# GCN: %2:vgpr_32 = V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
439+
# GCN-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, killed %2, 0, killed %2, 1, 0, implicit $mode, implicit $exec
440+
name: clamp_missing_nofpexcept_1
441+
tracksRegLiveness: true
442+
machineFunctionInfo:
443+
mode:
444+
ieee: false
445+
fp32-input-denormals: false
446+
fp32-output-denormals: false
447+
448+
body: |
449+
bb.0:
450+
liveins: $vgpr0, $vgpr1
451+
452+
%0:vgpr_32 = COPY $vgpr0
453+
%1:vgpr_32 = COPY $vgpr1
454+
%2:vgpr_32 = V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
455+
%3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, killed %2, 0, killed %2, 1, 0, implicit $mode, implicit $exec
456+
...
457+
458+
---
459+
# GCN-LABEL: name: omod_missing_nofpexcept_0
460+
# GCN: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
461+
# GCN-NEXT: %3:vgpr_32 = nsz V_MUL_F32_e64 0, killed %2, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
462+
name: omod_missing_nofpexcept_0
463+
tracksRegLiveness: true
464+
machineFunctionInfo:
465+
mode:
466+
ieee: false
467+
fp32-input-denormals: false
468+
fp32-output-denormals: false
469+
body: |
470+
bb.0:
471+
liveins: $vgpr0, $vgpr1
472+
473+
%0:vgpr_32 = COPY $vgpr0
474+
%1:vgpr_32 = COPY $vgpr1
475+
%2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
476+
%3:vgpr_32 = nsz V_MUL_F32_e64 0, killed %2, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
477+
478+
...
479+
480+
---
481+
# GCN-LABEL: name: omod_missing_nofpexcept_1
482+
# GCN: %2:vgpr_32 = V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
483+
# GCN-NEXT: %3:vgpr_32 = nsz nofpexcept V_MUL_F32_e64 0, killed %2, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
484+
name: omod_missing_nofpexcept_1
485+
tracksRegLiveness: true
486+
machineFunctionInfo:
487+
mode:
488+
ieee: false
489+
fp32-input-denormals: false
490+
fp32-output-denormals: false
491+
body: |
492+
bb.0:
493+
liveins: $vgpr0, $vgpr1
494+
495+
%0:vgpr_32 = COPY $vgpr0
496+
%1:vgpr_32 = COPY $vgpr1
497+
%2:vgpr_32 = V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
498+
%3:vgpr_32 = nsz nofpexcept V_MUL_F32_e64 0, killed %2, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
499+
500+
...

0 commit comments

Comments
 (0)