-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[SDAG] Add missing ppc_fp128 ExpandFloatRes legalization for modf #127895
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-powerpc Author: Benjamin Maxwell (MacDue) Changes: Should fix: https://lab.llvm.org/buildbot/#/builders/72/builds/8380 (`test_modf_ppcf128` is the test case that needed the additional legalization). Full diff: https://github.com/llvm/llvm-project/pull/127895.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4b79bd28e2750..0244c170a2123 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1569,6 +1569,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ case ISD::FMODF: ExpandFloatRes_FMODF(N); break;
// clang-format on
}
@@ -1619,6 +1620,23 @@ void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
GetPairElements(Tmp.first, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) { // Float-expansion handler for ISD::FMODF nodes.
+ ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getMODF(N->getValueType(0)), // The MODF libcall is selected from the type of N's result 0.
+ /*CallRetResNo=*/0); // NOTE(review): presumably result 0 is the value the libcall returns directly -- confirm.
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( // Expands every FP result of N by lowering N to libcall LC.
+ SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) { // CallRetResNo is forwarded unchanged to the DAG helper below.
+ assert(!N->isStrictFPOpcode() && "strictfp not implemented"); // Strict FP opcodes are not handled by this path.
+ SmallVector<SDValue> Results; // Receives the values produced by the libcall expansion.
+ DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
+ for (auto [ResNo, Res] : enumerate(Results)) { // ResNo is the position of Res within Results.
+ SDValue Lo, Hi;
+ GetPairElements(Res, Lo, Hi); // Split the value into its Lo and Hi pieces.
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); // Register the pieces as the expansion of N's ResNo-th result.
+ }
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 69c687a797485..cac969f7e2185 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -668,6 +668,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_UnaryWithTwoFPResults(
+ SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
+
// clang-format off
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -714,6 +717,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMODF(SDNode *N);
// clang-format on
// Float Operand Expansion.
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
new file mode 100644
index 0000000000000..69e3b22c7352c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -0,0 +1,330 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
+
+define { half, half } @test_modf_f16(half %a) {
+; CHECK-LABEL: test_modf_f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfs f2, 44(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_modf_f16_only_use_fractional_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_modf_f16_only_use_integral_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_integral_part:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfs f1, 44(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_modf_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset f30, -16
+; CHECK-NEXT: .cfi_offset f31, -8
+; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu r1, -64(r1)
+; CHECK-NEXT: std r0, 80(r1)
+; CHECK-NEXT: xscvdphp f0, f2
+; CHECK-NEXT: addi r4, r1, 40
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f31, f0
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: fmr f30, f1
+; CHECK-NEXT: fmr f1, f31
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfs f3, 40(r1)
+; CHECK-NEXT: fmr f2, f1
+; CHECK-NEXT: fmr f1, f30
+; CHECK-NEXT: lfs f4, 44(r1)
+; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_modf_f32(float %a) {
+; CHECK-LABEL: test_modf_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfs f2, 44(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { float, float } @llvm.modf.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_modf_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -112(r1)
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 112
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r28, -32
+; CHECK-NEXT: .cfi_offset r29, -24
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: .cfi_offset v30, -64
+; CHECK-NEXT: .cfi_offset v31, -48
+; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; CHECK-NEXT: addi r30, r1, 36
+; CHECK-NEXT: std r28, 80(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT: mr r4, r30
+; CHECK-NEXT: vmr v31, v2
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd vs0, v31
+; CHECK-NEXT: addi r29, r1, 40
+; CHECK-NEXT: xscvdpspn v30, f1
+; CHECK-NEXT: mr r4, r29
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: xscvdpspn vs0, f1
+; CHECK-NEXT: addi r28, r1, 44
+; CHECK-NEXT: mr r4, r28
+; CHECK-NEXT: xxmrghw v30, vs0, v30
+; CHECK-NEXT: xxsldwi vs0, v31, v31, 1
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NEXT: xscvdpspn v2, f1
+; CHECK-NEXT: lfiwzx f1, 0, r30
+; CHECK-NEXT: lfiwzx f2, 0, r29
+; CHECK-NEXT: lxsiwzx v3, 0, r28
+; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT: addi r3, r3, .LCPI5_0@toc@l
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: xxmrghw v4, vs2, vs1
+; CHECK-NEXT: xxperm v2, v30, vs0
+; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT: xxperm v3, v4, vs0
+; CHECK-NEXT: addi r1, r1, 112
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
+ ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_modf_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -112(r1)
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 112
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r29, -24
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: .cfi_offset v30, -64
+; CHECK-NEXT: .cfi_offset v31, -48
+; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; CHECK-NEXT: addi r30, r1, 40
+; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT: mr r4, r30
+; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: vmr v31, v2
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd vs0, v31
+; CHECK-NEXT: addi r29, r1, 44
+; CHECK-NEXT: xscvdpspn v30, f1
+; CHECK-NEXT: mr r4, r29
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: xscvdpspn vs0, f1
+; CHECK-NEXT: lfiwzx f1, 0, r29
+; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghw v2, vs0, v30
+; CHECK-NEXT: lfiwzx f0, 0, r30
+; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghw v3, vs1, vs0
+; CHECK-NEXT: addi r1, r1, 112
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_modf_f64(double %a) {
+; CHECK-LABEL: test_modf_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r4, r1, 40
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfd f2, 40(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { double, double } @llvm.modf.f64(double %a)
+ ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_modf_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -80(r1)
+; CHECK-NEXT: std r0, 96(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset v30, -32
+; CHECK-NEXT: .cfi_offset v31, -16
+; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v2
+; CHECK-NEXT: addi r4, r1, 32
+; CHECK-NEXT: xscpsgndp f1, v31, v31
+; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: nop
+; CHECK-NEXT: xscpsgndp v30, f1, f1
+; CHECK-NEXT: xxswapd vs1, v31
+; CHECK-NEXT: addi r4, r1, 40
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxmrghd v2, v30, vs1
+; CHECK-NEXT: lfd f0, 32(r1)
+; CHECK-NEXT: lfd f1, 40(r1)
+; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v3, vs0, vs1
+; CHECK-NEXT: addi r1, r1, 80
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a)
+ ret { <2 x double>, <2 x double> } %result
+}
+
+define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r5, r1, 32
+; CHECK-NEXT: bl modfl
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfd f3, 32(r1)
+; CHECK-NEXT: lfd f4, 40(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+ ret { ppc_fp128, ppc_fp128 } %result
+}
|
Should fix the issue reported here: #126750 (comment) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, but I don't know this code.
If you need a real review, better to revert and reland with a fix.
I think I should be able to land this fix tomorrow. Feel free to revert the clang patch in the meantime and I'll reland it later (not at my PC atm). |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/13314. Here is the relevant piece of the build log for reference:
|
It didn't help https://lab.llvm.org/buildbot/#/builders/72/builds/8406 |
It looks like that has run into a different issue now; this should help: #127976 |
Should fix: https://lab.llvm.org/buildbot/#/builders/72/builds/8380
(`test_modf_ppcf128` is the test case that needed the additional legalization)