Skip to content

[SDAG] Add missing ppc_fp128 ExpandFloatRes legalization for modf #127895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1569,6 +1569,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
case ISD::FMODF: ExpandFloatRes_FMODF(N); break;
// clang-format on
}

Expand Down Expand Up @@ -1619,6 +1620,23 @@ void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
GetPairElements(Tmp.first, Lo, Hi);
}

/// Expand the results of an FMODF node by forwarding to the generic
/// "unary op with two FP results" libcall expansion.
void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) {
  // The modf libcall variant is selected from the type of result 0.
  const RTLIB::Libcall ModfLC = RTLIB::getMODF(N->getValueType(0));
  // NOTE(review): CallRetResNo=0 presumably marks result 0 as the value the
  // libcall returns directly (the other result coming back through a
  // pointer argument) -- confirm against expandMultipleResultFPLibCall.
  ExpandFloatRes_UnaryWithTwoFPResults(N, ModfLC, /*CallRetResNo=*/0);
}

/// Expand every result of the non-strict node \p N through the libcall
/// \p LC.
///
/// The libcall expansion is delegated to
/// SelectionDAG::expandMultipleResultFPLibCall. Each value it produces is
/// split into a Lo/Hi pair and registered as the expanded form of the
/// matching result of \p N.
///
/// \param N            Node whose results are being expanded; must not be a
///                     strict FP opcode.
/// \param LC           Libcall to lower \p N to.
/// \param CallRetResNo Forwarded unchanged to expandMultipleResultFPLibCall.
void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
    SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
  assert(!N->isStrictFPOpcode() && "strictfp not implemented");

  SmallVector<SDValue> LibcallResults;
  DAG.expandMultipleResultFPLibCall(LC, N, LibcallResults, CallRetResNo);

  // Entry Idx of the libcall results corresponds to result number Idx of N.
  for (unsigned Idx = 0, NumResults = LibcallResults.size();
       Idx != NumResults; ++Idx) {
    SDValue LoPart, HiPart;
    GetPairElements(LibcallResults[Idx], LoPart, HiPart);
    SetExpandedFloat(SDValue(N, Idx), LoPart, HiPart);
  }
}

void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_UnaryWithTwoFPResults(
SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});

// clang-format off
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi);
Expand Down Expand Up @@ -714,6 +717,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMODF(SDNode *N);
// clang-format on

// Float Operand Expansion.
Expand Down
330 changes: 330 additions & 0 deletions llvm/test/CodeGen/PowerPC/llvm.modf.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,330 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s

; Scalar half, both results used: the checks expect the argument to be
; round-tripped through half precision (xscvdphp/xscvhpdp), a single call to
; modff, and the second struct element reloaded from the stack slot whose
; address was passed in r4.
define { half, half } @test_modf_f16(half %a) {
; CHECK-LABEL: test_modf_f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: lfs f2, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { half, half } @llvm.modf.f16(half %a)
ret { half, half } %result
}

; Only struct element 0 is used: the checks still expect the modff call, but
; no reload of the out-parameter slot at 44(r1).
define half @test_modf_f16_only_use_fractional_part(half %a) {
; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { half, half } @llvm.modf.f16(half %a)
%result.0 = extractvalue { half, half } %result, 0
ret half %result.0
}

; Only struct element 1 is used: the checks expect the returned value to be
; loaded from the out-parameter slot (lfs f1, 44(r1)) after the modff call.
define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-LABEL: test_modf_f16_only_use_integral_part:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: lfs f1, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { half, half } @llvm.modf.f16(half %a)
%result.1 = extractvalue { half, half } %result, 1
ret half %result.1
}

; <2 x half>: the checks expect two separate modff calls, with the
; out-parameter results written to 40(r1) and 44(r1) and reloaded into f3/f4.
define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
; CHECK-LABEL: test_modf_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset f30, -16
; CHECK-NEXT: .cfi_offset f31, -8
; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: std r0, 80(r1)
; CHECK-NEXT: xscvdphp f0, f2
; CHECK-NEXT: addi r4, r1, 40
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f31, f0
; CHECK-NEXT: xscvdphp f0, f1
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: mtfprwz f0, r3
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: fmr f30, f1
; CHECK-NEXT: fmr f1, f31
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: lfs f3, 40(r1)
; CHECK-NEXT: fmr f2, f1
; CHECK-NEXT: fmr f1, f30
; CHECK-NEXT: lfs f4, 44(r1)
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
ret { <2 x half>, <2 x half> } %result
}

; Scalar float: the checks expect a direct call to modff, with the second
; struct element reloaded from the stack slot whose address was passed in r4.
define { float, float } @test_modf_f32(float %a) {
; CHECK-LABEL: test_modf_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: lfs f2, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { float, float } @llvm.modf.f32(float %a)
ret { float, float } %result
}

; <3 x float>: the checks expect three separate modff calls, one per element,
; with both result vectors rebuilt afterwards via xxmrghw/xxperm.
define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
; CHECK-LABEL: test_modf_v3f32:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -112(r1)
; CHECK-NEXT: std r0, 128(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r28, -32
; CHECK-NEXT: .cfi_offset r29, -24
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: .cfi_offset v30, -64
; CHECK-NEXT: .cfi_offset v31, -48
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill
; CHECK-NEXT: addi r30, r1, 36
; CHECK-NEXT: std r28, 80(r1) # 8-byte Folded Spill
; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill
; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
; CHECK-NEXT: mr r4, r30
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd vs0, v31
; CHECK-NEXT: addi r29, r1, 40
; CHECK-NEXT: xscvdpspn v30, f1
; CHECK-NEXT: mr r4, r29
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: xscvdpspn vs0, f1
; CHECK-NEXT: addi r28, r1, 44
; CHECK-NEXT: mr r4, r28
; CHECK-NEXT: xxmrghw v30, vs0, v30
; CHECK-NEXT: xxsldwi vs0, v31, v31, 1
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-NEXT: xscvdpspn v2, f1
; CHECK-NEXT: lfiwzx f1, 0, r30
; CHECK-NEXT: lfiwzx f2, 0, r29
; CHECK-NEXT: lxsiwzx v3, 0, r28
; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
; CHECK-NEXT: addi r3, r3, .LCPI5_0@toc@l
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: xxmrghw v4, vs2, vs1
; CHECK-NEXT: xxperm v2, v30, vs0
; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
; CHECK-NEXT: xxperm v3, v4, vs0
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
ret { <3 x float>, <3 x float> } %result
}

; <2 x float>: the checks expect two separate modff calls, one per element,
; with both result vectors rebuilt afterwards via xxmrghw.
define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
; CHECK-LABEL: test_modf_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -112(r1)
; CHECK-NEXT: std r0, 128(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r29, -24
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: .cfi_offset v30, -64
; CHECK-NEXT: .cfi_offset v31, -48
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill
; CHECK-NEXT: addi r30, r1, 40
; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill
; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
; CHECK-NEXT: mr r4, r30
; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd vs0, v31
; CHECK-NEXT: addi r29, r1, 44
; CHECK-NEXT: xscvdpspn v30, f1
; CHECK-NEXT: mr r4, r29
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
; CHECK-NEXT: xscvdpspn vs0, f1
; CHECK-NEXT: lfiwzx f1, 0, r29
; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghw v2, vs0, v30
; CHECK-NEXT: lfiwzx f0, 0, r30
; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
; CHECK-NEXT: xxmrghw v3, vs1, vs0
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a)
ret { <2 x float>, <2 x float> } %result
}

; Scalar double: the checks expect a direct call to modf, with the second
; struct element reloaded (lfd) from the stack slot whose address was passed
; in r4.
define { double, double } @test_modf_f64(double %a) {
; CHECK-LABEL: test_modf_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: addi r4, r1, 40
; CHECK-NEXT: bl modf
; CHECK-NEXT: nop
; CHECK-NEXT: lfd f2, 40(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { double, double } @llvm.modf.f64(double %a)
ret { double, double } %result
}

; <2 x double>: the checks expect two separate modf calls, one per element,
; with both result vectors rebuilt afterwards via xxmrghd.
define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
; CHECK-LABEL: test_modf_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -80(r1)
; CHECK-NEXT: std r0, 96(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 80
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset v30, -32
; CHECK-NEXT: .cfi_offset v31, -16
; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v2
; CHECK-NEXT: addi r4, r1, 32
; CHECK-NEXT: xscpsgndp f1, v31, v31
; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
; CHECK-NEXT: bl modf
; CHECK-NEXT: nop
; CHECK-NEXT: xscpsgndp v30, f1, f1
; CHECK-NEXT: xxswapd vs1, v31
; CHECK-NEXT: addi r4, r1, 40
; CHECK-NEXT: bl modf
; CHECK-NEXT: nop
; CHECK-NEXT: xxmrghd v2, v30, vs1
; CHECK-NEXT: lfd f0, 32(r1)
; CHECK-NEXT: lfd f1, 40(r1)
; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
; CHECK-NEXT: xxmrghd v3, vs0, vs1
; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a)
ret { <2 x double>, <2 x double> } %result
}

; ppc_fp128: the checks expect a single call to modfl, with the second struct
; element reloaded as two doubles (32(r1) and 40(r1)) from the buffer whose
; address was passed in r5.
define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128(ppc_fp128 %a) {
; CHECK-LABEL: test_modf_ppcf128:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: addi r5, r1, 32
; CHECK-NEXT: bl modfl
; CHECK-NEXT: nop
; CHECK-NEXT: lfd f3, 32(r1)
; CHECK-NEXT: lfd f4, 40(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
%result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
ret { ppc_fp128, ppc_fp128 } %result
}
Loading