Skip to content

[AArch64][DAG] Allow fptos/ui.sat to be scalarized. #126799

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
SDValue ScalarizeVecOp_UnaryOpWithExtraInput(SDNode *N);
SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo);
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::LLRINT:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
Res = ScalarizeVecOp_UnaryOpWithExtraInput(N);
break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_FP_TO_SINT:
Expand Down Expand Up @@ -882,6 +886,20 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
}

/// Scalarize the vector operand of a unary-style node that carries one
/// additional operand (for example a typesize). The result type must be a
/// one-element vector: the operation is redone on the scalarized element with
/// the additional operand forwarded unchanged.
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOpWithExtraInput(SDNode *N) {
  const auto ResultVT = N->getValueType(0);
  assert(ResultVT.getVectorNumElements() == 1 && "Unexpected vector type!");

  SDLoc DL(N);
  SDValue ScalarInput = GetScalarizedVector(N->getOperand(0));
  SDValue ExtraInput = N->getOperand(1);

  // Same opcode as the original node, but producing the element type.
  SDValue ScalarResult = DAG.getNode(
      N->getOpcode(), DL, ResultVT.getScalarType(), ScalarInput, ExtraInput);

  // Users of this node still expect a vector value, so wrap the scalar back
  // into the original one-element vector type.
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResultVT, ScalarResult);
}

/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
/// Do the strict FP operation on the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
Expand Down
148 changes: 148 additions & 0 deletions llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5548,3 +5548,151 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
%x = call <16 x i16> @llvm.fptosi.sat.v16f64.v16i16(<16 x double> %f)
ret <16 x i16> %x
}

; Saturating signed conversion from <2 x fp128> to <2 x i64> through
; llvm.fptosi.sat. The expected assembly below handles the two lanes one after
; the other, each via the __getf2 / __fixtfdi / __gttf2 / __unordtf2 libcalls,
; for both the SelectionDAG and the GlobalISel check blocks.
define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-SD-LABEL: test_signed_v2f128_v2i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #96
; CHECK-SD-NEXT: stp x30, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: mov v2.16b, v1.16b
; CHECK-SD-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
; CHECK-SD-NEXT: adrp x8, .LCPI86_0
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixtfdi
; CHECK-SD-NEXT: adrp x8, .LCPI86_1
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_1]
; CHECK-SD-NEXT: mov x20, #-9223372036854775808 // =0x8000000000000000
; CHECK-SD-NEXT: csel x19, x20, x0, lt
; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: csel x19, x21, x19, gt
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: bl __unordtf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: csel x8, xzr, x19, ne
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixtfdi
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: csel x19, x20, x0, lt
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: csel x19, x21, x19, gt
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: bl __unordtf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: csel x8, xzr, x19, ne
; CHECK-SD-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: ldp x30, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: add sp, sp, #96
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_signed_v2f128_v2i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #112
; CHECK-GI-NEXT: stp x30, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w30, -48
; CHECK-GI-NEXT: adrp x8, .LCPI86_1
; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI86_1]
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: bl __getf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4594234569871327232 // =0xc03e000000000000
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: csel x19, x8, xzr, lt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
; CHECK-GI-NEXT: csel x21, x8, x20, lt
; CHECK-GI-NEXT: adrp x8, .LCPI86_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: mov x22, #-1125899906842624 // =0xfffc000000000000
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x23, #4629137466983448575 // =0x403dffffffffffff
; CHECK-GI-NEXT: csel x8, x19, x22, gt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: csel x8, x21, x23, gt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfdi
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: mov v1.16b, v0.16b
; CHECK-GI-NEXT: bl __unordtf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel x21, xzr, x19, ne
; CHECK-GI-NEXT: bl __getf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: csel x19, x8, xzr, lt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
; CHECK-GI-NEXT: csel x20, x8, x20, lt
; CHECK-GI-NEXT: mov v0.d[1], x20
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel x8, x19, x22, gt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: csel x8, x20, x23, gt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfdi
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: mov v1.16b, v0.16b
; CHECK-GI-NEXT: bl __unordtf2
; CHECK-GI-NEXT: mov v0.d[0], x21
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel x8, xzr, x19, ne
; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x30, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: add sp, sp, #112
; CHECK-GI-NEXT: ret
%x = call <2 x i64> @llvm.fptosi.sat.v2f128.v2i64(<2 x fp128> %f)
ret <2 x i64> %x
}
118 changes: 118 additions & 0 deletions llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4546,3 +4546,121 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
%x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f)
ret <16 x i16> %x
}

; Saturating unsigned conversion from <2 x fp128> to <2 x i64> through
; llvm.fptoui.sat. The expected assembly below handles the two lanes one after
; the other, each via the __getf2 / __fixunstfdi / __gttf2 libcalls, for both
; the SelectionDAG and the GlobalISel check blocks.
; NOTE(review): the function is named test_signed_* although it exercises the
; unsigned intrinsic; presumably it was meant to be test_unsigned_*. The name
; is left untouched here because the label checks below match on it.
define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-SD-LABEL: test_signed_v2f128_v2i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #80
; CHECK-SD-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: mov v2.16b, v1.16b
; CHECK-SD-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
; CHECK-SD-NEXT: adrp x8, .LCPI86_0
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfdi
; CHECK-SD-NEXT: adrp x8, .LCPI86_1
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_1]
; CHECK-SD-NEXT: csel x19, xzr, x0, lt
; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: csinv x8, x19, xzr, le
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfdi
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: csel x19, xzr, x0, lt
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: csinv x8, x19, xzr, le
; CHECK-SD-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: add sp, sp, #80
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_signed_v2f128_v2i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #96
; CHECK-GI-NEXT: stp x30, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w30, -48
; CHECK-GI-NEXT: adrp x8, .LCPI86_1
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI86_1]
; CHECK-GI-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: bl __getf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: csel x19, x8, xzr, lt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
; CHECK-GI-NEXT: csel x20, x8, xzr, lt
; CHECK-GI-NEXT: adrp x8, .LCPI86_0
; CHECK-GI-NEXT: mov v0.d[1], x20
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: mov x21, #-562949953421312 // =0xfffe000000000000
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x22, #4629418941960159231 // =0x403effffffffffff
; CHECK-GI-NEXT: csel x8, x19, x21, gt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: csel x8, x20, x22, gt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfdi
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: bl __getf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: csel x20, x8, xzr, lt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x20
; CHECK-GI-NEXT: csel x23, x8, xzr, lt
; CHECK-GI-NEXT: mov v0.d[1], x23
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel x8, x20, x21, gt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: csel x8, x23, x22, gt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfdi
; CHECK-GI-NEXT: mov v0.d[0], x19
; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x30, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x0
; CHECK-GI-NEXT: add sp, sp, #96
; CHECK-GI-NEXT: ret
%x = call <2 x i64> @llvm.fptoui.sat.v2f128.v2i64(<2 x fp128> %f)
ret <2 x i64> %x
}