Skip to content

Commit d4ab3df

Browse files
authored
[AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787)
Without this we can try to generate invalid instructions or create illegal types. This patch generates an SVE fcopysign instead and uses its lowering. BF16 is left out for the moment as it doesn't lower successfully (but could use the same code as fp16).
1 parent 58670aa commit d4ab3df

File tree

2 files changed

+74
-84
lines changed

2 files changed

+74
-84
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10686,6 +10686,25 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
1068610686
return convertFromScalableVector(DAG, VT, Res);
1068710687
}
1068810688

10689+
// With SVE, but without Neon, extend the scalars to scalable vectors and use
10690+
// an SVE FCOPYSIGN.
10691+
if (!VT.isVector() && !Subtarget->isNeonAvailable() &&
10692+
Subtarget->isSVEorStreamingSVEAvailable()) {
10693+
if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
10694+
return SDValue();
10695+
EVT SVT = getPackedSVEVectorVT(VT);
10696+
10697+
SDValue Ins1 =
10698+
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In1,
10699+
DAG.getConstant(0, DL, MVT::i64));
10700+
SDValue Ins2 =
10701+
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In2,
10702+
DAG.getConstant(0, DL, MVT::i64));
10703+
SDValue FCS = DAG.getNode(ISD::FCOPYSIGN, DL, SVT, Ins1, Ins2);
10704+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, FCS,
10705+
DAG.getConstant(0, DL, MVT::i64));
10706+
}
10707+
1068910708
auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) {
1069010709
if (VT.isScalableVector())
1069110710
return getSVESafeBitCast(VT, Op, DAG);

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll

Lines changed: 55 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -11,32 +11,21 @@ target triple = "aarch64-unknown-linux-gnu"
1111
define void @test_copysign_f16(ptr %ap, ptr %bp) {
1212
; SVE-LABEL: test_copysign_f16:
1313
; SVE: // %bb.0:
14-
; SVE-NEXT: adrp x8, .LCPI0_0
14+
; SVE-NEXT: ldr h0, [x1]
1515
; SVE-NEXT: ldr h1, [x0]
16-
; SVE-NEXT: ldr h2, [x1]
17-
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
18-
; SVE-NEXT: adrp x8, .LCPI0_1
19-
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
20-
; SVE-NEXT: mov z3.d, z0.d
21-
; SVE-NEXT: fmov s0, s1
22-
; SVE-NEXT: fmov s3, s2
23-
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
16+
; SVE-NEXT: and z0.h, z0.h, #0x8000
17+
; SVE-NEXT: and z1.h, z1.h, #0x7fff
18+
; SVE-NEXT: orr z0.d, z1.d, z0.d
2419
; SVE-NEXT: str h0, [x0]
2520
; SVE-NEXT: ret
2621
;
2722
; SVE2-LABEL: test_copysign_f16:
2823
; SVE2: // %bb.0:
29-
; SVE2-NEXT: adrp x8, .LCPI0_0
30-
; SVE2-NEXT: ldr h1, [x0]
31-
; SVE2-NEXT: ldr h2, [x1]
32-
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
33-
; SVE2-NEXT: adrp x8, .LCPI0_1
34-
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
35-
; SVE2-NEXT: mov z3.d, z0.d
36-
; SVE2-NEXT: fmov s0, s1
37-
; SVE2-NEXT: fmov s3, s2
38-
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
39-
; SVE2-NEXT: str h0, [x0]
24+
; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
25+
; SVE2-NEXT: ldr h1, [x1]
26+
; SVE2-NEXT: ldr h2, [x0]
27+
; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
28+
; SVE2-NEXT: str h2, [x0]
4029
; SVE2-NEXT: ret
4130
;
4231
; NONEON-NOSVE-LABEL: test_copysign_f16:
@@ -66,32 +55,40 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
6655
define void @test_copysign_bf16(ptr %ap, ptr %bp) {
6756
; SVE-LABEL: test_copysign_bf16:
6857
; SVE: // %bb.0:
69-
; SVE-NEXT: adrp x8, .LCPI1_0
70-
; SVE-NEXT: ldr h1, [x0]
71-
; SVE-NEXT: ldr h2, [x1]
72-
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
73-
; SVE-NEXT: adrp x8, .LCPI1_1
74-
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
75-
; SVE-NEXT: mov z3.d, z0.d
76-
; SVE-NEXT: fmov s0, s1
77-
; SVE-NEXT: fmov s3, s2
78-
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
58+
; SVE-NEXT: sub sp, sp, #16
59+
; SVE-NEXT: .cfi_def_cfa_offset 16
60+
; SVE-NEXT: ldr h0, [x0]
61+
; SVE-NEXT: ldr h1, [x1]
62+
; SVE-NEXT: fmov w8, s0
63+
; SVE-NEXT: str h1, [sp, #12]
64+
; SVE-NEXT: ldrb w9, [sp, #13]
65+
; SVE-NEXT: and w8, w8, #0x7fff
66+
; SVE-NEXT: tst w9, #0x80
67+
; SVE-NEXT: fmov s0, w8
68+
; SVE-NEXT: eor w8, w8, #0x8000
69+
; SVE-NEXT: fmov s1, w8
70+
; SVE-NEXT: fcsel h0, h1, h0, ne
7971
; SVE-NEXT: str h0, [x0]
72+
; SVE-NEXT: add sp, sp, #16
8073
; SVE-NEXT: ret
8174
;
8275
; SVE2-LABEL: test_copysign_bf16:
8376
; SVE2: // %bb.0:
84-
; SVE2-NEXT: adrp x8, .LCPI1_0
85-
; SVE2-NEXT: ldr h1, [x0]
86-
; SVE2-NEXT: ldr h2, [x1]
87-
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
88-
; SVE2-NEXT: adrp x8, .LCPI1_1
89-
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
90-
; SVE2-NEXT: mov z3.d, z0.d
91-
; SVE2-NEXT: fmov s0, s1
92-
; SVE2-NEXT: fmov s3, s2
93-
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
77+
; SVE2-NEXT: sub sp, sp, #16
78+
; SVE2-NEXT: .cfi_def_cfa_offset 16
79+
; SVE2-NEXT: ldr h0, [x0]
80+
; SVE2-NEXT: ldr h1, [x1]
81+
; SVE2-NEXT: fmov w8, s0
82+
; SVE2-NEXT: str h1, [sp, #12]
83+
; SVE2-NEXT: ldrb w9, [sp, #13]
84+
; SVE2-NEXT: and w8, w8, #0x7fff
85+
; SVE2-NEXT: tst w9, #0x80
86+
; SVE2-NEXT: fmov s0, w8
87+
; SVE2-NEXT: eor w8, w8, #0x8000
88+
; SVE2-NEXT: fmov s1, w8
89+
; SVE2-NEXT: fcsel h0, h1, h0, ne
9490
; SVE2-NEXT: str h0, [x0]
91+
; SVE2-NEXT: add sp, sp, #16
9592
; SVE2-NEXT: ret
9693
;
9794
; NONEON-NOSVE-LABEL: test_copysign_bf16:
@@ -139,32 +136,21 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
139136
define void @test_copysign_f32(ptr %ap, ptr %bp) {
140137
; SVE-LABEL: test_copysign_f32:
141138
; SVE: // %bb.0:
142-
; SVE-NEXT: adrp x8, .LCPI2_0
139+
; SVE-NEXT: ldr s0, [x1]
143140
; SVE-NEXT: ldr s1, [x0]
144-
; SVE-NEXT: ldr s2, [x1]
145-
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
146-
; SVE-NEXT: adrp x8, .LCPI2_1
147-
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
148-
; SVE-NEXT: mov z3.d, z0.d
149-
; SVE-NEXT: fmov s0, s1
150-
; SVE-NEXT: fmov s3, s2
151-
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
141+
; SVE-NEXT: and z0.s, z0.s, #0x80000000
142+
; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
143+
; SVE-NEXT: orr z0.d, z1.d, z0.d
152144
; SVE-NEXT: str s0, [x0]
153145
; SVE-NEXT: ret
154146
;
155147
; SVE2-LABEL: test_copysign_f32:
156148
; SVE2: // %bb.0:
157-
; SVE2-NEXT: adrp x8, .LCPI2_0
158-
; SVE2-NEXT: ldr s1, [x0]
159-
; SVE2-NEXT: ldr s2, [x1]
160-
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
161-
; SVE2-NEXT: adrp x8, .LCPI2_1
162-
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
163-
; SVE2-NEXT: mov z3.d, z0.d
164-
; SVE2-NEXT: fmov s0, s1
165-
; SVE2-NEXT: fmov s3, s2
166-
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
167-
; SVE2-NEXT: str s0, [x0]
149+
; SVE2-NEXT: mov z0.s, #0x7fffffff
150+
; SVE2-NEXT: ldr s1, [x1]
151+
; SVE2-NEXT: ldr s2, [x0]
152+
; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
153+
; SVE2-NEXT: str s2, [x0]
168154
; SVE2-NEXT: ret
169155
;
170156
; NONEON-NOSVE-LABEL: test_copysign_f32:
@@ -187,36 +173,21 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {
187173
define void @test_copysign_f64(ptr %ap, ptr %bp) {
188174
; SVE-LABEL: test_copysign_f64:
189175
; SVE: // %bb.0:
190-
; SVE-NEXT: adrp x8, .LCPI3_1
191-
; SVE-NEXT: ptrue p0.d, vl2
192-
; SVE-NEXT: ldr d2, [x0]
193-
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
194-
; SVE-NEXT: adrp x8, .LCPI3_0
195-
; SVE-NEXT: ldr d3, [x1]
196-
; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
197-
; SVE-NEXT: fneg z0.d, p0/m, z0.d
198-
; SVE-NEXT: mov z4.d, z1.d
199-
; SVE-NEXT: fmov d1, d2
200-
; SVE-NEXT: fmov d4, d3
201-
; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b
176+
; SVE-NEXT: ldr d0, [x1]
177+
; SVE-NEXT: ldr d1, [x0]
178+
; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000
179+
; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
180+
; SVE-NEXT: orr z0.d, z1.d, z0.d
202181
; SVE-NEXT: str d0, [x0]
203182
; SVE-NEXT: ret
204183
;
205184
; SVE2-LABEL: test_copysign_f64:
206185
; SVE2: // %bb.0:
207-
; SVE2-NEXT: adrp x8, .LCPI3_1
208-
; SVE2-NEXT: ptrue p0.d, vl2
186+
; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff
187+
; SVE2-NEXT: ldr d1, [x1]
209188
; SVE2-NEXT: ldr d2, [x0]
210-
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
211-
; SVE2-NEXT: adrp x8, .LCPI3_0
212-
; SVE2-NEXT: ldr d3, [x1]
213-
; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
214-
; SVE2-NEXT: fneg z0.d, p0/m, z0.d
215-
; SVE2-NEXT: mov z4.d, z1.d
216-
; SVE2-NEXT: fmov d1, d2
217-
; SVE2-NEXT: fmov d4, d3
218-
; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b
219-
; SVE2-NEXT: str d0, [x0]
189+
; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
190+
; SVE2-NEXT: str d2, [x0]
220191
; SVE2-NEXT: ret
221192
;
222193
; NONEON-NOSVE-LABEL: test_copysign_f64:

0 commit comments

Comments
 (0)