Skip to content

Commit 20fc2ff

Browse files
committed
[AArch64][GlobalISel] Handle fp constant splats
This changes the DUP(constant) -> MOVI code to handle either integer or fp types, allowing more constants to be selected, and fixes up some cases where fp constants were being incorrectly selected.
1 parent 9d54ae8 commit 20fc2ff

File tree

3 files changed

+21
-44
lines changed

3 files changed

+21
-44
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2251,7 +2251,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
22512251
// Before selecting a DUP instruction, check if it is better selected as a
22522252
// MOV or load from a constant pool.
22532253
Register Src = I.getOperand(1).getReg();
2254-
auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2254+
auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
22552255
if (!ValAndVReg)
22562256
return false;
22572257
LLVMContext &Ctx = MF.getFunction().getContext();
@@ -5600,8 +5600,7 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
56005600
if (DstSize == 128) {
56015601
if (Bits.getHiBits(64) != Bits.getLoBits(64))
56025602
return nullptr;
5603-
// Need to deal with 4f32
5604-
Op = AArch64::FMOVv2f64_ns;
5603+
Op = AArch64::FMOVv4f32_ns;
56055604
IsWide = true;
56065605
} else {
56075606
Op = AArch64::FMOVv2f32_ns;
@@ -5610,9 +5609,10 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
56105609
uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
56115610

56125611
if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5613-
Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5612+
Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
56145613
} else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
56155614
Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5615+
Op = AArch64::FMOVv2f64_ns;
56165616
} else
56175617
return nullptr;
56185618

llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ entry:
3939
define <4 x half> @test_v4s16(<4 x half> %a) #0 {
4040
; CHECK-LABEL: test_v4s16:
4141
; CHECK: // %bb.0: // %entry
42-
; CHECK-NEXT: movi d1, #0000000000000000
43-
; CHECK-NEXT: dup v1.4h, v1.h[0]
42+
; CHECK-NEXT: movi v1.2d, #0000000000000000
4443
; CHECK-NEXT: fmax v0.4h, v1.4h, v0.4h
4544
; CHECK-NEXT: ret
4645
entry:
@@ -52,8 +51,7 @@ entry:
5251
define <8 x half> @test_v8s16(<8 x half> %a) #0 {
5352
; CHECK-LABEL: test_v8s16:
5453
; CHECK: // %bb.0: // %entry
55-
; CHECK-NEXT: movi d1, #0000000000000000
56-
; CHECK-NEXT: dup v1.8h, v1.h[0]
54+
; CHECK-NEXT: movi v1.2d, #0000000000000000
5755
; CHECK-NEXT: fmax v0.8h, v1.8h, v0.8h
5856
; CHECK-NEXT: ret
5957
entry:
@@ -65,8 +63,7 @@ entry:
6563
define <2 x float> @test_v2s32(<2 x float> %a) #0 {
6664
; CHECK-LABEL: test_v2s32:
6765
; CHECK: // %bb.0: // %entry
68-
; CHECK-NEXT: movi d1, #0000000000000000
69-
; CHECK-NEXT: dup v1.2s, v1.s[0]
66+
; CHECK-NEXT: movi v1.2d, #0000000000000000
7067
; CHECK-NEXT: fmax v0.2s, v1.2s, v0.2s
7168
; CHECK-NEXT: ret
7269
entry:
@@ -78,8 +75,7 @@ entry:
7875
define <4 x float> @test_v4s32(<4 x float> %a) #0 {
7976
; CHECK-LABEL: test_v4s32:
8077
; CHECK: // %bb.0: // %entry
81-
; CHECK-NEXT: movi d1, #0000000000000000
82-
; CHECK-NEXT: dup v1.4s, v1.s[0]
78+
; CHECK-NEXT: movi v1.2d, #0000000000000000
8379
; CHECK-NEXT: fmax v0.4s, v1.4s, v0.4s
8480
; CHECK-NEXT: ret
8581
entry:
@@ -91,8 +87,7 @@ entry:
9187
define <2 x double> @test_v2s64(<2 x double> %a) #0 {
9288
; CHECK-LABEL: test_v2s64:
9389
; CHECK: // %bb.0: // %entry
94-
; CHECK-NEXT: movi d1, #0000000000000000
95-
; CHECK-NEXT: dup v1.2d, v1.d[0]
90+
; CHECK-NEXT: movi v1.2d, #0000000000000000
9691
; CHECK-NEXT: fmax v0.2d, v1.2d, v0.2d
9792
; CHECK-NEXT: ret
9893
entry:

llvm/test/CodeGen/AArch64/neon-mov.ll

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -305,44 +305,26 @@ define <1 x i64> @movid() {
305305
}
306306

307307
define <2 x float> @fmov2s() {
308-
; CHECK-SD-LABEL: fmov2s:
309-
; CHECK-SD: // %bb.0:
310-
; CHECK-SD-NEXT: fmov v0.2s, #-12.00000000
311-
; CHECK-SD-NEXT: ret
312-
;
313-
; CHECK-GI-LABEL: fmov2s:
314-
; CHECK-GI: // %bb.0:
315-
; CHECK-GI-NEXT: fmov s0, #-12.00000000
316-
; CHECK-GI-NEXT: dup v0.2s, v0.s[0]
317-
; CHECK-GI-NEXT: ret
308+
; CHECK-LABEL: fmov2s:
309+
; CHECK: // %bb.0:
310+
; CHECK-NEXT: fmov v0.2s, #-12.00000000
311+
; CHECK-NEXT: ret
318312
ret <2 x float> < float -1.2e1, float -1.2e1>
319313
}
320314

321315
define <4 x float> @fmov4s() {
322-
; CHECK-SD-LABEL: fmov4s:
323-
; CHECK-SD: // %bb.0:
324-
; CHECK-SD-NEXT: fmov v0.4s, #-12.00000000
325-
; CHECK-SD-NEXT: ret
326-
;
327-
; CHECK-GI-LABEL: fmov4s:
328-
; CHECK-GI: // %bb.0:
329-
; CHECK-GI-NEXT: fmov s0, #-12.00000000
330-
; CHECK-GI-NEXT: dup v0.4s, v0.s[0]
331-
; CHECK-GI-NEXT: ret
316+
; CHECK-LABEL: fmov4s:
317+
; CHECK: // %bb.0:
318+
; CHECK-NEXT: fmov v0.4s, #-12.00000000
319+
; CHECK-NEXT: ret
332320
ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1>
333321
}
334322

335323
define <2 x double> @fmov2d() {
336-
; CHECK-SD-LABEL: fmov2d:
337-
; CHECK-SD: // %bb.0:
338-
; CHECK-SD-NEXT: fmov v0.2d, #-12.00000000
339-
; CHECK-SD-NEXT: ret
340-
;
341-
; CHECK-GI-LABEL: fmov2d:
342-
; CHECK-GI: // %bb.0:
343-
; CHECK-GI-NEXT: fmov d0, #-12.00000000
344-
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
345-
; CHECK-GI-NEXT: ret
324+
; CHECK-LABEL: fmov2d:
325+
; CHECK: // %bb.0:
326+
; CHECK-NEXT: fmov v0.2d, #-12.00000000
327+
; CHECK-NEXT: ret
346328
ret <2 x double> < double -1.2e1, double -1.2e1>
347329
}
348330

0 commit comments

Comments
 (0)