Skip to content

Commit 6f37d42

Browse files
authored
[AArch64][GlobalISel] Reland Make G_DUP immediate 32-bits or larger (#96780) (#99014)
Immediate operand gets extended in RegBankSelect to at least 32 bits to allow for better pattern matching in TableGen. The previous patch was erasing a constant without checking whether it has more than one use. Changes: (1) does not erase the constant; (2) added the @v_dup16_const test.
1 parent e24dc34 commit 6f37d42

File tree

6 files changed

+160
-117
lines changed

6 files changed

+160
-117
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2284,8 +2284,9 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
22842284
Register Dst = I.getOperand(0).getReg();
22852285
auto *CV = ConstantDataVector::getSplat(
22862286
MRI.getType(Dst).getNumElements(),
2287-
ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2288-
ValAndVReg->Value));
2287+
ConstantInt::get(
2288+
Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2289+
ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
22892290
if (!emitConstantVector(Dst, CV, MIB, MRI))
22902291
return false;
22912292
I.eraseFromParent();
@@ -5614,7 +5615,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
56145615
}
56155616

56165617
if (CV->getSplatValue()) {
5617-
APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5618+
APInt DefBits = APInt::getSplat(
5619+
DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
56185620
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
56195621
MachineInstr *NewOp;
56205622
bool Inv = false;

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "AArch64GenRegisterBankInfo.def"
4343

4444
using namespace llvm;
45+
static const unsigned CustomMappingID = 1;
4546

4647
AArch64RegisterBankInfo::AArch64RegisterBankInfo(
4748
const TargetRegisterInfo &TRI) {
@@ -424,6 +425,26 @@ void AArch64RegisterBankInfo::applyMappingImpl(
424425
MI.getOperand(2).setReg(Ext.getReg(0));
425426
return applyDefaultMapping(OpdMapper);
426427
}
428+
case AArch64::G_DUP: {
429+
// Extend smaller gpr to 32-bits
430+
assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
431+
"Expected sources smaller than 32-bits");
432+
Builder.setInsertPt(*MI.getParent(), MI.getIterator());
433+
434+
Register ConstReg;
435+
auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
436+
if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
437+
auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
438+
ConstReg =
439+
Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
440+
} else {
441+
ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
442+
.getReg(0);
443+
}
444+
MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
445+
MI.getOperand(1).setReg(ConstReg);
446+
return applyDefaultMapping(OpdMapper);
447+
}
427448
default:
428449
llvm_unreachable("Don't know how to handle that operation");
429450
}
@@ -792,8 +813,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
792813
(getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
793814
onlyDefinesFP(*ScalarDef, MRI, TRI)))
794815
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
795-
else
816+
else {
817+
if (ScalarTy.getSizeInBits() < 32 &&
818+
getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
819+
// Calls applyMappingImpl()
820+
MappingID = CustomMappingID;
821+
}
796822
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
823+
}
797824
break;
798825
}
799826
case TargetOpcode::G_TRUNC: {
@@ -1014,8 +1041,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
10141041
// If the type is i8/i16, and the regbank will be GPR, then we change the
10151042
// type to i32 in applyMappingImpl.
10161043
LLT Ty = MRI.getType(MI.getOperand(2).getReg());
1017-
if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
1018-
MappingID = 1;
1044+
if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
1045+
// Calls applyMappingImpl()
1046+
MappingID = CustomMappingID;
1047+
}
10191048
OpRegBankIdx[2] = PMI_FirstGPR;
10201049
}
10211050

llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir

Lines changed: 42 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@ body: |
1616
1717
; CHECK-LABEL: name: v4s32_gpr
1818
; CHECK: liveins: $w0
19-
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
20-
; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
21-
; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
22-
; CHECK: RET_ReallyLR implicit $q0
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
21+
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
22+
; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
23+
; CHECK-NEXT: RET_ReallyLR implicit $q0
2324
%0:_(s32) = COPY $w0
2425
%4:_(<4 x s32>) = G_DUP %0(s32)
2526
$q0 = COPY %4(<4 x s32>)
@@ -37,10 +38,11 @@ body: |
3738
3839
; CHECK-LABEL: name: v4s64_gpr
3940
; CHECK: liveins: $x0
40-
; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
41-
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
42-
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
43-
; CHECK: RET_ReallyLR implicit $q0
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
43+
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
44+
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
45+
; CHECK-NEXT: RET_ReallyLR implicit $q0
4446
%0:_(s64) = COPY $x0
4547
%4:_(<2 x s64>) = G_DUP %0(s64)
4648
$q0 = COPY %4(<2 x s64>)
@@ -58,10 +60,11 @@ body: |
5860
5961
; CHECK-LABEL: name: v2s32_gpr
6062
; CHECK: liveins: $w0
61-
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
62-
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
63-
; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
64-
; CHECK: RET_ReallyLR implicit $d0
63+
; CHECK-NEXT: {{ $}}
64+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
65+
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
66+
; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
67+
; CHECK-NEXT: RET_ReallyLR implicit $d0
6568
%0:_(s32) = COPY $w0
6669
%4:_(<2 x s32>) = G_DUP %0(s32)
6770
$d0 = COPY %4(<2 x s32>)
@@ -79,10 +82,11 @@ body: |
7982
8083
; CHECK-LABEL: name: v4s32_fpr
8184
; CHECK: liveins: $s0
82-
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
83-
; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
84-
; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
85-
; CHECK: RET_ReallyLR implicit $q0
85+
; CHECK-NEXT: {{ $}}
86+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
87+
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
88+
; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
89+
; CHECK-NEXT: RET_ReallyLR implicit $q0
8690
%0:_(s32) = COPY $s0
8791
%4:_(<4 x s32>) = G_DUP %0(s32)
8892
$q0 = COPY %4(<4 x s32>)
@@ -100,10 +104,11 @@ body: |
100104
101105
; CHECK-LABEL: name: v2s64_fpr
102106
; CHECK: liveins: $d0
103-
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
104-
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
105-
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
106-
; CHECK: RET_ReallyLR implicit $q0
107+
; CHECK-NEXT: {{ $}}
108+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
109+
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
110+
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
111+
; CHECK-NEXT: RET_ReallyLR implicit $q0
107112
%0:_(s64) = COPY $d0
108113
%4:_(<2 x s64>) = G_DUP %0(s64)
109114
$q0 = COPY %4(<2 x s64>)
@@ -121,10 +126,11 @@ body: |
121126
122127
; CHECK-LABEL: name: v2s32_fpr
123128
; CHECK: liveins: $s0
124-
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
125-
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
126-
; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
127-
; CHECK: RET_ReallyLR implicit $d0
129+
; CHECK-NEXT: {{ $}}
130+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
131+
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
132+
; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
133+
; CHECK-NEXT: RET_ReallyLR implicit $d0
128134
%0:_(s32) = COPY $s0
129135
%4:_(<2 x s32>) = G_DUP %0(s32)
130136
$d0 = COPY %4(<2 x s32>)
@@ -142,10 +148,11 @@ body: |
142148
143149
; CHECK-LABEL: name: v2s64_fpr_copy
144150
; CHECK: liveins: $d0
145-
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
146-
; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
147-
; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
148-
; CHECK: RET_ReallyLR implicit $q0
151+
; CHECK-NEXT: {{ $}}
152+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
153+
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
154+
; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
155+
; CHECK-NEXT: RET_ReallyLR implicit $q0
149156
%0:_(s64) = COPY $d0
150157
%6:_(<2 x s64>) = G_DUP %0(s64)
151158
$q0 = COPY %6(<2 x s64>)
@@ -163,11 +170,13 @@ body: |
163170
164171
; CHECK-LABEL: name: v416s8_gpr
165172
; CHECK: liveins: $w0
166-
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
167-
; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
168-
; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8)
169-
; CHECK: $q0 = COPY [[DUP]](<16 x s8>)
170-
; CHECK: RET_ReallyLR implicit $q0
173+
; CHECK-NEXT: {{ $}}
174+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
175+
; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
176+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8)
177+
; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32)
178+
; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>)
179+
; CHECK-NEXT: RET_ReallyLR implicit $q0
171180
%0:_(s32) = COPY $w0
172181
%trunc:_(s8) = G_TRUNC %0(s32)
173182
%1:_(<16 x s8>) = G_DUP %trunc(s8)

llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,3 +453,22 @@ body: |
453453
%dup:fpr(<2 x p0>) = G_DUP %cst(p0)
454454
$q0 = COPY %dup(<2 x p0>)
455455
RET_ReallyLR implicit $q0
456+
...
457+
---
458+
name: cstv4i16gpri32
459+
legalized: true
460+
regBankSelected: true
461+
tracksRegLiveness: true
462+
body: |
463+
bb.0.entry:
464+
liveins:
465+
; CHECK-LABEL: name: cstv4i16gpri32
466+
; CHECK: %dup:fpr64 = MOVIv4i16 3, 0
467+
; CHECK-NEXT: $d0 = COPY %dup
468+
; CHECK-NEXT: RET_ReallyLR implicit $d0
469+
%cst:gpr(s32) = G_CONSTANT i32 3
470+
%dup:fpr(<4 x s16>) = G_DUP %cst(s32)
471+
$d0 = COPY %dup(<4 x s16>)
472+
RET_ReallyLR implicit $d0
473+
474+
...

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,19 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
103103
ret <4 x i32> %tmp4
104104
}
105105

106+
define <4 x i16> @v_dup16_const(i16 %y, ptr %p) {
107+
; CHECK-LABEL: v_dup16_const:
108+
; CHECK: // %bb.0:
109+
; CHECK-NEXT: movi.4h v0, #10
110+
; CHECK-NEXT: mov w8, #10 // =0xa
111+
; CHECK-NEXT: strh w8, [x1]
112+
; CHECK-NEXT: ret
113+
%i = insertelement <4 x i16> undef, i16 10, i32 0
114+
%lo = shufflevector <4 x i16> %i, <4 x i16> undef, <4 x i32> zeroinitializer
115+
store i16 10, ptr %p
116+
ret <4 x i16> %lo
117+
}
118+
106119
define <4 x float> @v_dupQfloat(float %A) nounwind {
107120
; CHECK-LABEL: v_dupQfloat:
108121
; CHECK: // %bb.0:
@@ -420,9 +433,9 @@ define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) n
420433
; CHECK-GI: // %bb.0:
421434
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
422435
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
423-
; CHECK-GI-NEXT: adrp x8, .LCPI33_0
436+
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
424437
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
425-
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
438+
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
426439
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
427440
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
428441
; CHECK-GI-NEXT: ret
@@ -443,9 +456,9 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b
443456
; CHECK-GI: // %bb.0:
444457
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
445458
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
446-
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
459+
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
447460
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
448-
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
461+
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0]
449462
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
450463
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
451464
; CHECK-GI-NEXT: ret
@@ -462,9 +475,9 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n
462475
;
463476
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
464477
; CHECK-GI: // %bb.0:
465-
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
478+
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
466479
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
467-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
480+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
468481
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
469482
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
470483
; CHECK-GI-NEXT: ret
@@ -481,9 +494,9 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float>
481494
;
482495
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
483496
; CHECK-GI: // %bb.0:
484-
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
497+
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
485498
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
486-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
499+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
487500
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
488501
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
489502
; CHECK-GI-NEXT: ret
@@ -503,12 +516,12 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
503516
;
504517
; CHECK-GI-LABEL: disguised_dup:
505518
; CHECK-GI: // %bb.0:
506-
; CHECK-GI-NEXT: adrp x8, .LCPI37_1
519+
; CHECK-GI-NEXT: adrp x8, .LCPI38_1
507520
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
508-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1]
509-
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
521+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_1]
522+
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
510523
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
511-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
524+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0]
512525
; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2
513526
; CHECK-GI-NEXT: str q0, [x0]
514527
; CHECK-GI-NEXT: str q2, [x1]
@@ -531,8 +544,8 @@ define <2 x i32> @dup_const2(<2 x i32> %A) nounwind {
531544
;
532545
; CHECK-GI-LABEL: dup_const2:
533546
; CHECK-GI: // %bb.0:
534-
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
535-
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI38_0]
547+
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
548+
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI39_0]
536549
; CHECK-GI-NEXT: add.2s v0, v0, v1
537550
; CHECK-GI-NEXT: ret
538551
%tmp2 = add <2 x i32> %A, <i32 8421378, i32 8421378>
@@ -550,8 +563,8 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
550563
;
551564
; CHECK-GI-LABEL: dup_const4_ext:
552565
; CHECK-GI: // %bb.0:
553-
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
554-
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
566+
; CHECK-GI-NEXT: adrp x8, .LCPI40_0
567+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
555568
; CHECK-GI-NEXT: add.4s v0, v0, v1
556569
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
557570
; CHECK-GI-NEXT: ret
@@ -575,12 +588,12 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
575588
;
576589
; CHECK-GI-LABEL: dup_const24:
577590
; CHECK-GI: // %bb.0:
578-
; CHECK-GI-NEXT: adrp x8, .LCPI40_1
591+
; CHECK-GI-NEXT: adrp x8, .LCPI41_1
579592
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
580-
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI40_1]
581-
; CHECK-GI-NEXT: adrp x8, .LCPI40_0
593+
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI41_1]
594+
; CHECK-GI-NEXT: adrp x8, .LCPI41_0
582595
; CHECK-GI-NEXT: add.2s v0, v0, v3
583-
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI40_0]
596+
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI41_0]
584597
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
585598
; CHECK-GI-NEXT: add.4s v1, v2, v3
586599
; CHECK-GI-NEXT: eor.16b v0, v1, v0
@@ -687,3 +700,17 @@ define <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
687700
ret <8 x i16> %r
688701
}
689702

703+
define <4 x i16> @dup_i16_v4i16_constant() {
704+
; CHECK-SD-LABEL: dup_i16_v4i16_constant:
705+
; CHECK-SD: // %bb.0:
706+
; CHECK-SD-NEXT: mov w8, #9211 // =0x23fb
707+
; CHECK-SD-NEXT: dup.4h v0, w8
708+
; CHECK-SD-NEXT: ret
709+
;
710+
; CHECK-GI-LABEL: dup_i16_v4i16_constant:
711+
; CHECK-GI: // %bb.0:
712+
; CHECK-GI-NEXT: adrp x8, .LCPI50_0
713+
; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI50_0]
714+
; CHECK-GI-NEXT: ret
715+
ret <4 x i16> <i16 9211, i16 9211, i16 9211, i16 9211>
716+
}

0 commit comments

Comments
 (0)