Skip to content

Commit c9a5ead

Browse files
committed
[AArch64] Add custom lowering for load <3 x i8>. (llvm#78632)
Add custom combine to lower load <3 x i8> as the more efficient sequence below: ldrb wX, [x0, #2]; ldrh wY, [x0]; orr wX, wY, wX, lsl #16; fmov s0, wX. At the moment, there are almost no cases in which such vector operations will be generated automatically. The motivating case is non-power-of-2 SLP vectorization: llvm#77790 (cherry-picked from d1e162e)
1 parent ae2fb1c commit c9a5ead

File tree

2 files changed

+120
-110
lines changed

2 files changed

+120
-110
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20709,6 +20709,61 @@ static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
2070920709
return SDValue();
2071020710
}
2071120711

20712+
// A custom combine to lower load <3 x i8> as the more efficient sequence
20713+
// below:
20714+
// ldrb wX, [x0, #2]
20715+
// ldrh wY, [x0]
20716+
// orr wX, wY, wX, lsl #16
20717+
// fmov s0, wX
20718+
//
20719+
// Note that an alternative sequence with even fewer (although usually more
20720+
// complex/expensive) instructions would be:
20721+
// ld1r.4h { v0 }, [x0], #2
20722+
// ld1.b { v0 }[2], [x0]
20723+
//
20724+
// Generating this sequence unfortunately results in noticeably worse codegen
20725+
// for code that extends the loaded v3i8, due to legalization breaking vector
20726+
// shuffle detection in a way that is very difficult to work around.
20727+
// TODO: Revisit once v3i8 legalization has been improved in general.
20728+
// Rewrites a plain (non-extending) load of <3 x i8> whose original alignment
// is below 4 as an i16 load of bytes 0-1 plus an i8 load of byte 2, packed
// into a single i32 and reinterpreted as v4i8.
//
// Returns an empty SDValue when the load is not a candidate. Otherwise returns
// a merge of {v3i8 value, TokenFactor of both new load chains}, mirroring the
// (value, chain) results of the load being replaced.
static SDValue combineV3I8LoadExt(LoadSDNode *LD, SelectionDAG &DAG) {
  EVT MemVT = LD->getMemoryVT();
  if (MemVT != EVT::getVectorVT(*DAG.getContext(), MVT::i8, 3) ||
      LD->getOriginalAlign() >= 4)
    return SDValue();

  // The replacement value built below always has type MemVT. An extending
  // load yields a wider result type, so substituting it would hand users a
  // value of the wrong type; leave such loads untouched.
  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  SDLoc DL(LD);
  MachineFunction &MF = DAG.getMachineFunction();
  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();
  assert(LD->getOffset().isUndef() && "undef offset expected");

  // Load 2 x i8 as one i16 from the base address, then 1 x i8 from base + 2.
  // Both loads hang off the original chain, so they are independent of each
  // other.
  SDValue L16 = DAG.getLoad(MVT::i16, DL, Chain, BasePtr, MMO);
  TypeSize Offset2 = TypeSize::getFixed(2);
  SDValue L8 = DAG.getLoad(MVT::i8, DL, Chain,
                           DAG.getMemBasePlusOffset(BasePtr, Offset2, DL),
                           MF.getMachineMemOperand(MMO, 2, 1));

  // Extend to i32.
  SDValue Ext16 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, L16);
  SDValue Ext8 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, L8);

  // Pack 2 x i8 and 1 x i8 in an i32 and convert to v4i8. The third byte is
  // placed in bits 16-23; the top byte stays zero.
  SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i32, Ext8,
                            DAG.getConstant(16, DL, MVT::i32));
  SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, Ext16, Shl);
  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::v4i8, Or);

  // Extract v3i8 again (lanes 0-2 of the v4i8).
  SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MemVT, Cast,
                                DAG.getConstant(0, DL, MVT::i64));

  // Join the chain results (result #1) of both new loads so anything ordered
  // after the original load stays ordered after both replacements.
  SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                    {L16.getValue(1), L8.getValue(1)});
  return DAG.getMergeValues({Extract, TokenFactor}, DL);
}
20766+
2071220767
// Perform TBI simplification if supported by the target and try to break up
2071320768
// nontemporal loads larger than 256-bits loads for odd types so LDNPQ 256-bit
2071420769
// load instructions can be selected.
@@ -20720,10 +20775,16 @@ static SDValue performLOADCombine(SDNode *N,
2072020775
performTBISimplification(N->getOperand(1), DCI, DAG);
2072120776

2072220777
LoadSDNode *LD = cast<LoadSDNode>(N);
20723-
EVT MemVT = LD->getMemoryVT();
20724-
if (LD->isVolatile() || !LD->isNonTemporal() || !Subtarget->isLittleEndian())
20778+
if (LD->isVolatile() || !Subtarget->isLittleEndian())
2072520779
return SDValue(N, 0);
2072620780

20781+
if (SDValue Res = combineV3I8LoadExt(LD, DAG))
20782+
return Res;
20783+
20784+
if (!LD->isNonTemporal())
20785+
return SDValue(N, 0);
20786+
20787+
EVT MemVT = LD->getMemoryVT();
2072720788
if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
2072820789
MemVT.getSizeInBits() % 256 == 0 ||
2072920790
256 % MemVT.getScalarSizeInBits() != 0)

llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll

Lines changed: 57 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,10 @@
55
define <16 x i8> @load_v3i8(ptr %src) {
66
; CHECK-LABEL: load_v3i8:
77
; CHECK: ; %bb.0:
8-
; CHECK-NEXT: sub sp, sp, #16
9-
; CHECK-NEXT: .cfi_def_cfa_offset 16
10-
; CHECK-NEXT: ldrh w8, [x0]
11-
; CHECK-NEXT: strh w8, [sp, #12]
12-
; CHECK-NEXT: ldr s0, [sp, #12]
13-
; CHECK-NEXT: ushll.8h v0, v0, #0
14-
; CHECK-NEXT: umov.h w8, v0[0]
15-
; CHECK-NEXT: umov.h w9, v0[1]
8+
; CHECK-NEXT: ldrb w8, [x0, #2]
9+
; CHECK-NEXT: ldrh w9, [x0]
10+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
1611
; CHECK-NEXT: fmov s0, w8
17-
; CHECK-NEXT: add x8, x0, #2
18-
; CHECK-NEXT: mov.b v0[1], w9
19-
; CHECK-NEXT: ld1.b { v0 }[2], [x8]
20-
; CHECK-NEXT: add sp, sp, #16
2112
; CHECK-NEXT: ret
2213
;
2314
; BE-LABEL: load_v3i8:
@@ -47,19 +38,14 @@ define <16 x i8> @load_v3i8(ptr %src) {
4738
define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
4839
; CHECK-LABEL: load_v3i8_to_4xi32:
4940
; CHECK: ; %bb.0:
50-
; CHECK-NEXT: sub sp, sp, #16
51-
; CHECK-NEXT: .cfi_def_cfa_offset 16
52-
; CHECK-NEXT: ldrh w8, [x0]
41+
; CHECK-NEXT: ldrb w8, [x0, #2]
42+
; CHECK-NEXT: ldrh w9, [x0]
5343
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
54-
; CHECK-NEXT: strh w8, [sp, #12]
55-
; CHECK-NEXT: ldr s0, [sp, #12]
56-
; CHECK-NEXT: ldrsb w8, [x0, #2]
57-
; CHECK-NEXT: ushll.8h v0, v0, #0
58-
; CHECK-NEXT: mov.h v0[1], v0[1]
59-
; CHECK-NEXT: mov.h v0[2], w8
44+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
45+
; CHECK-NEXT: fmov s0, w8
46+
; CHECK-NEXT: zip1.8b v0, v0, v0
6047
; CHECK-NEXT: ushll.4s v0, v0, #0
6148
; CHECK-NEXT: and.16b v0, v0, v1
62-
; CHECK-NEXT: add sp, sp, #16
6349
; CHECK-NEXT: ret
6450
;
6551
; BE-LABEL: load_v3i8_to_4xi32:
@@ -90,19 +76,14 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
9076
define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) {
9177
; CHECK-LABEL: load_v3i8_to_4xi32_align_2:
9278
; CHECK: ; %bb.0:
93-
; CHECK-NEXT: sub sp, sp, #16
94-
; CHECK-NEXT: .cfi_def_cfa_offset 16
95-
; CHECK-NEXT: ldrh w8, [x0]
79+
; CHECK-NEXT: ldrb w8, [x0, #2]
80+
; CHECK-NEXT: ldrh w9, [x0]
9681
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
97-
; CHECK-NEXT: strh w8, [sp, #12]
98-
; CHECK-NEXT: ldr s0, [sp, #12]
99-
; CHECK-NEXT: ldrsb w8, [x0, #2]
100-
; CHECK-NEXT: ushll.8h v0, v0, #0
101-
; CHECK-NEXT: mov.h v0[1], v0[1]
102-
; CHECK-NEXT: mov.h v0[2], w8
82+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
83+
; CHECK-NEXT: fmov s0, w8
84+
; CHECK-NEXT: zip1.8b v0, v0, v0
10385
; CHECK-NEXT: ushll.4s v0, v0, #0
10486
; CHECK-NEXT: and.16b v0, v0, v1
105-
; CHECK-NEXT: add sp, sp, #16
10687
; CHECK-NEXT: ret
10788
;
10889
; BE-LABEL: load_v3i8_to_4xi32_align_2:
@@ -161,19 +142,14 @@ define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) {
161142
define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
162143
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
163144
; CHECK: ; %bb.0:
164-
; CHECK-NEXT: sub sp, sp, #16
165-
; CHECK-NEXT: .cfi_def_cfa_offset 16
166-
; CHECK-NEXT: ldurh w8, [x0, #1]
145+
; CHECK-NEXT: ldrb w8, [x0, #3]
146+
; CHECK-NEXT: ldurh w9, [x0, #1]
167147
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
168-
; CHECK-NEXT: strh w8, [sp, #12]
169-
; CHECK-NEXT: ldr s0, [sp, #12]
170-
; CHECK-NEXT: ldrsb w8, [x0, #3]
171-
; CHECK-NEXT: ushll.8h v0, v0, #0
172-
; CHECK-NEXT: mov.h v0[1], v0[1]
173-
; CHECK-NEXT: mov.h v0[2], w8
148+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
149+
; CHECK-NEXT: fmov s0, w8
150+
; CHECK-NEXT: zip1.8b v0, v0, v0
174151
; CHECK-NEXT: ushll.4s v0, v0, #0
175152
; CHECK-NEXT: and.16b v0, v0, v1
176-
; CHECK-NEXT: add sp, sp, #16
177153
; CHECK-NEXT: ret
178154
;
179155
; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:
@@ -205,19 +181,14 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
205181
define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
206182
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
207183
; CHECK: ; %bb.0:
208-
; CHECK-NEXT: sub sp, sp, #16
209-
; CHECK-NEXT: .cfi_def_cfa_offset 16
210-
; CHECK-NEXT: ldurh w8, [x0, #3]
184+
; CHECK-NEXT: ldrb w8, [x0, #5]
185+
; CHECK-NEXT: ldurh w9, [x0, #3]
211186
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
212-
; CHECK-NEXT: strh w8, [sp, #12]
213-
; CHECK-NEXT: ldr s0, [sp, #12]
214-
; CHECK-NEXT: ldrsb w8, [x0, #5]
215-
; CHECK-NEXT: ushll.8h v0, v0, #0
216-
; CHECK-NEXT: mov.h v0[1], v0[1]
217-
; CHECK-NEXT: mov.h v0[2], w8
187+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
188+
; CHECK-NEXT: fmov s0, w8
189+
; CHECK-NEXT: zip1.8b v0, v0, v0
218190
; CHECK-NEXT: ushll.4s v0, v0, #0
219191
; CHECK-NEXT: and.16b v0, v0, v1
220-
; CHECK-NEXT: add sp, sp, #16
221192
; CHECK-NEXT: ret
222193
;
223194
; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:
@@ -349,18 +320,14 @@ define <3 x i32> @load_v3i32(ptr %src) {
349320
define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {
350321
; CHECK-LABEL: load_v3i8_zext_to_3xi32:
351322
; CHECK: ; %bb.0:
352-
; CHECK-NEXT: sub sp, sp, #16
353-
; CHECK-NEXT: .cfi_def_cfa_offset 16
354-
; CHECK-NEXT: ldrh w8, [x0]
323+
; CHECK-NEXT: ldrb w8, [x0, #2]
324+
; CHECK-NEXT: ldrh w9, [x0]
355325
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
356-
; CHECK-NEXT: strh w8, [sp, #12]
357-
; CHECK-NEXT: add x8, x0, #2
358-
; CHECK-NEXT: ldr s0, [sp, #12]
359-
; CHECK-NEXT: ushll.8h v0, v0, #0
360-
; CHECK-NEXT: ld1.b { v0 }[4], [x8]
326+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
327+
; CHECK-NEXT: fmov s0, w8
328+
; CHECK-NEXT: zip1.8b v0, v0, v0
361329
; CHECK-NEXT: ushll.4s v0, v0, #0
362330
; CHECK-NEXT: and.16b v0, v0, v1
363-
; CHECK-NEXT: add sp, sp, #16
364331
; CHECK-NEXT: ret
365332
;
366333
; BE-LABEL: load_v3i8_zext_to_3xi32:
@@ -389,18 +356,14 @@ define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {
389356
define <3 x i32> @load_v3i8_sext_to_3xi32(ptr %src) {
390357
; CHECK-LABEL: load_v3i8_sext_to_3xi32:
391358
; CHECK: ; %bb.0:
392-
; CHECK-NEXT: sub sp, sp, #16
393-
; CHECK-NEXT: .cfi_def_cfa_offset 16
394-
; CHECK-NEXT: ldrh w8, [x0]
395-
; CHECK-NEXT: strh w8, [sp, #12]
396-
; CHECK-NEXT: add x8, x0, #2
397-
; CHECK-NEXT: ldr s0, [sp, #12]
398-
; CHECK-NEXT: ushll.8h v0, v0, #0
399-
; CHECK-NEXT: ld1.b { v0 }[4], [x8]
359+
; CHECK-NEXT: ldrb w8, [x0, #2]
360+
; CHECK-NEXT: ldrh w9, [x0]
361+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
362+
; CHECK-NEXT: fmov s0, w8
363+
; CHECK-NEXT: zip1.8b v0, v0, v0
400364
; CHECK-NEXT: ushll.4s v0, v0, #0
401365
; CHECK-NEXT: shl.4s v0, v0, #24
402366
; CHECK-NEXT: sshr.4s v0, v0, #24
403-
; CHECK-NEXT: add sp, sp, #16
404367
; CHECK-NEXT: ret
405368
;
406369
; BE-LABEL: load_v3i8_sext_to_3xi32:
@@ -514,19 +477,15 @@ entry:
514477
define void @load_ext_to_64bits(ptr %src, ptr %dst) {
515478
; CHECK-LABEL: load_ext_to_64bits:
516479
; CHECK: ; %bb.0: ; %entry
517-
; CHECK-NEXT: sub sp, sp, #16
518-
; CHECK-NEXT: .cfi_def_cfa_offset 16
519-
; CHECK-NEXT: ldrh w8, [x0]
520-
; CHECK-NEXT: strh w8, [sp, #12]
521-
; CHECK-NEXT: add x8, x0, #2
522-
; CHECK-NEXT: ldr s0, [sp, #12]
523-
; CHECK-NEXT: ushll.8h v0, v0, #0
524-
; CHECK-NEXT: ld1.b { v0 }[4], [x8]
480+
; CHECK-NEXT: ldrb w9, [x0, #2]
525481
; CHECK-NEXT: add x8, x1, #4
482+
; CHECK-NEXT: ldrh w10, [x0]
483+
; CHECK-NEXT: orr w9, w10, w9, lsl #16
484+
; CHECK-NEXT: fmov s0, w9
485+
; CHECK-NEXT: zip1.8b v0, v0, v0
526486
; CHECK-NEXT: bic.4h v0, #255, lsl #8
527487
; CHECK-NEXT: st1.h { v0 }[2], [x8]
528488
; CHECK-NEXT: str s0, [x1]
529-
; CHECK-NEXT: add sp, sp, #16
530489
; CHECK-NEXT: ret
531490
;
532491
; BE-LABEL: load_ext_to_64bits:
@@ -617,24 +576,20 @@ entry:
617576
define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
618577
; CHECK-LABEL: load_ext_add_to_64bits:
619578
; CHECK: ; %bb.0: ; %entry
620-
; CHECK-NEXT: sub sp, sp, #16
621-
; CHECK-NEXT: .cfi_def_cfa_offset 16
622-
; CHECK-NEXT: ldrh w8, [x0]
579+
; CHECK-NEXT: ldrb w8, [x0, #2]
580+
; CHECK-NEXT: ldrh w9, [x0]
581+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
582+
; CHECK-NEXT: fmov s0, w8
623583
; CHECK-NEXT: Lloh2:
624-
; CHECK-NEXT: adrp x9, lCPI15_0@PAGE
625-
; CHECK-NEXT: strh w8, [sp, #12]
626-
; CHECK-NEXT: add x8, x0, #2
627-
; CHECK-NEXT: ldr s0, [sp, #12]
584+
; CHECK-NEXT: adrp x8, lCPI15_0@PAGE
585+
; CHECK-NEXT: zip1.8b v0, v0, v0
628586
; CHECK-NEXT: Lloh3:
629-
; CHECK-NEXT: ldr d1, [x9, lCPI15_0@PAGEOFF]
630-
; CHECK-NEXT: ushll.8h v0, v0, #0
631-
; CHECK-NEXT: ld1.b { v0 }[4], [x8]
587+
; CHECK-NEXT: ldr d1, [x8, lCPI15_0@PAGEOFF]
632588
; CHECK-NEXT: add x8, x1, #4
633589
; CHECK-NEXT: bic.4h v0, #255, lsl #8
634590
; CHECK-NEXT: add.4h v0, v0, v1
635591
; CHECK-NEXT: st1.h { v0 }[2], [x8]
636592
; CHECK-NEXT: str s0, [x1]
637-
; CHECK-NEXT: add sp, sp, #16
638593
; CHECK-NEXT: ret
639594
; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
640595
;
@@ -883,24 +838,21 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
883838
define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {
884839
; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
885840
; CHECK: ; %bb.0:
886-
; CHECK-NEXT: sub sp, sp, #16
887-
; CHECK-NEXT: .cfi_def_cfa_offset 16
888-
; CHECK-NEXT: ldrh w9, [x0]
841+
; CHECK-NEXT: ldrb w10, [x0, #2]
889842
; CHECK-NEXT: Lloh4:
890843
; CHECK-NEXT: adrp x8, lCPI22_0@PAGE
891-
; CHECK-NEXT: strh w9, [sp, #12]
844+
; CHECK-NEXT: ldrh w11, [x0]
892845
; CHECK-NEXT: add x9, x0, #2
893-
; CHECK-NEXT: ldr s0, [sp, #12]
894846
; CHECK-NEXT: Lloh5:
895847
; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF]
896848
; CHECK-NEXT: add x8, x0, #1
897-
; CHECK-NEXT: ushll.8h v0, v0, #0
898-
; CHECK-NEXT: ld1.b { v0 }[4], [x9]
849+
; CHECK-NEXT: orr w10, w11, w10, lsl #16
850+
; CHECK-NEXT: fmov s0, w10
851+
; CHECK-NEXT: zip1.8b v0, v0, v0
899852
; CHECK-NEXT: uaddw.4s v0, v1, v0
900-
; CHECK-NEXT: st1.b { v0 }[4], [x8]
901853
; CHECK-NEXT: st1.b { v0 }[8], [x9]
902854
; CHECK-NEXT: st1.b { v0 }[0], [x0]
903-
; CHECK-NEXT: add sp, sp, #16
855+
; CHECK-NEXT: st1.b { v0 }[4], [x8]
904856
; CHECK-NEXT: ret
905857
; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
906858
;
@@ -939,24 +891,21 @@ define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {
939891
define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) {
940892
; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
941893
; CHECK: ; %bb.0:
942-
; CHECK-NEXT: sub sp, sp, #16
943-
; CHECK-NEXT: .cfi_def_cfa_offset 16
944-
; CHECK-NEXT: ldrh w9, [x0]
894+
; CHECK-NEXT: ldrb w10, [x0, #2]
945895
; CHECK-NEXT: Lloh6:
946896
; CHECK-NEXT: adrp x8, lCPI23_0@PAGE
947-
; CHECK-NEXT: strh w9, [sp, #12]
897+
; CHECK-NEXT: ldrh w11, [x0]
948898
; CHECK-NEXT: add x9, x0, #2
949-
; CHECK-NEXT: ldr s0, [sp, #12]
950899
; CHECK-NEXT: Lloh7:
951900
; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF]
952901
; CHECK-NEXT: add x8, x0, #1
953-
; CHECK-NEXT: ushll.8h v0, v0, #0
954-
; CHECK-NEXT: ld1.b { v0 }[4], [x9]
902+
; CHECK-NEXT: orr w10, w11, w10, lsl #16
903+
; CHECK-NEXT: fmov s0, w10
904+
; CHECK-NEXT: zip1.8b v0, v0, v0
955905
; CHECK-NEXT: uaddw.4s v0, v1, v0
956-
; CHECK-NEXT: st1.b { v0 }[4], [x8]
957906
; CHECK-NEXT: st1.b { v0 }[8], [x9]
958907
; CHECK-NEXT: st1.b { v0 }[0], [x0]
959-
; CHECK-NEXT: add sp, sp, #16
908+
; CHECK-NEXT: st1.b { v0 }[4], [x8]
960909
; CHECK-NEXT: ret
961910
; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
962911
;

0 commit comments

Comments
 (0)