Skip to content

Commit e5cb18b

Browse files
committed
[AArch64] Make use of byte FPR stores for bytes extracted from vectors
This helps avoid some pointless `fmov`s in some cases. Currently, this is done in ISEL, as FPR bytes are problematic in SDAG (neither GPR nor FPR bytes are a legal type).
1 parent 8a3fe30 commit e5cb18b

32 files changed

+261
-136
lines changed

llvm/include/llvm/CodeGen/ValueTypes.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,8 @@ def amdgpuBufferFatPointer : ValueType<160, 234>;
338338
// FIXME: Remove this and the getPointerType() override if MVT::i192 is added.
339339
def amdgpuBufferStridedPointer : ValueType<192, 235>;
340340

341+
def vi8 : ValueType<8, 236>; // 8-bit integer in FPR (AArch64)
342+
341343
let isNormalValueType = false in {
342344
def token : ValueType<0, 504>; // TokenTy
343345
def MetadataVT : ValueType<0, 505> { // Metadata

llvm/lib/CodeGen/ValueTypes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ std::string EVT::getEVTString() const {
198198
return "amdgpuBufferFatPointer";
199199
case MVT::amdgpuBufferStridedPointer:
200200
return "amdgpuBufferStridedPointer";
201+
case MVT::vi8:
202+
return "vi8";
201203
}
202204
}
203205

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
401401
}
402402

403403
if (Subtarget->hasFPARMv8()) {
404+
addRegisterClass(MVT::vi8, &AArch64::FPR8RegClass);
404405
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
405406
addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
406407
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3575,7 +3575,7 @@ defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
35753575
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
35763576
let Predicates = [HasFPARMv8] in {
35773577
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
3578-
[(set FPR8Op:$Rt,
3578+
[(set (i8 FPR8Op:$Rt),
35793579
(load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
35803580
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
35813581
[(set (f16 FPR16Op:$Rt),
@@ -3763,7 +3763,7 @@ defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
37633763
(load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
37643764
let Predicates = [HasFPARMv8] in {
37653765
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
3766-
[(set FPR8Op:$Rt,
3766+
[(set (i8 FPR8Op:$Rt),
37673767
(load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
37683768
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
37693769
[(set (f16 FPR16Op:$Rt),
@@ -4333,7 +4333,7 @@ defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
43334333
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
43344334
let Predicates = [HasFPARMv8] in {
43354335
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
4336-
[(store FPR8Op:$Rt,
4336+
[(store (i8 FPR8Op:$Rt),
43374337
(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
43384338
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
43394339
[(store (f16 FPR16Op:$Rt),
@@ -4451,6 +4451,8 @@ multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop
44514451
}
44524452

44534453
let AddedComplexity = 19 in {
4454+
// Lane-0 byte store from v16i8. The offset operand is uimm12s1, the
// byte-scaled immediate that STRBui itself is defined with.
defm : VecStoreLane0Pat<am_indexed8, truncstorei8, v16i8, i32, vi8, bsub, uimm12s1, STRBui>;
4455+
// Lane-0 truncating byte store from v4i32. The offset operand is uimm12s1,
// the byte-scaled immediate that STRBui itself is defined with.
defm : VecStoreLane0Pat<am_indexed8, truncstorei8, v4i32, i32, vi8, bsub, uimm12s1, STRBui>;
44544456
defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
44554457
defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
44564458
defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
@@ -4469,7 +4471,7 @@ defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
44694471
(am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
44704472
let Predicates = [HasFPARMv8] in {
44714473
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
4472-
[(store FPR8Op:$Rt,
4474+
[(store (i8 FPR8Op:$Rt),
44734475
(am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
44744476
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
44754477
[(store (f16 FPR16Op:$Rt),
@@ -4598,6 +4600,7 @@ multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
45984600
}
45994601

46004602
let AddedComplexity = 19 in {
4603+
defm : VecStoreULane0Pat<truncstorei8, v16i8, i32, vi8, bsub, STURBi>;
46014604
defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
46024605
defm : VecStoreULane0Pat<store, v8f16, f16, f16, hsub, STURHi>;
46034606
defm : VecStoreULane0Pat<store, v4i32, i32, i32, ssub, STURSi>;

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ def Q30 : AArch64Reg<30, "q30", [D30, D30_HI], ["v30", ""]>, DwarfRegAlias<B30
497497
def Q31 : AArch64Reg<31, "q31", [D31, D31_HI], ["v31", ""]>, DwarfRegAlias<B31>;
498498
}
499499

500-
def FPR8 : RegisterClass<"AArch64", [i8], 8, (sequence "B%u", 0, 31)> {
500+
def FPR8 : RegisterClass<"AArch64", [i8, vi8], 8, (sequence "B%u", 0, 31)> {
501501
let Size = 8;
502502
let DecoderMethod = "DecodeSimpleRegisterClass<AArch64::FPR8RegClassID, 0, 32>";
503503
}

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,6 +1827,43 @@ let Predicates = [HasSVE] in {
18271827
defm : adrXtwShiftPat<nxv2i64, nxv2i1, 3>;
18281828
} // End HasSVE
18291829

1830+
// Patterns that select `storeop` of a single element extracted from the vector
// type VTy (extracted as scalar type STy) as the store instruction STR applied
// to the SubRegIdx sub-register of the vector, typed as SubRegTy.
//   UIAddrMode / IndexType - addressing-mode ComplexPattern and the operand
//                            used for its offset.
//   DUP / DUPIdxTy         - instruction and index operand used to bring a
//                            non-zero lane into lane 0 before the store.
multiclass SVEVecStoreLanePat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
1831+
ValueType VTy, ValueType STy,
1832+
ValueType SubRegTy,
1833+
SubRegIndex SubRegIdx, Operand IndexType,
1834+
Instruction STR,
1835+
Instruction DUP, AsmVectorIndexOpnd DUPIdxTy> {
1836+
let Predicates = [HasSVE_or_SME] in {
1837+
// Same as Neon VecStoreLane0Pat but without matching VecListOne128.
1838+
def : Pat<(storeop (STy (vector_extract VTy:$Vt, (i64 0))),
1839+
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
1840+
(STR (SubRegTy (EXTRACT_SUBREG $Vt, SubRegIdx)),
1841+
GPR64sp:$Rn, IndexType:$offset)>;
1842+
}
1843+
1844+
// Non-zero immediate index: DUP the requested lane and store the SubRegIdx
// sub-register of the result.
// NOTE(review): this pattern is declared after the HasSVE_or_SME predicate
// block above has closed, although it emits DUP (instantiated with the SVE
// DUP_ZZI_* instructions) -- confirm whether it should carry the predicate too.
1845+
def : Pat<(storeop (STy (vector_extract VTy:$Vt, DUPIdxTy:$idx)),
1846+
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
1847+
(STR (SubRegTy (EXTRACT_SUBREG (DUP $Vt, DUPIdxTy:$idx), SubRegIdx)),
1848+
GPR64sp:$Rn, IndexType:$offset)>;
1849+
}
1850+
1851+
// Note: Types other than i8 are handled in performSTORECombine -- i8 is tricky
1852+
// to handle before ISEL as it is not really a legal type in many places, nor
1853+
// is its equivalently sized FP variant.
1854+
let AddedComplexity = 19 in {
1855+
// Lane 0 truncating stores
1856+
// i32 -> i8
1857+
// Scaled-offset byte store: the offset operand is uimm12s1, the byte-scaled
// immediate that STRBui itself is defined with.
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv4i32, i32, vi8, bsub, uimm12s1, STRBui, DUP_ZZI_S, sve_elm_idx_extdup_s>;
1858+
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv4i32, i32, vi8, bsub, simm9, STURBi, DUP_ZZI_S, sve_elm_idx_extdup_s>;
1859+
// i64 -> i8
1860+
// Scaled-offset byte store: the offset operand is uimm12s1, the byte-scaled
// immediate that STRBui itself is defined with.
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv2i64, i64, vi8, bsub, uimm12s1, STRBui, DUP_ZZI_D, sve_elm_idx_extdup_d>;
1861+
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv2i64, i64, vi8, bsub, simm9, STURBi, DUP_ZZI_D, sve_elm_idx_extdup_d>;
1862+
// i8 -> i8 (technically a truncate as the extracted type is i32)
1863+
// Scaled-offset byte store: the offset operand is uimm12s1, the byte-scaled
// immediate that STRBui itself is defined with.
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv16i8, i32, vi8, bsub, uimm12s1, STRBui, DUP_ZZI_B, sve_elm_idx_extdup_b>;
1864+
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv16i8, i32, vi8, bsub, simm9, STURBi, DUP_ZZI_B, sve_elm_idx_extdup_b>;
1865+
}
1866+
18301867
let Predicates = [HasSVE_or_SME] in {
18311868
defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
18321869

llvm/test/CodeGen/AArch64/aarch64-sve-ldst-one.ll

Lines changed: 128 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTREAMING
3-
; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
4-
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
2+
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s
3+
; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s
4+
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s
55

66
target triple = "aarch64-unknown-linux-gnu"
77

@@ -106,18 +106,11 @@ entry:
106106
}
107107

108108
define void @test_str_lane_s8(ptr %a, <vscale x 16 x i8> %b) {
109-
; CHECK-NONSTREAMING-LABEL: test_str_lane_s8:
110-
; CHECK-NONSTREAMING: // %bb.0: // %entry
111-
; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7]
112-
; CHECK-NONSTREAMING-NEXT: strb w8, [x0]
113-
; CHECK-NONSTREAMING-NEXT: ret
114-
;
115-
; STREAMING-COMPAT-LABEL: test_str_lane_s8:
116-
; STREAMING-COMPAT: // %bb.0: // %entry
117-
; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
118-
; STREAMING-COMPAT-NEXT: fmov w8, s0
119-
; STREAMING-COMPAT-NEXT: strb w8, [x0]
120-
; STREAMING-COMPAT-NEXT: ret
109+
; CHECK-LABEL: test_str_lane_s8:
110+
; CHECK: // %bb.0: // %entry
111+
; CHECK-NEXT: mov z0.b, z0.b[7]
112+
; CHECK-NEXT: str b0, [x0]
113+
; CHECK-NEXT: ret
121114

122115
entry:
123116
%0 = extractelement <vscale x 16 x i8> %b, i32 7
@@ -128,8 +121,7 @@ entry:
128121
define void @test_str_lane0_s8(ptr %a, <vscale x 16 x i8> %b) {
129122
; CHECK-LABEL: test_str_lane0_s8:
130123
; CHECK: // %bb.0: // %entry
131-
; CHECK-NEXT: fmov w8, s0
132-
; CHECK-NEXT: strb w8, [x0]
124+
; CHECK-NEXT: str b0, [x0]
133125
; CHECK-NEXT: ret
134126

135127
entry:
@@ -201,6 +193,19 @@ define void @test_str_reduction_i32_to_i16(ptr %ptr, <vscale x 4 x i1> %p0, <vsc
201193
ret void
202194
}
203195

196+
define void @test_str_reduction_i32_to_i8(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
197+
; CHECK-LABEL: test_str_reduction_i32_to_i8:
198+
; CHECK: // %bb.0:
199+
; CHECK-NEXT: uaddv d0, p0, z0.s
200+
; CHECK-NEXT: str b0, [x0]
201+
; CHECK-NEXT: ret
202+
203+
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
204+
%trunc = trunc i64 %reduce to i8
205+
store i8 %trunc, ptr %ptr, align 1
206+
ret void
207+
}
208+
204209
define void @test_str_reduction_i32_to_i32_negative_offset(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
205210
; CHECK-LABEL: test_str_reduction_i32_to_i32_negative_offset:
206211
; CHECK: // %bb.0:
@@ -242,6 +247,20 @@ define void @test_str_reduction_i32_to_i16_negative_offset(ptr %ptr, <vscale x 4
242247
ret void
243248
}
244249

250+
define void @test_str_reduction_i32_to_i8_negative_offset(ptr %ptr, <vscale x 4 x i1> %p0, <vscale x 4 x i32> %v) {
251+
; CHECK-LABEL: test_str_reduction_i32_to_i8_negative_offset:
252+
; CHECK: // %bb.0:
253+
; CHECK-NEXT: uaddv d0, p0, z0.s
254+
; CHECK-NEXT: stur b0, [x0, #-8]
255+
; CHECK-NEXT: ret
256+
257+
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
258+
%trunc = trunc i64 %reduce to i8
259+
%out_ptr = getelementptr inbounds i8, ptr %ptr, i64 -8
260+
store i8 %trunc, ptr %out_ptr, align 1
261+
ret void
262+
}
263+
245264
define void @test_str_lane_s32_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
246265
; CHECK-LABEL: test_str_lane_s32_negative_offset:
247266
; CHECK: // %bb.0: // %entry
@@ -297,18 +316,11 @@ entry:
297316
}
298317

299318
define void @test_str_lane_s8_negative_offset(ptr %a, <vscale x 16 x i8> %b) {
300-
; CHECK-NONSTREAMING-LABEL: test_str_lane_s8_negative_offset:
301-
; CHECK-NONSTREAMING: // %bb.0: // %entry
302-
; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7]
303-
; CHECK-NONSTREAMING-NEXT: sturb w8, [x0, #-8]
304-
; CHECK-NONSTREAMING-NEXT: ret
305-
;
306-
; STREAMING-COMPAT-LABEL: test_str_lane_s8_negative_offset:
307-
; STREAMING-COMPAT: // %bb.0: // %entry
308-
; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
309-
; STREAMING-COMPAT-NEXT: fmov w8, s0
310-
; STREAMING-COMPAT-NEXT: sturb w8, [x0, #-8]
311-
; STREAMING-COMPAT-NEXT: ret
319+
; CHECK-LABEL: test_str_lane_s8_negative_offset:
320+
; CHECK: // %bb.0: // %entry
321+
; CHECK-NEXT: mov z0.b, z0.b[7]
322+
; CHECK-NEXT: stur b0, [x0, #-8]
323+
; CHECK-NEXT: ret
312324

313325
entry:
314326
%0 = extractelement <vscale x 16 x i8> %b, i32 7
@@ -320,8 +332,7 @@ entry:
320332
define void @test_str_lane0_s8_negative_offset(ptr %a, <vscale x 16 x i8> %b) {
321333
; CHECK-LABEL: test_str_lane0_s8_negative_offset:
322334
; CHECK: // %bb.0: // %entry
323-
; CHECK-NEXT: fmov w8, s0
324-
; CHECK-NEXT: sturb w8, [x0, #-8]
335+
; CHECK-NEXT: stur b0, [x0, #-8]
325336
; CHECK-NEXT: ret
326337

327338
entry:
@@ -385,6 +396,48 @@ entry:
385396
ret void
386397
}
387398

399+
400+
define void @test_str_trunc_lane_s32_to_s8(ptr %a, <vscale x 4 x i32> %b) {
401+
; CHECK-LABEL: test_str_trunc_lane_s32_to_s8:
402+
; CHECK: // %bb.0: // %entry
403+
; CHECK-NEXT: mov z0.s, z0.s[3]
404+
; CHECK-NEXT: str b0, [x0]
405+
; CHECK-NEXT: ret
406+
407+
entry:
408+
%0 = extractelement <vscale x 4 x i32> %b, i32 3
409+
%trunc = trunc i32 %0 to i8
410+
store i8 %trunc, ptr %a, align 1
411+
ret void
412+
}
413+
414+
define void @test_str_trunc_lane0_s32_to_s8(ptr %a, <vscale x 4 x i32> %b) {
415+
; CHECK-LABEL: test_str_trunc_lane0_s32_to_s8:
416+
; CHECK: // %bb.0: // %entry
417+
; CHECK-NEXT: str b0, [x0]
418+
; CHECK-NEXT: ret
419+
420+
entry:
421+
%0 = extractelement <vscale x 4 x i32> %b, i32 0
422+
%trunc = trunc i32 %0 to i8
423+
store i8 %trunc, ptr %a, align 1
424+
ret void
425+
}
426+
427+
define void @test_str_trunc_lane_s64_to_s8(ptr %a, <vscale x 2 x i64> %b) {
428+
; CHECK-LABEL: test_str_trunc_lane_s64_to_s8:
429+
; CHECK: // %bb.0: // %entry
430+
; CHECK-NEXT: mov z0.d, z0.d[3]
431+
; CHECK-NEXT: str b0, [x0]
432+
; CHECK-NEXT: ret
433+
434+
entry:
435+
%0 = extractelement <vscale x 2 x i64> %b, i32 3
436+
%trunc = trunc i64 %0 to i8
437+
store i8 %trunc, ptr %a, align 1
438+
ret void
439+
}
440+
388441
define void @test_str_trunc_lane_s32_to_s16_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
389442
; CHECK-LABEL: test_str_trunc_lane_s32_to_s16_negative_offset:
390443
; CHECK: // %bb.0: // %entry
@@ -413,3 +466,47 @@ entry:
413466
store i16 %trunc, ptr %out_ptr, align 2
414467
ret void
415468
}
469+
470+
define void @test_str_trunc_lane_s32_to_s8_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
471+
; CHECK-LABEL: test_str_trunc_lane_s32_to_s8_negative_offset:
472+
; CHECK: // %bb.0: // %entry
473+
; CHECK-NEXT: mov z0.s, z0.s[3]
474+
; CHECK-NEXT: stur b0, [x0, #-8]
475+
; CHECK-NEXT: ret
476+
477+
entry:
478+
%0 = extractelement <vscale x 4 x i32> %b, i32 3
479+
%trunc = trunc i32 %0 to i8
480+
%out_ptr = getelementptr inbounds i8, ptr %a, i64 -8
481+
store i8 %trunc, ptr %out_ptr, align 1
482+
ret void
483+
}
484+
485+
define void @test_str_trunc_lane0_s32_to_s8_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
486+
; CHECK-LABEL: test_str_trunc_lane0_s32_to_s8_negative_offset:
487+
; CHECK: // %bb.0: // %entry
488+
; CHECK-NEXT: stur b0, [x0, #-8]
489+
; CHECK-NEXT: ret
490+
491+
entry:
492+
%0 = extractelement <vscale x 4 x i32> %b, i32 0
493+
%trunc = trunc i32 %0 to i8
494+
%out_ptr = getelementptr inbounds i8, ptr %a, i64 -8
495+
store i8 %trunc, ptr %out_ptr, align 1
496+
ret void
497+
}
498+
499+
define void @test_str_trunc_lane_s64_to_s8_negative_offset(ptr %a, <vscale x 2 x i64> %b) {
500+
; CHECK-LABEL: test_str_trunc_lane_s64_to_s8_negative_offset:
501+
; CHECK: // %bb.0: // %entry
502+
; CHECK-NEXT: mov z0.d, z0.d[3]
503+
; CHECK-NEXT: stur b0, [x0, #-8]
504+
; CHECK-NEXT: ret
505+
506+
entry:
507+
%0 = extractelement <vscale x 2 x i64> %b, i32 3
508+
%trunc = trunc i64 %0 to i8
509+
%out_ptr = getelementptr inbounds i8, ptr %a, i64 -8
510+
store i8 %trunc, ptr %out_ptr, align 1
511+
ret void
512+
}

llvm/test/CodeGen/AArch64/add.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ define void @v2i8(ptr %p1, ptr %p2) {
6464
; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9]
6565
; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
6666
; CHECK-SD-NEXT: mov w8, v0.s[1]
67-
; CHECK-SD-NEXT: fmov w9, s0
68-
; CHECK-SD-NEXT: strb w9, [x0]
67+
; CHECK-SD-NEXT: str b0, [x0]
6968
; CHECK-SD-NEXT: strb w8, [x0, #1]
7069
; CHECK-SD-NEXT: ret
7170
;

llvm/test/CodeGen/AArch64/andorxor.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,7 @@ define void @and_v2i8(ptr %p1, ptr %p2) {
184184
; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9]
185185
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
186186
; CHECK-SD-NEXT: mov w8, v0.s[1]
187-
; CHECK-SD-NEXT: fmov w9, s0
188-
; CHECK-SD-NEXT: strb w9, [x0]
187+
; CHECK-SD-NEXT: str b0, [x0]
189188
; CHECK-SD-NEXT: strb w8, [x0, #1]
190189
; CHECK-SD-NEXT: ret
191190
;
@@ -221,8 +220,7 @@ define void @or_v2i8(ptr %p1, ptr %p2) {
221220
; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9]
222221
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
223222
; CHECK-SD-NEXT: mov w8, v0.s[1]
224-
; CHECK-SD-NEXT: fmov w9, s0
225-
; CHECK-SD-NEXT: strb w9, [x0]
223+
; CHECK-SD-NEXT: str b0, [x0]
226224
; CHECK-SD-NEXT: strb w8, [x0, #1]
227225
; CHECK-SD-NEXT: ret
228226
;
@@ -258,8 +256,7 @@ define void @xor_v2i8(ptr %p1, ptr %p2) {
258256
; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9]
259257
; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b
260258
; CHECK-SD-NEXT: mov w8, v0.s[1]
261-
; CHECK-SD-NEXT: fmov w9, s0
262-
; CHECK-SD-NEXT: strb w9, [x0]
259+
; CHECK-SD-NEXT: str b0, [x0]
263260
; CHECK-SD-NEXT: strb w8, [x0, #1]
264261
; CHECK-SD-NEXT: ret
265262
;

0 commit comments

Comments
 (0)