Skip to content

Commit 183f53b

Browse files
committed
[ARM] Add support for ARM Instructions for Init Undef Pass
For assembly instructions within the ARM architecture, certain instructions have early-clobber constraints defined for them. However, when using the Greedy register allocator, these constraints are ignored. To get around this, we can use the Init Undef pass to assign a pseudo instruction to the registers that are early-clobber, ensuring that the constraint is followed. This commit adds support for this using a new pseudo instruction, `PseudoARMInitUndef`, which is used to ensure early-clobber constraints are followed. The relevant overriding functions have also been provided so that the architecture is supported by the pass and the required information can be passed to ensure that early-clobber constraints are respected.
1 parent a71030b commit 183f53b

File tree

9 files changed

+144
-63
lines changed

9 files changed

+144
-63
lines changed

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2409,6 +2409,11 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
24092409
case ARM::SEH_EpilogEnd:
24102410
ATS.emitARMWinCFIEpilogEnd();
24112411
return;
2412+
2413+
case ARM::PseudoARMInitUndefMQPR:
2414+
case ARM::PseudoARMInitUndefSPR:
2415+
case ARM::PseudoARMInitUndefDPR_VFP2:
2416+
return;
24122417
}
24132418

24142419
MCInst TmpInst;

llvm/lib/Target/ARM/ARMBaseInstrInfo.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,20 @@
1313
#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
1414
#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
1515

16+
#include "ARMBaseRegisterInfo.h"
1617
#include "MCTargetDesc/ARMBaseInfo.h"
18+
#include "MCTargetDesc/ARMMCTargetDesc.h"
1719
#include "llvm/ADT/DenseMap.h"
1820
#include "llvm/ADT/SmallSet.h"
1921
#include "llvm/CodeGen/MachineBasicBlock.h"
2022
#include "llvm/CodeGen/MachineInstr.h"
2123
#include "llvm/CodeGen/MachineInstrBuilder.h"
2224
#include "llvm/CodeGen/MachineOperand.h"
25+
#include "llvm/CodeGen/MachineRegisterInfo.h"
2326
#include "llvm/CodeGen/TargetInstrInfo.h"
2427
#include "llvm/IR/IntrinsicInst.h"
2528
#include "llvm/IR/IntrinsicsARM.h"
29+
#include "llvm/Support/ErrorHandling.h"
2630
#include <array>
2731
#include <cstdint>
2832

@@ -536,6 +540,17 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
536540

537541
std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
538542
Register Reg) const override;
543+
544+
unsigned getUndefInitOpcode(unsigned RegClassID) const override {
545+
if (RegClassID == ARM::MQPRRegClass.getID())
546+
return ARM::PseudoARMInitUndefMQPR;
547+
if (RegClassID == ARM::SPRRegClass.getID())
548+
return ARM::PseudoARMInitUndefSPR;
549+
if (RegClassID == ARM::DPR_VFP2RegClass.getID())
550+
return ARM::PseudoARMInitUndefDPR_VFP2;
551+
552+
llvm_unreachable("Unexpected register class.");
553+
}
539554
};
540555

541556
/// Get the operands corresponding to the given \p Pred value. By default, the

llvm/lib/Target/ARM/ARMBaseRegisterInfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,17 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
240240
unsigned SrcSubReg) const override;
241241

242242
int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
243+
244+
const TargetRegisterClass *
245+
getTargetRegisterClass(const TargetRegisterClass *RC) const override {
246+
if (ARM::MQPRRegClass.hasSubClassEq(RC))
247+
return &ARM::MQPRRegClass;
248+
if (ARM::SPRRegClass.hasSubClassEq(RC))
249+
return &ARM::SPRRegClass;
250+
if (ARM::DPR_VFP2RegClass.hasSubClassEq(RC))
251+
return &ARM::DPR_VFP2RegClass;
252+
return RC;
253+
}
243254
};
244255

245256
} // end namespace llvm

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6532,3 +6532,14 @@ let isPseudo = 1 in {
65326532
let isTerminator = 1 in
65336533
def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
65346534
}
6535+
6536+
6537+
//===----------------------------------------------------------------------===//
6538+
// Pseudo Instructions for use when early-clobber is defined and Greedy Register
6539+
// Allocation is used. This ensures the constraint is used properly.
6540+
//===----------------------------------------------------------------------===//
6541+
let isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
6542+
def PseudoARMInitUndefMQPR : PseudoInst<(outs MQPR:$vd), (ins), NoItinerary, []>;
6543+
def PseudoARMInitUndefSPR : PseudoInst<(outs SPR:$sd), (ins), NoItinerary, []>;
6544+
def PseudoARMInitUndefDPR_VFP2 : PseudoInst<(outs DPR_VFP2:$dd), (ins), NoItinerary, []>;
6545+
}

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,14 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
278278
return &InstrInfo->getRegisterInfo();
279279
}
280280

281+
/// Returns true if this subtarget has MVE integer ops or NEON, enabling the Init Undef Pass.
282+
/// We want to enable this for MVE and NEON instructions, however this can be
283+
/// easily expanded by adding more Pseudo Instructions for the relevant
284+
/// Register types.
285+
bool supportsInitUndef() const override {
286+
return HasMVEIntegerOps || HasNEON;
287+
}
288+
281289
const CallLowering *getCallLowering() const override;
282290
InstructionSelector *getInstructionSelector() const override;
283291
const LegalizerInfo *getLegalizerInfo() const override;

llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,17 @@ entry:
699699
ret <4 x i32> %0
700700
}
701701

702+
define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef() {
703+
; CHECK-LABEL: test_vhcaddq_rot270_s32_undef:
704+
; CHECK: @ %bb.0: @ %entry
705+
; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
706+
; CHECK-NOT: vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270
707+
; CHECK-NEXT: bx lr
708+
entry:
709+
%0 = tail call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> undef, <4 x i32> undef)
710+
ret <4 x i32> %0
711+
}
712+
702713
define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
703714
; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
704715
; CHECK: @ %bb.0: @ %entry

llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,13 +373,13 @@ define arm_aapcs_vfpcc void @mul_i32(ptr %A, ptr %B, i64 %C, ptr %D) {
373373
; CHECK-NEXT: vldrw.u32 q1, [r0]
374374
; CHECK-NEXT: vldrw.u32 q0, [r1]
375375
; CHECK-NEXT: ldr.w lr, [sp, #20]
376-
; CHECK-NEXT: vmov.f32 s10, s1
377376
; CHECK-NEXT: vmov.f32 s14, s5
377+
; CHECK-NEXT: vmov.f32 s10, s1
378378
; CHECK-NEXT: vmov r5, s4
379379
; CHECK-NEXT: vmov.f32 s4, s6
380380
; CHECK-NEXT: vmov.f32 s6, s7
381-
; CHECK-NEXT: vmov r0, s10
382381
; CHECK-NEXT: vmov r1, s14
382+
; CHECK-NEXT: vmov r0, s10
383383
; CHECK-NEXT: smull r12, r3, r1, r0
384384
; CHECK-NEXT: vmov r0, s0
385385
; CHECK-NEXT: vmov.f32 s0, s2

llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll

Lines changed: 57 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -222,88 +222,88 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
222222
; CHECK-NEXT: vldrw.u32 q1, [r4]
223223
; CHECK-NEXT: .LBB1_4: @ %vector.body
224224
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
225-
; CHECK-NEXT: vldrw.u32 q4, [r5], #16
226-
; CHECK-NEXT: vldrw.u32 q3, [r0], #16
225+
; CHECK-NEXT: vldrw.u32 q3, [r5], #16
226+
; CHECK-NEXT: vldrw.u32 q2, [r0], #16
227227
; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
228228
; CHECK-NEXT: mov.w r2, #-1
229-
; CHECK-NEXT: vmov.f32 s8, s14
229+
; CHECK-NEXT: vmov.f32 s16, s10
230230
; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
231-
; CHECK-NEXT: vmov.f32 s20, s18
231+
; CHECK-NEXT: vmov.f32 s20, s14
232+
; CHECK-NEXT: vmov.f32 s18, s11
233+
; CHECK-NEXT: vmov.f32 s22, s15
232234
; CHECK-NEXT: mov.w r8, #0
233-
; CHECK-NEXT: vmov.f32 s10, s15
234-
; CHECK-NEXT: vmov.f32 s22, s19
235-
; CHECK-NEXT: vmullb.s32 q6, q5, q2
236-
; CHECK-NEXT: vmov.f32 s18, s17
235+
; CHECK-NEXT: vmullb.s32 q6, q5, q4
236+
; CHECK-NEXT: vmov.f32 s14, s13
237237
; CHECK-NEXT: vmov r4, r7, d12
238238
; CHECK-NEXT: asrl r4, r7, #31
239-
; CHECK-NEXT: vmov.f32 s14, s13
239+
; CHECK-NEXT: vmov.f32 s10, s9
240240
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
241241
; CHECK-NEXT: sbcs.w r5, r2, r7
242242
; CHECK-NEXT: csetm r5, lt
243243
; CHECK-NEXT: bfi r8, r5, #0, #8
244244
; CHECK-NEXT: vmov r10, r5, d13
245245
; CHECK-NEXT: asrl r10, r5, #31
246-
; CHECK-NEXT: vmov r6, s18
246+
; CHECK-NEXT: vmov r6, s14
247247
; CHECK-NEXT: rsbs.w r3, r10, #-2147483648
248-
; CHECK-NEXT: vmov q2[2], q2[0], r4, r10
248+
; CHECK-NEXT: vmov q4[2], q4[0], r4, r10
249249
; CHECK-NEXT: sbcs.w r3, r2, r5
250-
; CHECK-NEXT: vmov q2[3], q2[1], r7, r5
250+
; CHECK-NEXT: vmov q4[3], q4[1], r7, r5
251251
; CHECK-NEXT: csetm r3, lt
252252
; CHECK-NEXT: bfi r8, r3, #8, #8
253253
; CHECK-NEXT: vmsr p0, r8
254254
; CHECK-NEXT: mvn r8, #-2147483648
255-
; CHECK-NEXT: vpsel q2, q2, q0
256-
; CHECK-NEXT: vmov r3, r4, d4
255+
; CHECK-NEXT: vpsel q4, q4, q0
256+
; CHECK-NEXT: vmov r3, r4, d8
257257
; CHECK-NEXT: subs.w r3, r3, r8
258258
; CHECK-NEXT: sbcs r3, r4, #0
259259
; CHECK-NEXT: mov.w r4, #0
260260
; CHECK-NEXT: csetm r3, lt
261261
; CHECK-NEXT: bfi r4, r3, #0, #8
262-
; CHECK-NEXT: vmov r3, r5, d5
262+
; CHECK-NEXT: vmov r3, r5, d9
263263
; CHECK-NEXT: subs.w r3, r3, r8
264264
; CHECK-NEXT: sbcs r3, r5, #0
265265
; CHECK-NEXT: mov.w r5, #0
266266
; CHECK-NEXT: csetm r3, lt
267267
; CHECK-NEXT: bfi r4, r3, #8, #8
268-
; CHECK-NEXT: vmov r3, s12
268+
; CHECK-NEXT: vmov r3, s8
269269
; CHECK-NEXT: vmsr p0, r4
270-
; CHECK-NEXT: vmov r4, s16
271-
; CHECK-NEXT: vpsel q2, q2, q1
270+
; CHECK-NEXT: vmov r4, s12
271+
; CHECK-NEXT: vpsel q4, q4, q1
272272
; CHECK-NEXT: smull r4, r7, r4, r3
273273
; CHECK-NEXT: asrl r4, r7, #31
274274
; CHECK-NEXT: rsbs.w r3, r4, #-2147483648
275275
; CHECK-NEXT: sbcs.w r3, r2, r7
276276
; CHECK-NEXT: csetm r3, lt
277277
; CHECK-NEXT: bfi r5, r3, #0, #8
278-
; CHECK-NEXT: vmov r3, s14
278+
; CHECK-NEXT: vmov r3, s10
279279
; CHECK-NEXT: smull r6, r3, r6, r3
280280
; CHECK-NEXT: asrl r6, r3, #31
281281
; CHECK-NEXT: rsbs.w r1, r6, #-2147483648
282-
; CHECK-NEXT: vmov q3[2], q3[0], r4, r6
282+
; CHECK-NEXT: vmov q2[2], q2[0], r4, r6
283283
; CHECK-NEXT: sbcs.w r1, r2, r3
284-
; CHECK-NEXT: vmov q3[3], q3[1], r7, r3
284+
; CHECK-NEXT: vmov q2[3], q2[1], r7, r3
285285
; CHECK-NEXT: csetm r1, lt
286286
; CHECK-NEXT: bfi r5, r1, #8, #8
287287
; CHECK-NEXT: vmsr p0, r5
288288
; CHECK-NEXT: ldrd r5, r2, [sp, #8] @ 8-byte Folded Reload
289-
; CHECK-NEXT: vpsel q3, q3, q0
290-
; CHECK-NEXT: vmov r1, r3, d6
289+
; CHECK-NEXT: vpsel q2, q2, q0
290+
; CHECK-NEXT: vmov r1, r3, d4
291291
; CHECK-NEXT: subs.w r1, r1, r8
292292
; CHECK-NEXT: sbcs r1, r3, #0
293293
; CHECK-NEXT: mov.w r3, #0
294294
; CHECK-NEXT: csetm r1, lt
295295
; CHECK-NEXT: bfi r3, r1, #0, #8
296-
; CHECK-NEXT: vmov r1, r4, d7
296+
; CHECK-NEXT: vmov r1, r4, d5
297297
; CHECK-NEXT: subs.w r1, r1, r8
298298
; CHECK-NEXT: sbcs r1, r4, #0
299299
; CHECK-NEXT: csetm r1, lt
300300
; CHECK-NEXT: bfi r3, r1, #8, #8
301301
; CHECK-NEXT: vmsr p0, r3
302-
; CHECK-NEXT: vpsel q3, q3, q1
303-
; CHECK-NEXT: vmov.f32 s13, s14
304-
; CHECK-NEXT: vmov.f32 s14, s8
305-
; CHECK-NEXT: vmov.f32 s15, s10
306-
; CHECK-NEXT: vstrb.8 q3, [r2], #16
302+
; CHECK-NEXT: vpsel q2, q2, q1
303+
; CHECK-NEXT: vmov.f32 s9, s10
304+
; CHECK-NEXT: vmov.f32 s10, s16
305+
; CHECK-NEXT: vmov.f32 s11, s18
306+
; CHECK-NEXT: vstrb.8 q2, [r2], #16
307307
; CHECK-NEXT: le lr, .LBB1_4
308308
; CHECK-NEXT: @ %bb.5: @ %middle.block
309309
; CHECK-NEXT: ldrd r1, r3, [sp] @ 8-byte Folded Reload
@@ -462,14 +462,14 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
462462
; CHECK-NEXT: vcmp.u32 cs, q1, q4
463463
; CHECK-NEXT: vstr p0, [sp, #20] @ 4-byte Spill
464464
; CHECK-NEXT: vpstt
465-
; CHECK-NEXT: vldrwt.u32 q5, [r0], #16
466-
; CHECK-NEXT: vldrwt.u32 q6, [r1], #16
467-
; CHECK-NEXT: vmov.f32 s16, s22
468-
; CHECK-NEXT: vmov.f32 s18, s23
469-
; CHECK-NEXT: vmov.f32 s28, s26
470-
; CHECK-NEXT: vmov.f32 s30, s27
471-
; CHECK-NEXT: vmullb.s32 q0, q7, q4
472-
; CHECK-NEXT: vmov.f32 s22, s25
465+
; CHECK-NEXT: vldrwt.u32 q4, [r0], #16
466+
; CHECK-NEXT: vldrwt.u32 q5, [r1], #16
467+
; CHECK-NEXT: vmov.f32 s24, s18
468+
; CHECK-NEXT: vmov.f32 s26, s19
469+
; CHECK-NEXT: vmov.f32 s28, s22
470+
; CHECK-NEXT: vmov.f32 s30, s23
471+
; CHECK-NEXT: vmullb.s32 q0, q7, q6
472+
; CHECK-NEXT: vmov.f32 s18, s21
473473
; CHECK-NEXT: vmov r10, r5, d0
474474
; CHECK-NEXT: asrl r10, r5, #31
475475
; CHECK-NEXT: rsbs.w r7, r10, #-2147483648
@@ -483,7 +483,7 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
483483
; CHECK-NEXT: sbcs.w r3, r12, r7
484484
; CHECK-NEXT: vmov q0[3], q0[1], r5, r7
485485
; CHECK-NEXT: csetm r3, lt
486-
; CHECK-NEXT: vmov r7, s22
486+
; CHECK-NEXT: vmov r7, s18
487487
; CHECK-NEXT: bfi r4, r3, #8, #8
488488
; CHECK-NEXT: vmsr p0, r4
489489
; CHECK-NEXT: vpsel q0, q0, q2
@@ -498,11 +498,11 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
498498
; CHECK-NEXT: sbcs r3, r5, #0
499499
; CHECK-NEXT: csetm r3, lt
500500
; CHECK-NEXT: bfi r4, r3, #8, #8
501-
; CHECK-NEXT: vmov r3, s20
501+
; CHECK-NEXT: vmov r3, s16
502502
; CHECK-NEXT: vmsr p0, r4
503-
; CHECK-NEXT: vmov r4, s24
504-
; CHECK-NEXT: vpsel q4, q0, q3
505-
; CHECK-NEXT: vmov.f32 s2, s21
503+
; CHECK-NEXT: vmov r4, s20
504+
; CHECK-NEXT: vpsel q6, q0, q3
505+
; CHECK-NEXT: vmov.f32 s2, s17
506506
; CHECK-NEXT: smull r10, r5, r4, r3
507507
; CHECK-NEXT: movs r4, #0
508508
; CHECK-NEXT: asrl r10, r5, #31
@@ -536,8 +536,8 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
536536
; CHECK-NEXT: vpsel q0, q0, q3
537537
; CHECK-NEXT: vldr p0, [sp, #20] @ 4-byte Reload
538538
; CHECK-NEXT: vmov.f32 s1, s2
539-
; CHECK-NEXT: vmov.f32 s2, s16
540-
; CHECK-NEXT: vmov.f32 s3, s18
539+
; CHECK-NEXT: vmov.f32 s2, s24
540+
; CHECK-NEXT: vmov.f32 s3, s26
541541
; CHECK-NEXT: vpst
542542
; CHECK-NEXT: vstrwt.32 q0, [r2], #16
543543
; CHECK-NEXT: le lr, .LBB2_2
@@ -778,34 +778,34 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
778778
; CHECK-NEXT: .LBB4_4: @ %vector.body
779779
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
780780
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
781-
; CHECK-NEXT: vldrw.u32 q3, [r1], #16
782-
; CHECK-NEXT: vmov.f32 s8, s6
783-
; CHECK-NEXT: vmov.f32 s16, s14
784-
; CHECK-NEXT: vmov.f32 s10, s7
785-
; CHECK-NEXT: vmov.f32 s18, s15
786-
; CHECK-NEXT: vmullb.u32 q5, q4, q2
781+
; CHECK-NEXT: vldrw.u32 q2, [r1], #16
782+
; CHECK-NEXT: vmov.f32 s12, s6
783+
; CHECK-NEXT: vmov.f32 s16, s10
784+
; CHECK-NEXT: vmov.f32 s14, s7
785+
; CHECK-NEXT: vmov.f32 s18, s11
786+
; CHECK-NEXT: vmullb.u32 q5, q4, q3
787787
; CHECK-NEXT: vmov.f32 s6, s5
788788
; CHECK-NEXT: vmov r10, r5, d10
789789
; CHECK-NEXT: lsrl r10, r5, #31
790-
; CHECK-NEXT: vmov.f32 s14, s13
790+
; CHECK-NEXT: vmov.f32 s10, s9
791791
; CHECK-NEXT: subs.w r6, r10, #-1
792-
; CHECK-NEXT: vmullb.u32 q4, q3, q1
793792
; CHECK-NEXT: sbcs r5, r5, #0
794793
; CHECK-NEXT: mov.w r6, #0
795794
; CHECK-NEXT: csetm r5, lo
796795
; CHECK-NEXT: bfi r6, r5, #0, #8
797796
; CHECK-NEXT: vmov r4, r5, d11
798797
; CHECK-NEXT: lsrl r4, r5, #31
799798
; CHECK-NEXT: subs.w r7, r4, #-1
800-
; CHECK-NEXT: vmov q2[2], q2[0], r10, r4
799+
; CHECK-NEXT: vmov q3[2], q3[0], r10, r4
801800
; CHECK-NEXT: sbcs r5, r5, #0
802801
; CHECK-NEXT: csetm r5, lo
803802
; CHECK-NEXT: bfi r6, r5, #8, #8
803+
; CHECK-NEXT: vmsr p0, r6
804+
; CHECK-NEXT: vpsel q3, q3, q0
805+
; CHECK-NEXT: vmullb.u32 q4, q2, q1
804806
; CHECK-NEXT: vmov r10, r5, d8
805807
; CHECK-NEXT: lsrl r10, r5, #31
806-
; CHECK-NEXT: vmsr p0, r6
807808
; CHECK-NEXT: subs.w r6, r10, #-1
808-
; CHECK-NEXT: vpsel q2, q2, q0
809809
; CHECK-NEXT: sbcs r5, r5, #0
810810
; CHECK-NEXT: mov.w r6, #0
811811
; CHECK-NEXT: csetm r5, lo
@@ -820,8 +820,8 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
820820
; CHECK-NEXT: vmsr p0, r6
821821
; CHECK-NEXT: vpsel q1, q1, q0
822822
; CHECK-NEXT: vmov.f32 s5, s6
823-
; CHECK-NEXT: vmov.f32 s6, s8
824-
; CHECK-NEXT: vmov.f32 s7, s10
823+
; CHECK-NEXT: vmov.f32 s6, s12
824+
; CHECK-NEXT: vmov.f32 s7, s14
825825
; CHECK-NEXT: vstrb.8 q1, [r2], #16
826826
; CHECK-NEXT: le lr, .LBB4_4
827827
; CHECK-NEXT: @ %bb.5: @ %middle.block

0 commit comments

Comments
 (0)