Skip to content

Commit 0113f26

Browse files
[AArch64][SME] Enable subreg liveness tracking for AArch64 (#92142)
The SME dot instructions in these tests operate on contiguous register tuples which use one subregister from each of the loads. When using the strided register form for all loads, enabling subreg liveness tracking will allow us to recognise that there is no overlap between the register tuples used by each of the dot instructions. This is the first in a series of patches to improve the allocation of strided and contiguous registers for SME.
1 parent ad702e0 commit 0113f26

File tree

72 files changed

+2559
-5465
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+2559
-5465
lines changed

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
149149
const Triple &getTargetTriple() const { return TargetTriple; }
150150
bool enableMachineScheduler() const override { return true; }
151151
bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
152+
bool enableSubRegLiveness() const override { return true; }
152153

153154
bool enableMachinePipeliner() const override;
154155
bool useDFAforSMS() const override { return false; }

llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2273,10 +2273,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
22732273
; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
22742274
; -O1: ldp x4, x5, [x0]
22752275
; -O1: and x8, x4, x2
2276-
; -O1: and x9, x7, x3
2277-
; -O1: mvn x10, x8
2278-
; -O1: mvn x11, x9
2279-
; -O1: casp x4, x5, x10, x11, [x0]
2276+
; -O1: and x9, x5, x3
2277+
; -O1: mvn x8, x8
2278+
; -O1: mvn x9, x9
2279+
; -O1: casp x4, x5, x8, x9, [x0]
22802280
; -O1: cmp x5, x7
22812281
; -O1: ccmp x4, x6, #0, eq
22822282
%r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2298,10 +2298,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
22982298
; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
22992299
; -O1: ldp x4, x5, [x0]
23002300
; -O1: and x8, x4, x2
2301-
; -O1: and x9, x7, x3
2302-
; -O1: mvn x10, x8
2303-
; -O1: mvn x11, x9
2304-
; -O1: caspa x4, x5, x10, x11, [x0]
2301+
; -O1: and x9, x5, x3
2302+
; -O1: mvn x8, x8
2303+
; -O1: mvn x9, x9
2304+
; -O1: caspa x4, x5, x8, x9, [x0]
23052305
; -O1: cmp x5, x7
23062306
; -O1: ccmp x4, x6, #0, eq
23072307
%r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2323,10 +2323,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
23232323
; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
23242324
; -O1: ldp x4, x5, [x0]
23252325
; -O1: and x8, x4, x2
2326-
; -O1: and x9, x7, x3
2327-
; -O1: mvn x10, x8
2328-
; -O1: mvn x11, x9
2329-
; -O1: caspl x4, x5, x10, x11, [x0]
2326+
; -O1: and x9, x5, x3
2327+
; -O1: mvn x8, x8
2328+
; -O1: mvn x9, x9
2329+
; -O1: caspl x4, x5, x8, x9, [x0]
23302330
; -O1: cmp x5, x7
23312331
; -O1: ccmp x4, x6, #0, eq
23322332
%r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2348,10 +2348,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
23482348
; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
23492349
; -O1: ldp x4, x5, [x0]
23502350
; -O1: and x8, x4, x2
2351-
; -O1: and x9, x7, x3
2352-
; -O1: mvn x10, x8
2353-
; -O1: mvn x11, x9
2354-
; -O1: caspal x4, x5, x10, x11, [x0]
2351+
; -O1: and x9, x5, x3
2352+
; -O1: mvn x8, x8
2353+
; -O1: mvn x9, x9
2354+
; -O1: caspal x4, x5, x8, x9, [x0]
23552355
; -O1: cmp x5, x7
23562356
; -O1: ccmp x4, x6, #0, eq
23572357
%r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2373,10 +2373,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
23732373
; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
23742374
; -O1: ldp x4, x5, [x0]
23752375
; -O1: and x8, x4, x2
2376-
; -O1: and x9, x7, x3
2377-
; -O1: mvn x10, x8
2378-
; -O1: mvn x11, x9
2379-
; -O1: caspal x4, x5, x10, x11, [x0]
2376+
; -O1: and x9, x5, x3
2377+
; -O1: mvn x8, x8
2378+
; -O1: mvn x9, x9
2379+
; -O1: caspal x4, x5, x8, x9, [x0]
23802380
; -O1: cmp x5, x7
23812381
; -O1: ccmp x4, x6, #0, eq
23822382
%r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3406,7 +3406,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
34063406
; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
34073407
; -O1: ldp x4, x5, [x0]
34083408
; -O1: eor x8, x4, x2
3409-
; -O1: eor x9, x7, x3
3409+
; -O1: eor x9, x5, x3
34103410
; -O1: casp x4, x5, x8, x9, [x0]
34113411
; -O1: cmp x5, x7
34123412
; -O1: ccmp x4, x6, #0, eq
@@ -3427,7 +3427,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
34273427
; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
34283428
; -O1: ldp x4, x5, [x0]
34293429
; -O1: eor x8, x4, x2
3430-
; -O1: eor x9, x7, x3
3430+
; -O1: eor x9, x5, x3
34313431
; -O1: caspa x4, x5, x8, x9, [x0]
34323432
; -O1: cmp x5, x7
34333433
; -O1: ccmp x4, x6, #0, eq
@@ -3448,7 +3448,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
34483448
; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
34493449
; -O1: ldp x4, x5, [x0]
34503450
; -O1: eor x8, x4, x2
3451-
; -O1: eor x9, x7, x3
3451+
; -O1: eor x9, x5, x3
34523452
; -O1: caspl x4, x5, x8, x9, [x0]
34533453
; -O1: cmp x5, x7
34543454
; -O1: ccmp x4, x6, #0, eq
@@ -3469,7 +3469,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
34693469
; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
34703470
; -O1: ldp x4, x5, [x0]
34713471
; -O1: eor x8, x4, x2
3472-
; -O1: eor x9, x7, x3
3472+
; -O1: eor x9, x5, x3
34733473
; -O1: caspal x4, x5, x8, x9, [x0]
34743474
; -O1: cmp x5, x7
34753475
; -O1: ccmp x4, x6, #0, eq
@@ -3490,7 +3490,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
34903490
; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
34913491
; -O1: ldp x4, x5, [x0]
34923492
; -O1: eor x8, x4, x2
3493-
; -O1: eor x9, x7, x3
3493+
; -O1: eor x9, x5, x3
34943494
; -O1: caspal x4, x5, x8, x9, [x0]
34953495
; -O1: cmp x5, x7
34963496
; -O1: ccmp x4, x6, #0, eq
@@ -3947,7 +3947,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
39473947
; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
39483948
; -O1: ldp x4, x5, [x0]
39493949
; -O1: cmp x2, x4
3950-
; -O1: csel x9, x7, x3, lt
3950+
; -O1: csel x9, x5, x3, lt
39513951
; -O1: csel x8, x4, x2, lt
39523952
; -O1: casp x4, x5, x8, x9, [x0]
39533953
; -O1: cmp x5, x7
@@ -3975,7 +3975,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
39753975
; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
39763976
; -O1: ldp x4, x5, [x0]
39773977
; -O1: cmp x2, x4
3978-
; -O1: csel x9, x7, x3, lt
3978+
; -O1: csel x9, x5, x3, lt
39793979
; -O1: csel x8, x4, x2, lt
39803980
; -O1: caspa x4, x5, x8, x9, [x0]
39813981
; -O1: cmp x5, x7
@@ -4003,7 +4003,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
40034003
; -O1-LABEL: atomicrmw_max_i128_aligned_release:
40044004
; -O1: ldp x4, x5, [x0]
40054005
; -O1: cmp x2, x4
4006-
; -O1: csel x9, x7, x3, lt
4006+
; -O1: csel x9, x5, x3, lt
40074007
; -O1: csel x8, x4, x2, lt
40084008
; -O1: caspl x4, x5, x8, x9, [x0]
40094009
; -O1: cmp x5, x7
@@ -4031,7 +4031,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
40314031
; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
40324032
; -O1: ldp x4, x5, [x0]
40334033
; -O1: cmp x2, x4
4034-
; -O1: csel x9, x7, x3, lt
4034+
; -O1: csel x9, x5, x3, lt
40354035
; -O1: csel x8, x4, x2, lt
40364036
; -O1: caspal x4, x5, x8, x9, [x0]
40374037
; -O1: cmp x5, x7
@@ -4059,7 +4059,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
40594059
; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
40604060
; -O1: ldp x4, x5, [x0]
40614061
; -O1: cmp x2, x4
4062-
; -O1: csel x9, x7, x3, lt
4062+
; -O1: csel x9, x5, x3, lt
40634063
; -O1: csel x8, x4, x2, lt
40644064
; -O1: caspal x4, x5, x8, x9, [x0]
40654065
; -O1: cmp x5, x7
@@ -4592,7 +4592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
45924592
; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
45934593
; -O1: ldp x4, x5, [x0]
45944594
; -O1: cmp x2, x4
4595-
; -O1: csel x9, x7, x3, ge
4595+
; -O1: csel x9, x5, x3, ge
45964596
; -O1: csel x8, x4, x2, ge
45974597
; -O1: casp x4, x5, x8, x9, [x0]
45984598
; -O1: cmp x5, x7
@@ -4620,7 +4620,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
46204620
; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
46214621
; -O1: ldp x4, x5, [x0]
46224622
; -O1: cmp x2, x4
4623-
; -O1: csel x9, x7, x3, ge
4623+
; -O1: csel x9, x5, x3, ge
46244624
; -O1: csel x8, x4, x2, ge
46254625
; -O1: caspa x4, x5, x8, x9, [x0]
46264626
; -O1: cmp x5, x7
@@ -4648,7 +4648,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
46484648
; -O1-LABEL: atomicrmw_min_i128_aligned_release:
46494649
; -O1: ldp x4, x5, [x0]
46504650
; -O1: cmp x2, x4
4651-
; -O1: csel x9, x7, x3, ge
4651+
; -O1: csel x9, x5, x3, ge
46524652
; -O1: csel x8, x4, x2, ge
46534653
; -O1: caspl x4, x5, x8, x9, [x0]
46544654
; -O1: cmp x5, x7
@@ -4676,7 +4676,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
46764676
; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
46774677
; -O1: ldp x4, x5, [x0]
46784678
; -O1: cmp x2, x4
4679-
; -O1: csel x9, x7, x3, ge
4679+
; -O1: csel x9, x5, x3, ge
46804680
; -O1: csel x8, x4, x2, ge
46814681
; -O1: caspal x4, x5, x8, x9, [x0]
46824682
; -O1: cmp x5, x7
@@ -4704,7 +4704,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
47044704
; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
47054705
; -O1: ldp x4, x5, [x0]
47064706
; -O1: cmp x2, x4
4707-
; -O1: csel x9, x7, x3, ge
4707+
; -O1: csel x9, x5, x3, ge
47084708
; -O1: csel x8, x4, x2, ge
47094709
; -O1: caspal x4, x5, x8, x9, [x0]
47104710
; -O1: cmp x5, x7
@@ -5237,7 +5237,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
52375237
; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
52385238
; -O1: ldp x4, x5, [x0]
52395239
; -O1: cmp x2, x4
5240-
; -O1: csel x9, x7, x3, lo
5240+
; -O1: csel x9, x5, x3, lo
52415241
; -O1: csel x8, x4, x2, lo
52425242
; -O1: casp x4, x5, x8, x9, [x0]
52435243
; -O1: cmp x5, x7
@@ -5265,7 +5265,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
52655265
; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
52665266
; -O1: ldp x4, x5, [x0]
52675267
; -O1: cmp x2, x4
5268-
; -O1: csel x9, x7, x3, lo
5268+
; -O1: csel x9, x5, x3, lo
52695269
; -O1: csel x8, x4, x2, lo
52705270
; -O1: caspa x4, x5, x8, x9, [x0]
52715271
; -O1: cmp x5, x7
@@ -5293,7 +5293,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
52935293
; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
52945294
; -O1: ldp x4, x5, [x0]
52955295
; -O1: cmp x2, x4
5296-
; -O1: csel x9, x7, x3, lo
5296+
; -O1: csel x9, x5, x3, lo
52975297
; -O1: csel x8, x4, x2, lo
52985298
; -O1: caspl x4, x5, x8, x9, [x0]
52995299
; -O1: cmp x5, x7
@@ -5321,7 +5321,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
53215321
; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
53225322
; -O1: ldp x4, x5, [x0]
53235323
; -O1: cmp x2, x4
5324-
; -O1: csel x9, x7, x3, lo
5324+
; -O1: csel x9, x5, x3, lo
53255325
; -O1: csel x8, x4, x2, lo
53265326
; -O1: caspal x4, x5, x8, x9, [x0]
53275327
; -O1: cmp x5, x7
@@ -5349,7 +5349,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
53495349
; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
53505350
; -O1: ldp x4, x5, [x0]
53515351
; -O1: cmp x2, x4
5352-
; -O1: csel x9, x7, x3, lo
5352+
; -O1: csel x9, x5, x3, lo
53535353
; -O1: csel x8, x4, x2, lo
53545354
; -O1: caspal x4, x5, x8, x9, [x0]
53555355
; -O1: cmp x5, x7
@@ -5877,7 +5877,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
58775877
; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
58785878
; -O1: ldp x4, x5, [x0]
58795879
; -O1: cmp x2, x4
5880-
; -O1: csel x9, x7, x3, hs
5880+
; -O1: csel x9, x5, x3, hs
58815881
; -O1: csel x8, x4, x2, hs
58825882
; -O1: casp x4, x5, x8, x9, [x0]
58835883
; -O1: cmp x5, x7
@@ -5905,7 +5905,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
59055905
; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
59065906
; -O1: ldp x4, x5, [x0]
59075907
; -O1: cmp x2, x4
5908-
; -O1: csel x9, x7, x3, hs
5908+
; -O1: csel x9, x5, x3, hs
59095909
; -O1: csel x8, x4, x2, hs
59105910
; -O1: caspa x4, x5, x8, x9, [x0]
59115911
; -O1: cmp x5, x7
@@ -5933,7 +5933,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
59335933
; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
59345934
; -O1: ldp x4, x5, [x0]
59355935
; -O1: cmp x2, x4
5936-
; -O1: csel x9, x7, x3, hs
5936+
; -O1: csel x9, x5, x3, hs
59375937
; -O1: csel x8, x4, x2, hs
59385938
; -O1: caspl x4, x5, x8, x9, [x0]
59395939
; -O1: cmp x5, x7
@@ -5961,7 +5961,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
59615961
; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
59625962
; -O1: ldp x4, x5, [x0]
59635963
; -O1: cmp x2, x4
5964-
; -O1: csel x9, x7, x3, hs
5964+
; -O1: csel x9, x5, x3, hs
59655965
; -O1: csel x8, x4, x2, hs
59665966
; -O1: caspal x4, x5, x8, x9, [x0]
59675967
; -O1: cmp x5, x7
@@ -5989,7 +5989,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
59895989
; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
59905990
; -O1: ldp x4, x5, [x0]
59915991
; -O1: cmp x2, x4
5992-
; -O1: csel x9, x7, x3, hs
5992+
; -O1: csel x9, x5, x3, hs
59935993
; -O1: csel x8, x4, x2, hs
59945994
; -O1: caspal x4, x5, x8, x9, [x0]
59955995
; -O1: cmp x5, x7

0 commit comments

Comments
 (0)