Skip to content

Commit c537c75

Browse files
committed
[AArch64][GlobalISel] Scalarize i128 vector sadd_sat/uadd_sat/etc.
As with other operations, we scalarize any vectors with larger element types to let the scalar legalization kick in.
1 parent 84fec77 commit c537c75

File tree

5 files changed

+133
-56
lines changed

5 files changed

+133
-56
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,6 +1303,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
13031303
.clampNumElements(0, v4s16, v8s16)
13041304
.clampNumElements(0, v2s32, v4s32)
13051305
.clampMaxNumElements(0, s64, 2)
1306+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
13061307
.moreElementsToNextPow2(0)
13071308
.lower();
13081309

llvm/test/CodeGen/AArch64/sadd_sat_vec.ll

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
; CHECK-GI: warning: Instruction selection used fallback path for v16i4
66
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128
87

98
declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
109
declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -498,21 +497,45 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
498497
}
499498

500499
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
501-
; CHECK-LABEL: v2i128:
502-
; CHECK: // %bb.0:
503-
; CHECK-NEXT: adds x8, x0, x4
504-
; CHECK-NEXT: adcs x9, x1, x5
505-
; CHECK-NEXT: asr x10, x9, #63
506-
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
507-
; CHECK-NEXT: csel x0, x10, x8, vs
508-
; CHECK-NEXT: csel x1, x11, x9, vs
509-
; CHECK-NEXT: adds x8, x2, x6
510-
; CHECK-NEXT: adcs x9, x3, x7
511-
; CHECK-NEXT: asr x10, x9, #63
512-
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
513-
; CHECK-NEXT: csel x2, x10, x8, vs
514-
; CHECK-NEXT: csel x3, x11, x9, vs
515-
; CHECK-NEXT: ret
500+
; CHECK-SD-LABEL: v2i128:
501+
; CHECK-SD: // %bb.0:
502+
; CHECK-SD-NEXT: adds x8, x0, x4
503+
; CHECK-SD-NEXT: adcs x9, x1, x5
504+
; CHECK-SD-NEXT: asr x10, x9, #63
505+
; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000
506+
; CHECK-SD-NEXT: csel x0, x10, x8, vs
507+
; CHECK-SD-NEXT: csel x1, x11, x9, vs
508+
; CHECK-SD-NEXT: adds x8, x2, x6
509+
; CHECK-SD-NEXT: adcs x9, x3, x7
510+
; CHECK-SD-NEXT: asr x10, x9, #63
511+
; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000
512+
; CHECK-SD-NEXT: csel x2, x10, x8, vs
513+
; CHECK-SD-NEXT: csel x3, x11, x9, vs
514+
; CHECK-SD-NEXT: ret
515+
;
516+
; CHECK-GI-LABEL: v2i128:
517+
; CHECK-GI: // %bb.0:
518+
; CHECK-GI-NEXT: adds x9, x0, x4
519+
; CHECK-GI-NEXT: mov w8, wzr
520+
; CHECK-GI-NEXT: mov x13, #-9223372036854775808 // =0x8000000000000000
521+
; CHECK-GI-NEXT: adcs x10, x1, x5
522+
; CHECK-GI-NEXT: asr x11, x10, #63
523+
; CHECK-GI-NEXT: cset w12, vs
524+
; CHECK-GI-NEXT: cmp w8, #1
525+
; CHECK-GI-NEXT: adc x14, x11, x13
526+
; CHECK-GI-NEXT: tst w12, #0x1
527+
; CHECK-GI-NEXT: csel x0, x11, x9, ne
528+
; CHECK-GI-NEXT: csel x1, x14, x10, ne
529+
; CHECK-GI-NEXT: adds x9, x2, x6
530+
; CHECK-GI-NEXT: adcs x10, x3, x7
531+
; CHECK-GI-NEXT: asr x11, x10, #63
532+
; CHECK-GI-NEXT: cset w12, vs
533+
; CHECK-GI-NEXT: cmp w8, #1
534+
; CHECK-GI-NEXT: adc x8, x11, x13
535+
; CHECK-GI-NEXT: tst w12, #0x1
536+
; CHECK-GI-NEXT: csel x2, x11, x9, ne
537+
; CHECK-GI-NEXT: csel x3, x8, x10, ne
538+
; CHECK-GI-NEXT: ret
516539
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
517540
ret <2 x i128> %z
518541
}

llvm/test/CodeGen/AArch64/ssub_sat_vec.ll

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
; CHECK-GI: warning: Instruction selection used fallback path for v16i4
66
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128
87

98
declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>)
109
declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -501,21 +500,45 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
501500
}
502501

503502
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
504-
; CHECK-LABEL: v2i128:
505-
; CHECK: // %bb.0:
506-
; CHECK-NEXT: subs x8, x0, x4
507-
; CHECK-NEXT: sbcs x9, x1, x5
508-
; CHECK-NEXT: asr x10, x9, #63
509-
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
510-
; CHECK-NEXT: csel x0, x10, x8, vs
511-
; CHECK-NEXT: csel x1, x11, x9, vs
512-
; CHECK-NEXT: subs x8, x2, x6
513-
; CHECK-NEXT: sbcs x9, x3, x7
514-
; CHECK-NEXT: asr x10, x9, #63
515-
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
516-
; CHECK-NEXT: csel x2, x10, x8, vs
517-
; CHECK-NEXT: csel x3, x11, x9, vs
518-
; CHECK-NEXT: ret
503+
; CHECK-SD-LABEL: v2i128:
504+
; CHECK-SD: // %bb.0:
505+
; CHECK-SD-NEXT: subs x8, x0, x4
506+
; CHECK-SD-NEXT: sbcs x9, x1, x5
507+
; CHECK-SD-NEXT: asr x10, x9, #63
508+
; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000
509+
; CHECK-SD-NEXT: csel x0, x10, x8, vs
510+
; CHECK-SD-NEXT: csel x1, x11, x9, vs
511+
; CHECK-SD-NEXT: subs x8, x2, x6
512+
; CHECK-SD-NEXT: sbcs x9, x3, x7
513+
; CHECK-SD-NEXT: asr x10, x9, #63
514+
; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000
515+
; CHECK-SD-NEXT: csel x2, x10, x8, vs
516+
; CHECK-SD-NEXT: csel x3, x11, x9, vs
517+
; CHECK-SD-NEXT: ret
518+
;
519+
; CHECK-GI-LABEL: v2i128:
520+
; CHECK-GI: // %bb.0:
521+
; CHECK-GI-NEXT: subs x9, x0, x4
522+
; CHECK-GI-NEXT: mov w8, wzr
523+
; CHECK-GI-NEXT: mov x13, #-9223372036854775808 // =0x8000000000000000
524+
; CHECK-GI-NEXT: sbcs x10, x1, x5
525+
; CHECK-GI-NEXT: asr x11, x10, #63
526+
; CHECK-GI-NEXT: cset w12, vs
527+
; CHECK-GI-NEXT: cmp w8, #1
528+
; CHECK-GI-NEXT: adc x14, x11, x13
529+
; CHECK-GI-NEXT: tst w12, #0x1
530+
; CHECK-GI-NEXT: csel x0, x11, x9, ne
531+
; CHECK-GI-NEXT: csel x1, x14, x10, ne
532+
; CHECK-GI-NEXT: subs x9, x2, x6
533+
; CHECK-GI-NEXT: sbcs x10, x3, x7
534+
; CHECK-GI-NEXT: asr x11, x10, #63
535+
; CHECK-GI-NEXT: cset w12, vs
536+
; CHECK-GI-NEXT: cmp w8, #1
537+
; CHECK-GI-NEXT: adc x8, x11, x13
538+
; CHECK-GI-NEXT: tst w12, #0x1
539+
; CHECK-GI-NEXT: csel x2, x11, x9, ne
540+
; CHECK-GI-NEXT: csel x3, x8, x10, ne
541+
; CHECK-GI-NEXT: ret
519542
%z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
520543
ret <2 x i128> %z
521544
}

llvm/test/CodeGen/AArch64/uadd_sat_vec.ll

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
; CHECK-GI: warning: Instruction selection used fallback path for v16i4
66
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128
87

98
declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>)
109
declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -492,17 +491,33 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
492491
}
493492

494493
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
495-
; CHECK-LABEL: v2i128:
496-
; CHECK: // %bb.0:
497-
; CHECK-NEXT: adds x8, x0, x4
498-
; CHECK-NEXT: adcs x9, x1, x5
499-
; CHECK-NEXT: csinv x0, x8, xzr, lo
500-
; CHECK-NEXT: csinv x1, x9, xzr, lo
501-
; CHECK-NEXT: adds x8, x2, x6
502-
; CHECK-NEXT: adcs x9, x3, x7
503-
; CHECK-NEXT: csinv x2, x8, xzr, lo
504-
; CHECK-NEXT: csinv x3, x9, xzr, lo
505-
; CHECK-NEXT: ret
494+
; CHECK-SD-LABEL: v2i128:
495+
; CHECK-SD: // %bb.0:
496+
; CHECK-SD-NEXT: adds x8, x0, x4
497+
; CHECK-SD-NEXT: adcs x9, x1, x5
498+
; CHECK-SD-NEXT: csinv x0, x8, xzr, lo
499+
; CHECK-SD-NEXT: csinv x1, x9, xzr, lo
500+
; CHECK-SD-NEXT: adds x8, x2, x6
501+
; CHECK-SD-NEXT: adcs x9, x3, x7
502+
; CHECK-SD-NEXT: csinv x2, x8, xzr, lo
503+
; CHECK-SD-NEXT: csinv x3, x9, xzr, lo
504+
; CHECK-SD-NEXT: ret
505+
;
506+
; CHECK-GI-LABEL: v2i128:
507+
; CHECK-GI: // %bb.0:
508+
; CHECK-GI-NEXT: adds x8, x0, x4
509+
; CHECK-GI-NEXT: adcs x9, x1, x5
510+
; CHECK-GI-NEXT: cset w10, hs
511+
; CHECK-GI-NEXT: tst w10, #0x1
512+
; CHECK-GI-NEXT: csinv x0, x8, xzr, eq
513+
; CHECK-GI-NEXT: csinv x1, x9, xzr, eq
514+
; CHECK-GI-NEXT: adds x8, x2, x6
515+
; CHECK-GI-NEXT: adcs x9, x3, x7
516+
; CHECK-GI-NEXT: cset w10, hs
517+
; CHECK-GI-NEXT: tst w10, #0x1
518+
; CHECK-GI-NEXT: csinv x2, x8, xzr, eq
519+
; CHECK-GI-NEXT: csinv x3, x9, xzr, eq
520+
; CHECK-GI-NEXT: ret
506521
%z = call <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
507522
ret <2 x i128> %z
508523
}

llvm/test/CodeGen/AArch64/usub_sat_vec.ll

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
; CHECK-GI: warning: Instruction selection used fallback path for v16i4
66
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128
87

98
declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>)
109
declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -490,17 +489,33 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
490489
}
491490

492491
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
493-
; CHECK-LABEL: v2i128:
494-
; CHECK: // %bb.0:
495-
; CHECK-NEXT: subs x8, x0, x4
496-
; CHECK-NEXT: sbcs x9, x1, x5
497-
; CHECK-NEXT: csel x0, xzr, x8, lo
498-
; CHECK-NEXT: csel x1, xzr, x9, lo
499-
; CHECK-NEXT: subs x8, x2, x6
500-
; CHECK-NEXT: sbcs x9, x3, x7
501-
; CHECK-NEXT: csel x2, xzr, x8, lo
502-
; CHECK-NEXT: csel x3, xzr, x9, lo
503-
; CHECK-NEXT: ret
492+
; CHECK-SD-LABEL: v2i128:
493+
; CHECK-SD: // %bb.0:
494+
; CHECK-SD-NEXT: subs x8, x0, x4
495+
; CHECK-SD-NEXT: sbcs x9, x1, x5
496+
; CHECK-SD-NEXT: csel x0, xzr, x8, lo
497+
; CHECK-SD-NEXT: csel x1, xzr, x9, lo
498+
; CHECK-SD-NEXT: subs x8, x2, x6
499+
; CHECK-SD-NEXT: sbcs x9, x3, x7
500+
; CHECK-SD-NEXT: csel x2, xzr, x8, lo
501+
; CHECK-SD-NEXT: csel x3, xzr, x9, lo
502+
; CHECK-SD-NEXT: ret
503+
;
504+
; CHECK-GI-LABEL: v2i128:
505+
; CHECK-GI: // %bb.0:
506+
; CHECK-GI-NEXT: subs x8, x0, x4
507+
; CHECK-GI-NEXT: sbcs x9, x1, x5
508+
; CHECK-GI-NEXT: cset w10, lo
509+
; CHECK-GI-NEXT: tst w10, #0x1
510+
; CHECK-GI-NEXT: csel x0, xzr, x8, ne
511+
; CHECK-GI-NEXT: csel x1, xzr, x9, ne
512+
; CHECK-GI-NEXT: subs x8, x2, x6
513+
; CHECK-GI-NEXT: sbcs x9, x3, x7
514+
; CHECK-GI-NEXT: cset w10, lo
515+
; CHECK-GI-NEXT: tst w10, #0x1
516+
; CHECK-GI-NEXT: csel x2, xzr, x8, ne
517+
; CHECK-GI-NEXT: csel x3, xzr, x9, ne
518+
; CHECK-GI-NEXT: ret
504519
%z = call <2 x i128> @llvm.usub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
505520
ret <2 x i128> %z
506521
}

0 commit comments

Comments
 (0)