Skip to content

Commit ac7c199

Browse files
authored
[AArch64][GlobalISel] Legalize more G_VECREDUCE_ADD operations. (llvm#123392)
Non-power-of-2 vectors are now padded with zero elements. Smaller vectors are widened using anyext, which I believe will be better in many situations than padding with zeros, although some small types may prefer being scalarized depending on the code. Padding with zeros may not be best for all sizes (v5i8 being the worst); we can hopefully improve that in the future, but these types no longer fall back to SelectionDAG. Other types, such as i128, are scalarized.
1 parent b3458fd commit ac7c199

File tree

7 files changed

+4141
-2011
lines changed

7 files changed

+4141
-2011
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3342,6 +3342,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
33423342
Observer.changedInstr(MI);
33433343
return Legalized;
33443344
}
3345+
case TargetOpcode::G_VECREDUCE_ADD: {
3346+
if (TypeIdx != 1)
3347+
return UnableToLegalize;
3348+
Observer.changingInstr(MI);
3349+
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3350+
widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3351+
Observer.changedInstr(MI);
3352+
return Legalized;
3353+
}
33453354
case TargetOpcode::G_VECREDUCE_FADD:
33463355
case TargetOpcode::G_VECREDUCE_FMUL:
33473356
case TargetOpcode::G_VECREDUCE_FMIN:

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1215,11 +1215,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
12151215
{s32, v4s32},
12161216
{s32, v2s32},
12171217
{s64, v2s64}})
1218+
.moreElementsToNextPow2(1)
12181219
.clampMaxNumElements(1, s64, 2)
12191220
.clampMaxNumElements(1, s32, 4)
12201221
.clampMaxNumElements(1, s16, 8)
12211222
.clampMaxNumElements(1, s8, 16)
1222-
.lower();
1223+
.widenVectorEltsToVectorMinSize(1, 64)
1224+
.scalarize(1);
12231225

12241226
getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
12251227
G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})

llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-add.mir

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,17 @@ body: |
157157
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
158158
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
159159
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
160-
; CHECK-NEXT: [[VECREDUCE_ADD:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[COPY]](<2 x s64>)
161-
; CHECK-NEXT: [[VECREDUCE_ADD1:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[COPY1]](<2 x s64>)
162-
; CHECK-NEXT: [[VECREDUCE_ADD2:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[COPY2]](<2 x s64>)
163-
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VECREDUCE_ADD]], [[VECREDUCE_ADD1]]
164-
; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD]], [[VECREDUCE_ADD2]]
165-
; CHECK-NEXT: $x0 = COPY [[ADD1]](s64)
160+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
161+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[DEF]](s64)
162+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
163+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[C]](s64), [[C]](s64)
164+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
165+
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[IVEC]], [[C]](s64), [[C1]](s64)
166+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD [[COPY]], [[COPY1]]
167+
; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s64>) = G_ADD [[COPY2]], [[IVEC1]]
168+
; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(<2 x s64>) = G_ADD [[ADD]], [[ADD1]]
169+
; CHECK-NEXT: [[VECREDUCE_ADD:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[ADD2]](<2 x s64>)
170+
; CHECK-NEXT: $x0 = COPY [[VECREDUCE_ADD]](s64)
166171
; CHECK-NEXT: RET_ReallyLR implicit $x0
167172
%0:_(<2 x s64>) = COPY $q0
168173
%1:_(<2 x s64>) = COPY $q1

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -867,8 +867,8 @@
867867
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
868868
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
869869
# DEBUG-NEXT: G_VECREDUCE_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
870-
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
871-
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
870+
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
871+
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
872872
# DEBUG-NEXT: G_VECREDUCE_MUL (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
873873
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
874874
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected

llvm/test/CodeGen/AArch64/aarch64-addv.ll

Lines changed: 61 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=aarch64 -aarch64-neon-syntax=generic | FileCheck %s -check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -aarch64-neon-syntax=generic 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
2+
; RUN: llc < %s -mtriple=aarch64 -aarch64-neon-syntax=generic | FileCheck %s -check-prefixes=CHECK,SDAG
3+
; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -aarch64-neon-syntax=generic | FileCheck %s --check-prefixes=CHECK,GISEL
44

55
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
66
declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8>)
@@ -22,15 +22,6 @@ declare i64 @llvm.vector.reduce.add.v3i64(<3 x i64>)
2222
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
2323
declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128>)
2424

25-
; CHECK-GI: warning: Instruction selection used fallback path for addv_v2i8
26-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for addv_v3i8
27-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for addv_v4i8
28-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for addv_v2i16
29-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for addv_v3i16
30-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for addv_v3i32
31-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for addv_v3i64
32-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for addv_v2i128
33-
3425

3526
define i8 @add_B(ptr %arr) {
3627
; CHECK-LABEL: add_B:
@@ -256,15 +247,26 @@ entry:
256247
}
257248

258249
define i8 @addv_v3i8(<3 x i8> %a) {
259-
; CHECK-LABEL: addv_v3i8:
260-
; CHECK: // %bb.0: // %entry
261-
; CHECK-NEXT: movi v0.2d, #0000000000000000
262-
; CHECK-NEXT: mov v0.h[0], w0
263-
; CHECK-NEXT: mov v0.h[1], w1
264-
; CHECK-NEXT: mov v0.h[2], w2
265-
; CHECK-NEXT: addv h0, v0.4h
266-
; CHECK-NEXT: fmov w0, s0
267-
; CHECK-NEXT: ret
250+
; SDAG-LABEL: addv_v3i8:
251+
; SDAG: // %bb.0: // %entry
252+
; SDAG-NEXT: movi v0.2d, #0000000000000000
253+
; SDAG-NEXT: mov v0.h[0], w0
254+
; SDAG-NEXT: mov v0.h[1], w1
255+
; SDAG-NEXT: mov v0.h[2], w2
256+
; SDAG-NEXT: addv h0, v0.4h
257+
; SDAG-NEXT: fmov w0, s0
258+
; SDAG-NEXT: ret
259+
;
260+
; GISEL-LABEL: addv_v3i8:
261+
; GISEL: // %bb.0: // %entry
262+
; GISEL-NEXT: fmov s0, w0
263+
; GISEL-NEXT: mov w8, #0 // =0x0
264+
; GISEL-NEXT: mov v0.h[1], w1
265+
; GISEL-NEXT: mov v0.h[2], w2
266+
; GISEL-NEXT: mov v0.h[3], w8
267+
; GISEL-NEXT: addv h0, v0.4h
268+
; GISEL-NEXT: fmov w0, s0
269+
; GISEL-NEXT: ret
268270
entry:
269271
%arg1 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %a)
270272
ret i8 %arg1
@@ -327,13 +329,22 @@ entry:
327329
}
328330

329331
define i16 @addv_v3i16(<3 x i16> %a) {
330-
; CHECK-LABEL: addv_v3i16:
331-
; CHECK: // %bb.0: // %entry
332-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
333-
; CHECK-NEXT: mov v0.h[3], wzr
334-
; CHECK-NEXT: addv h0, v0.4h
335-
; CHECK-NEXT: fmov w0, s0
336-
; CHECK-NEXT: ret
332+
; SDAG-LABEL: addv_v3i16:
333+
; SDAG: // %bb.0: // %entry
334+
; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
335+
; SDAG-NEXT: mov v0.h[3], wzr
336+
; SDAG-NEXT: addv h0, v0.4h
337+
; SDAG-NEXT: fmov w0, s0
338+
; SDAG-NEXT: ret
339+
;
340+
; GISEL-LABEL: addv_v3i16:
341+
; GISEL: // %bb.0: // %entry
342+
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
343+
; GISEL-NEXT: mov w8, #0 // =0x0
344+
; GISEL-NEXT: mov v0.h[3], w8
345+
; GISEL-NEXT: addv h0, v0.4h
346+
; GISEL-NEXT: fmov w0, s0
347+
; GISEL-NEXT: ret
337348
entry:
338349
%arg1 = call i16 @llvm.vector.reduce.add.v3i16(<3 x i16> %a)
339350
ret i16 %arg1
@@ -431,17 +442,29 @@ entry:
431442
}
432443

433444
define i64 @addv_v3i64(<3 x i64> %a) {
434-
; CHECK-LABEL: addv_v3i64:
435-
; CHECK: // %bb.0: // %entry
436-
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
437-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
438-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
439-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
440-
; CHECK-NEXT: mov v2.d[1], xzr
441-
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
442-
; CHECK-NEXT: addp d0, v0.2d
443-
; CHECK-NEXT: fmov x0, d0
444-
; CHECK-NEXT: ret
445+
; SDAG-LABEL: addv_v3i64:
446+
; SDAG: // %bb.0: // %entry
447+
; SDAG-NEXT: // kill: def $d2 killed $d2 def $q2
448+
; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
449+
; SDAG-NEXT: // kill: def $d1 killed $d1 def $q1
450+
; SDAG-NEXT: mov v0.d[1], v1.d[0]
451+
; SDAG-NEXT: mov v2.d[1], xzr
452+
; SDAG-NEXT: add v0.2d, v0.2d, v2.2d
453+
; SDAG-NEXT: addp d0, v0.2d
454+
; SDAG-NEXT: fmov x0, d0
455+
; SDAG-NEXT: ret
456+
;
457+
; GISEL-LABEL: addv_v3i64:
458+
; GISEL: // %bb.0: // %entry
459+
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
460+
; GISEL-NEXT: // kill: def $d2 killed $d2 def $q2
461+
; GISEL-NEXT: // kill: def $d1 killed $d1 def $q1
462+
; GISEL-NEXT: mov v0.d[1], v1.d[0]
463+
; GISEL-NEXT: mov v2.d[1], xzr
464+
; GISEL-NEXT: add v0.2d, v0.2d, v2.2d
465+
; GISEL-NEXT: addp d0, v0.2d
466+
; GISEL-NEXT: fmov x0, d0
467+
; GISEL-NEXT: ret
445468
entry:
446469
%arg1 = call i64 @llvm.vector.reduce.add.v3i64(<3 x i64> %a)
447470
ret i64 %arg1

0 commit comments

Comments
 (0)