Skip to content

Commit 1c0b3d5

Browse files
hvdijk authored and agozillon committed
[ARM] Switch to soft promoting half types. (llvm#80440)
The traditional promotion is known to generate wrong code. Fixes llvm#73805.
1 parent ec1d9c4 commit 1c0b3d5

16 files changed: 929 additions (+929) and 1048 deletions (-1048).

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9055,7 +9055,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
90559055
return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
90569056

90579057
if (getTypeAction(*DAG.getContext(), EltVT) ==
9058-
TargetLowering::TypePromoteFloat) {
9058+
TargetLowering::TypeSoftPromoteHalf) {
90599059
// INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
90609060
// but the type system will try to do that if we don't intervene.
90619061
// Reinterpret any such vector-element insertion as one with the
@@ -9065,7 +9065,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
90659065

90669066
EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
90679067
assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9068-
TargetLowering::TypePromoteFloat);
9068+
TargetLowering::TypeSoftPromoteHalf);
90699069

90709070
SDValue VecIn = Op.getOperand(0);
90719071
EVT VecVT = VecIn.getValueType();

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -762,6 +762,10 @@ class VectorType;
762762
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
763763
Value *Accumulator = nullptr) const override;
764764

765+
bool softPromoteHalfType() const override { return true; }
766+
767+
bool useFPRegsForHalfType() const override { return true; }
768+
765769
protected:
766770
std::pair<const TargetRegisterClass *, uint8_t>
767771
findRepresentativeClass(const TargetRegisterInfo *TRI,

llvm/test/CodeGen/ARM/aes-erratum-fix.ll

Lines changed: 490 additions & 608 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/ARM/arm-half-promote.ll

Lines changed: 95 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -2,78 +2,113 @@
22

33
define arm_aapcs_vfpcc { <8 x half>, <8 x half> } @f1() {
44
; CHECK-LABEL: _f1
5-
; CHECK: vpush {d8}
6-
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
7-
; CHECK-NEXT: vmov.i32 d8, #0x0
8-
; CHECK-NEXT: vmov.i32 d0, #0x0
9-
; CHECK-NEXT: vmov.i32 d1, #0x0
10-
; CHECK-NEXT: vmov.i32 d2, #0x0
11-
; CHECK-NEXT: vmov.i32 d3, #0x0
12-
; CHECK-NEXT: vmov.i32 d4, #0x0
13-
; CHECK-NEXT: vmov.i32 d5, #0x0
14-
; CHECK-NEXT: vmov.i32 d6, #0x0
15-
; CHECK-NEXT: vmov.i32 d7, #0x0
16-
; CHECK-NEXT: vmov.f32 s1, s16
17-
; CHECK-NEXT: vmov.f32 s3, s16
18-
; CHECK-NEXT: vmov.f32 s5, s16
19-
; CHECK-NEXT: vmov.f32 s7, s16
20-
; CHECK-NEXT: vmov.f32 s9, s16
21-
; CHECK-NEXT: vmov.f32 s11, s16
22-
; CHECK-NEXT: vmov.f32 s13, s16
23-
; CHECK-NEXT: vmov.f32 s15, s16
24-
; CHECK-NEXT: vpop {d8}
5+
; CHECK: vpush {d8, d9, d10, d11}
6+
; CHECK-NEXT: vmov.i32 q8, #0x0
7+
; CHECK-NEXT: vmov.u16 r0, d16[0]
8+
; CHECK-NEXT: vmov d4, r0, r0
9+
; CHECK-NEXT: vmov.u16 r0, d16[1]
10+
; CHECK-NEXT: vmov d8, r0, r0
11+
; CHECK-NEXT: vmov.u16 r0, d16[2]
12+
; CHECK-NEXT: vmov d5, r0, r0
13+
; CHECK-NEXT: vmov.u16 r0, d16[3]
14+
; CHECK-NEXT: vmov d9, r0, r0
15+
; CHECK-NEXT: vmov.u16 r0, d17[0]
16+
; CHECK-NEXT: vmov d6, r0, r0
17+
; CHECK-NEXT: vmov.u16 r0, d17[1]
18+
; CHECK-NEXT: vmov d10, r0, r0
19+
; CHECK-NEXT: vmov.u16 r0, d17[2]
20+
; CHECK-NEXT: vmov d7, r0, r0
21+
; CHECK-NEXT: vmov.u16 r0, d17[3]
22+
; CHECK-NEXT: vmov d11, r0, r0
23+
; CHECK: vmov.f32 s0, s8
24+
; CHECK: vmov.f32 s1, s16
25+
; CHECK: vmov.f32 s2, s10
26+
; CHECK: vmov.f32 s3, s18
27+
; CHECK: vmov.f32 s4, s12
28+
; CHECK: vmov.f32 s5, s20
29+
; CHECK: vmov.f32 s6, s14
30+
; CHECK: vmov.f32 s7, s22
31+
; CHECK: vmov.f32 s9, s16
32+
; CHECK: vmov.f32 s11, s18
33+
; CHECK: vmov.f32 s13, s20
34+
; CHECK: vmov.f32 s15, s22
35+
; CHECK: vpop {d8, d9, d10, d11}
2536
; CHECK-NEXT: bx lr
37+
2638
ret { <8 x half>, <8 x half> } zeroinitializer
2739
}
2840

2941
define swiftcc { <8 x half>, <8 x half> } @f2() {
3042
; CHECK-LABEL: _f2
31-
; CHECK: vpush {d8}
32-
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
33-
; CHECK-NEXT: vmov.i32 d8, #0x0
34-
; CHECK-NEXT: vmov.i32 d0, #0x0
35-
; CHECK-NEXT: vmov.i32 d1, #0x0
36-
; CHECK-NEXT: vmov.i32 d2, #0x0
37-
; CHECK-NEXT: vmov.i32 d3, #0x0
38-
; CHECK-NEXT: vmov.i32 d4, #0x0
39-
; CHECK-NEXT: vmov.i32 d5, #0x0
40-
; CHECK-NEXT: vmov.i32 d6, #0x0
41-
; CHECK-NEXT: vmov.i32 d7, #0x0
42-
; CHECK-NEXT: vmov.f32 s1, s16
43-
; CHECK-NEXT: vmov.f32 s3, s16
44-
; CHECK-NEXT: vmov.f32 s5, s16
45-
; CHECK-NEXT: vmov.f32 s7, s16
46-
; CHECK-NEXT: vmov.f32 s9, s16
47-
; CHECK-NEXT: vmov.f32 s11, s16
48-
; CHECK-NEXT: vmov.f32 s13, s16
49-
; CHECK-NEXT: vmov.f32 s15, s16
50-
; CHECK-NEXT: vpop {d8}
43+
; CHECK: vpush {d8, d9, d10, d11}
44+
; CHECK-NEXT: vmov.i32 q8, #0x0
45+
; CHECK-NEXT: vmov.u16 r0, d16[0]
46+
; CHECK-NEXT: vmov d4, r0, r0
47+
; CHECK-NEXT: vmov.u16 r0, d16[1]
48+
; CHECK-NEXT: vmov d8, r0, r0
49+
; CHECK-NEXT: vmov.u16 r0, d16[2]
50+
; CHECK-NEXT: vmov d5, r0, r0
51+
; CHECK-NEXT: vmov.u16 r0, d16[3]
52+
; CHECK-NEXT: vmov d9, r0, r0
53+
; CHECK-NEXT: vmov.u16 r0, d17[0]
54+
; CHECK-NEXT: vmov d6, r0, r0
55+
; CHECK-NEXT: vmov.u16 r0, d17[1]
56+
; CHECK-NEXT: vmov d10, r0, r0
57+
; CHECK-NEXT: vmov.u16 r0, d17[2]
58+
; CHECK-NEXT: vmov d7, r0, r0
59+
; CHECK-NEXT: vmov.u16 r0, d17[3]
60+
; CHECK-NEXT: vmov d11, r0, r0
61+
; CHECK: vmov.f32 s0, s8
62+
; CHECK: vmov.f32 s1, s16
63+
; CHECK: vmov.f32 s2, s10
64+
; CHECK: vmov.f32 s3, s18
65+
; CHECK: vmov.f32 s4, s12
66+
; CHECK: vmov.f32 s5, s20
67+
; CHECK: vmov.f32 s6, s14
68+
; CHECK: vmov.f32 s7, s22
69+
; CHECK: vmov.f32 s9, s16
70+
; CHECK: vmov.f32 s11, s18
71+
; CHECK: vmov.f32 s13, s20
72+
; CHECK: vmov.f32 s15, s22
73+
; CHECK-NEXT: vpop {d8, d9, d10, d11}
5174
; CHECK-NEXT: bx lr
75+
5276
ret { <8 x half>, <8 x half> } zeroinitializer
5377
}
5478

5579
define fastcc { <8 x half>, <8 x half> } @f3() {
5680
; CHECK-LABEL: _f3
57-
; CHECK: vpush {d8}
58-
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
59-
; CHECK-NEXT: vmov.i32 d8, #0x0
60-
; CHECK-NEXT: vmov.i32 d0, #0x0
61-
; CHECK-NEXT: vmov.i32 d1, #0x0
62-
; CHECK-NEXT: vmov.i32 d2, #0x0
63-
; CHECK-NEXT: vmov.i32 d3, #0x0
64-
; CHECK-NEXT: vmov.i32 d4, #0x0
65-
; CHECK-NEXT: vmov.i32 d5, #0x0
66-
; CHECK-NEXT: vmov.i32 d6, #0x0
67-
; CHECK-NEXT: vmov.i32 d7, #0x0
68-
; CHECK-NEXT: vmov.f32 s1, s16
69-
; CHECK-NEXT: vmov.f32 s3, s16
70-
; CHECK-NEXT: vmov.f32 s5, s16
71-
; CHECK-NEXT: vmov.f32 s7, s16
72-
; CHECK-NEXT: vmov.f32 s9, s16
73-
; CHECK-NEXT: vmov.f32 s11, s16
74-
; CHECK-NEXT: vmov.f32 s13, s16
75-
; CHECK-NEXT: vmov.f32 s15, s16
76-
; CHECK-NEXT: vpop {d8}
81+
; CHECK: vpush {d8, d9, d10, d11}
82+
; CHECK-NEXT: vmov.i32 q8, #0x0
83+
; CHECK-NEXT: vmov.u16 r0, d16[0]
84+
; CHECK-NEXT: vmov d4, r0, r0
85+
; CHECK-NEXT: vmov.u16 r0, d16[1]
86+
; CHECK-NEXT: vmov d8, r0, r0
87+
; CHECK-NEXT: vmov.u16 r0, d16[2]
88+
; CHECK-NEXT: vmov d5, r0, r0
89+
; CHECK-NEXT: vmov.u16 r0, d16[3]
90+
; CHECK-NEXT: vmov d9, r0, r0
91+
; CHECK-NEXT: vmov.u16 r0, d17[0]
92+
; CHECK-NEXT: vmov d6, r0, r0
93+
; CHECK-NEXT: vmov.u16 r0, d17[1]
94+
; CHECK-NEXT: vmov d10, r0, r0
95+
; CHECK-NEXT: vmov.u16 r0, d17[2]
96+
; CHECK-NEXT: vmov d7, r0, r0
97+
; CHECK-NEXT: vmov.u16 r0, d17[3]
98+
; CHECK-NEXT: vmov d11, r0, r0
99+
; CHECK: vmov.f32 s0, s8
100+
; CHECK: vmov.f32 s1, s16
101+
; CHECK: vmov.f32 s2, s10
102+
; CHECK: vmov.f32 s3, s18
103+
; CHECK: vmov.f32 s4, s12
104+
; CHECK: vmov.f32 s5, s20
105+
; CHECK: vmov.f32 s6, s14
106+
; CHECK: vmov.f32 s7, s22
107+
; CHECK: vmov.f32 s9, s16
108+
; CHECK: vmov.f32 s11, s18
109+
; CHECK: vmov.f32 s13, s20
110+
; CHECK: vmov.f32 s15, s22
111+
; CHECK-NEXT: vpop {d8, d9, d10, d11}
77112
; CHECK-NEXT: bx lr
78113

79114
ret { <8 x half>, <8 x half> } zeroinitializer

llvm/test/CodeGen/ARM/fp16-args.ll

Lines changed: 0 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -46,46 +46,6 @@ entry:
4646
}
4747

4848
define <4 x half> @foo_vec(<4 x half> %a) {
49-
; SOFT-LABEL: foo_vec:
50-
; SOFT: @ %bb.0: @ %entry
51-
; SOFT-NEXT: vmov s0, r3
52-
; SOFT-NEXT: vmov s2, r1
53-
; SOFT-NEXT: vcvtb.f32.f16 s0, s0
54-
; SOFT-NEXT: vmov s4, r0
55-
; SOFT-NEXT: vcvtb.f32.f16 s2, s2
56-
; SOFT-NEXT: vmov s6, r2
57-
; SOFT-NEXT: vcvtb.f32.f16 s4, s4
58-
; SOFT-NEXT: vcvtb.f32.f16 s6, s6
59-
; SOFT-NEXT: vadd.f32 s0, s0, s0
60-
; SOFT-NEXT: vadd.f32 s2, s2, s2
61-
; SOFT-NEXT: vcvtb.f16.f32 s0, s0
62-
; SOFT-NEXT: vadd.f32 s4, s4, s4
63-
; SOFT-NEXT: vcvtb.f16.f32 s2, s2
64-
; SOFT-NEXT: vadd.f32 s6, s6, s6
65-
; SOFT-NEXT: vcvtb.f16.f32 s4, s4
66-
; SOFT-NEXT: vcvtb.f16.f32 s6, s6
67-
; SOFT-NEXT: vmov r0, s4
68-
; SOFT-NEXT: vmov r1, s2
69-
; SOFT-NEXT: vmov r2, s6
70-
; SOFT-NEXT: vmov r3, s0
71-
; SOFT-NEXT: bx lr
72-
;
73-
; HARD-LABEL: foo_vec:
74-
; HARD: @ %bb.0: @ %entry
75-
; HARD-NEXT: vcvtb.f32.f16 s4, s3
76-
; HARD-NEXT: vcvtb.f32.f16 s2, s2
77-
; HARD-NEXT: vcvtb.f32.f16 s6, s1
78-
; HARD-NEXT: vcvtb.f32.f16 s0, s0
79-
; HARD-NEXT: vadd.f32 s2, s2, s2
80-
; HARD-NEXT: vadd.f32 s0, s0, s0
81-
; HARD-NEXT: vcvtb.f16.f32 s2, s2
82-
; HARD-NEXT: vadd.f32 s4, s4, s4
83-
; HARD-NEXT: vcvtb.f16.f32 s0, s0
84-
; HARD-NEXT: vadd.f32 s6, s6, s6
85-
; HARD-NEXT: vcvtb.f16.f32 s3, s4
86-
; HARD-NEXT: vcvtb.f16.f32 s1, s6
87-
; HARD-NEXT: bx lr
88-
;
8949
; FULL-SOFT-LE-LABEL: foo_vec:
9050
; FULL-SOFT-LE: @ %bb.0: @ %entry
9151
; FULL-SOFT-LE-NEXT: vmov d16, r0, r1

0 commit comments

Comments (0)