Skip to content

Commit fc952b2

Browse files
committed
[AArch64] Add pre-index store patterns for bf16.
Like the post-increment (post-index) store patterns, these need to be added for bf16 in much the same way as the existing fp16 patterns. Fixes #97870
1 parent bf258db commit fc952b2

File tree

2 files changed

+163
-38
lines changed

2 files changed

+163
-38
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4660,6 +4660,10 @@ def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
46604660
def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>;
46614661
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
46624662

4663+
// bf16 pre-index store
4664+
def : Pat<(pre_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
4665+
(STRHpre FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;
4666+
46634667
// truncstore i64
46644668
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
46654669
(STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
@@ -4685,6 +4689,8 @@ def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
46854689
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
46864690
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
46874691
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4692+
def : Pat<(pre_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4693+
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
46884694

46894695
def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
46904696
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
@@ -4700,6 +4706,8 @@ def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
47004706
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
47014707
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
47024708
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4709+
def : Pat<(pre_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4710+
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
47034711

47044712
//---
47054713
// (immediate post-indexed)

llvm/test/CodeGen/AArch64/bf16.ll

Lines changed: 155 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,61 @@
1-
; RUN: llc < %s -asm-verbose=0 -mtriple=arm64-eabi -mattr=+bf16 | FileCheck %s
2-
; RUN: llc < %s -asm-verbose=0 -mtriple=aarch64 -mattr=+bf16 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=arm64-eabi -mattr=+bf16 | FileCheck %s
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s
34

45
; test argument passing and simple load/store
56

67
define bfloat @test_load(ptr %p) nounwind {
78
; CHECK-LABEL: test_load:
8-
; CHECK-NEXT: ldr h0, [x0]
9-
; CHECK-NEXT: ret
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: ldr h0, [x0]
11+
; CHECK-NEXT: ret
1012
%tmp1 = load bfloat, ptr %p, align 16
1113
ret bfloat %tmp1
1214
}
1315

1416
define bfloat @test_load_offset1(ptr %p) nounwind {
1517
; CHECK-LABEL: test_load_offset1:
16-
; CHECK-NEXT: ldur h0, [x0, #1]
17-
; CHECK-NEXT: ret
18+
; CHECK: // %bb.0:
19+
; CHECK-NEXT: ldur h0, [x0, #1]
20+
; CHECK-NEXT: ret
1821
%g = getelementptr inbounds i8, ptr %p, i64 1
1922
%tmp1 = load bfloat, ptr %g, align 2
2023
ret bfloat %tmp1
2124
}
2225

2326
define bfloat @test_load_offset2(ptr %p) nounwind {
2427
; CHECK-LABEL: test_load_offset2:
25-
; CHECK-NEXT: ldr h0, [x0, #2]
26-
; CHECK-NEXT: ret
28+
; CHECK: // %bb.0:
29+
; CHECK-NEXT: ldr h0, [x0, #2]
30+
; CHECK-NEXT: ret
2731
%g = getelementptr inbounds i8, ptr %p, i64 2
2832
%tmp1 = load bfloat, ptr %g, align 2
2933
ret bfloat %tmp1
3034
}
3135

3236
define <4 x bfloat> @test_vec_load(ptr %p) nounwind {
3337
; CHECK-LABEL: test_vec_load:
34-
; CHECK-NEXT: ldr d0, [x0]
35-
; CHECK-NEXT: ret
38+
; CHECK: // %bb.0:
39+
; CHECK-NEXT: ldr d0, [x0]
40+
; CHECK-NEXT: ret
3641
%tmp1 = load <4 x bfloat>, ptr %p, align 16
3742
ret <4 x bfloat> %tmp1
3843
}
3944

4045
define void @test_store(ptr %a, bfloat %b) nounwind {
4146
; CHECK-LABEL: test_store:
42-
; CHECK-NEXT: str h0, [x0]
43-
; CHECK-NEXT: ret
47+
; CHECK: // %bb.0:
48+
; CHECK-NEXT: str h0, [x0]
49+
; CHECK-NEXT: ret
4450
store bfloat %b, ptr %a, align 16
4551
ret void
4652
}
4753

4854
define void @test_store_negative_offset(ptr %a, bfloat %b) nounwind {
4955
; CHECK-LABEL: test_store_negative_offset:
50-
; CHECK-NEXT: stur h0, [x0, #-4]
51-
; CHECK-NEXT: ret
56+
; CHECK: // %bb.0: // %entry
57+
; CHECK-NEXT: stur h0, [x0, #-4]
58+
; CHECK-NEXT: ret
5259
entry:
5360
%0 = getelementptr inbounds bfloat, ptr %a, i64 -2
5461
store bfloat %b, ptr %0, align 2
@@ -58,69 +65,179 @@ entry:
5865
; Simple store of v4bf16
5966
define void @test_vec_store(ptr %a, <4 x bfloat> %b) nounwind {
6067
; CHECK-LABEL: test_vec_store:
61-
; CHECK-NEXT: str d0, [x0]
62-
; CHECK-NEXT: ret
68+
; CHECK: // %bb.0: // %entry
69+
; CHECK-NEXT: str d0, [x0]
70+
; CHECK-NEXT: ret
6371
entry:
6472
store <4 x bfloat> %b, ptr %a, align 16
6573
ret void
6674
}
6775

6876
define <8 x bfloat> @test_build_vector_const() {
6977
; CHECK-LABEL: test_build_vector_const:
70-
; CHECK: mov [[TMP:w[0-9]+]], #16256
71-
; CHECK: dup v0.8h, [[TMP]]
78+
; CHECK: // %bb.0:
79+
; CHECK-NEXT: mov w8, #16256 // =0x3f80
80+
; CHECK-NEXT: dup v0.8h, w8
81+
; CHECK-NEXT: ret
7282
ret <8 x bfloat> <bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80>
7383
}
7484

75-
define { bfloat, ptr } @test_store_post(bfloat %val, ptr %ptr) {
85+
define ptr @test_store_post(bfloat %val, ptr %ptr) {
7686
; CHECK-LABEL: test_store_post:
77-
; CHECK: str h0, [x0], #2
78-
87+
; CHECK: // %bb.0:
88+
; CHECK-NEXT: str h0, [x0], #2
89+
; CHECK-NEXT: ret
7990
store bfloat %val, ptr %ptr
80-
%res.tmp = insertvalue { bfloat, ptr } undef, bfloat %val, 0
91+
%next = getelementptr bfloat, ptr %ptr, i32 1
92+
ret ptr %next
93+
}
94+
95+
define ptr @test_store_post_v4bf16(<4 x bfloat> %val, ptr %ptr) {
96+
; CHECK-LABEL: test_store_post_v4bf16:
97+
; CHECK: // %bb.0:
98+
; CHECK-NEXT: str d0, [x0], #8
99+
; CHECK-NEXT: ret
100+
store <4 x bfloat> %val, ptr %ptr
101+
%next = getelementptr <4 x bfloat>, ptr %ptr, i32 1
102+
ret ptr %next
103+
}
104+
105+
define ptr @test_store_post_v8bf16(<8 x bfloat> %val, ptr %ptr) {
106+
; CHECK-LABEL: test_store_post_v8bf16:
107+
; CHECK: // %bb.0:
108+
; CHECK-NEXT: str q0, [x0], #16
109+
; CHECK-NEXT: ret
110+
store <8 x bfloat> %val, ptr %ptr
111+
%next = getelementptr <8 x bfloat>, ptr %ptr, i32 1
112+
ret ptr %next
113+
}
81114

115+
define { bfloat, ptr } @test_load_post(ptr %ptr) {
116+
; CHECK-LABEL: test_load_post:
117+
; CHECK: // %bb.0:
118+
; CHECK-NEXT: ldr h0, [x0], #2
119+
; CHECK-NEXT: ret
120+
%val = load bfloat, ptr %ptr
121+
%res.tmp = insertvalue { bfloat, ptr } undef, bfloat %val, 0
82122
%next = getelementptr bfloat, ptr %ptr, i32 1
83123
%res = insertvalue { bfloat, ptr } %res.tmp, ptr %next, 1
84-
85124
ret { bfloat, ptr } %res
86125
}
87126

88-
define { <4 x bfloat>, ptr } @test_store_post_v4bf16(<4 x bfloat> %val, ptr %ptr) {
89-
; CHECK-LABEL: test_store_post_v4bf16:
90-
; CHECK: str d0, [x0], #8
91-
92-
store <4 x bfloat> %val, ptr %ptr
127+
define { <4 x bfloat>, ptr } @test_load_post_v4bf16(ptr %ptr) {
128+
; CHECK-LABEL: test_load_post_v4bf16:
129+
; CHECK: // %bb.0:
130+
; CHECK-NEXT: ldr d0, [x0], #8
131+
; CHECK-NEXT: ret
132+
%val = load <4 x bfloat>, ptr %ptr
93133
%res.tmp = insertvalue { <4 x bfloat>, ptr } undef, <4 x bfloat> %val, 0
94-
95134
%next = getelementptr <4 x bfloat>, ptr %ptr, i32 1
96135
%res = insertvalue { <4 x bfloat>, ptr } %res.tmp, ptr %next, 1
97-
98136
ret { <4 x bfloat>, ptr } %res
99137
}
100138

101-
define { <8 x bfloat>, ptr } @test_store_post_v8bf16(<8 x bfloat> %val, ptr %ptr) {
102-
; CHECK-LABEL: test_store_post_v8bf16:
103-
; CHECK: str q0, [x0], #16
104-
105-
store <8 x bfloat> %val, ptr %ptr
139+
define { <8 x bfloat>, ptr } @test_load_post_v8bf16(ptr %ptr) {
140+
; CHECK-LABEL: test_load_post_v8bf16:
141+
; CHECK: // %bb.0:
142+
; CHECK-NEXT: ldr q0, [x0], #16
143+
; CHECK-NEXT: ret
144+
%val = load <8 x bfloat>, ptr %ptr
106145
%res.tmp = insertvalue { <8 x bfloat>, ptr } undef, <8 x bfloat> %val, 0
107-
108146
%next = getelementptr <8 x bfloat>, ptr %ptr, i32 1
109147
%res = insertvalue { <8 x bfloat>, ptr } %res.tmp, ptr %next, 1
148+
ret { <8 x bfloat>, ptr } %res
149+
}
110150

151+
define ptr @test_store_pre(bfloat %val, ptr %ptr) {
152+
; CHECK-LABEL: test_store_pre:
153+
; CHECK: // %bb.0:
154+
; CHECK-NEXT: str h0, [x0, #2]!
155+
; CHECK-NEXT: ret
156+
%next = getelementptr bfloat, ptr %ptr, i32 1
157+
store bfloat %val, ptr %next
158+
ret ptr %next
159+
}
160+
161+
define ptr @test_store_pre_v4bf16(<4 x bfloat> %val, ptr %ptr) {
162+
; CHECK-LABEL: test_store_pre_v4bf16:
163+
; CHECK: // %bb.0:
164+
; CHECK-NEXT: str d0, [x0, #8]!
165+
; CHECK-NEXT: ret
166+
%next = getelementptr <4 x bfloat>, ptr %ptr, i32 1
167+
store <4 x bfloat> %val, ptr %next
168+
ret ptr %next
169+
}
170+
171+
define ptr @test_store_pre_v8bf16(<8 x bfloat> %val, ptr %ptr) {
172+
; CHECK-LABEL: test_store_pre_v8bf16:
173+
; CHECK: // %bb.0:
174+
; CHECK-NEXT: str q0, [x0, #16]!
175+
; CHECK-NEXT: ret
176+
%next = getelementptr <8 x bfloat>, ptr %ptr, i32 1
177+
store <8 x bfloat> %val, ptr %next
178+
ret ptr %next
179+
}
180+
181+
define ptr @test_store_pre_v8bf16_trunc(ptr %ptr) {
182+
; CHECK-LABEL: test_store_pre_v8bf16_trunc:
183+
; CHECK: // %bb.0:
184+
; CHECK-NEXT: ldr q0, [x0]
185+
; CHECK-NEXT: str q0, [x0, #16]!
186+
; CHECK-NEXT: ret
187+
%t = load <8 x bfloat>, ptr %ptr
188+
%next = getelementptr <8 x bfloat>, ptr %ptr, i32 1
189+
store <8 x bfloat> %t, ptr %next
190+
ret ptr %next
191+
}
192+
193+
define { bfloat, ptr } @test_load_pre(ptr %ptr) {
194+
; CHECK-LABEL: test_load_pre:
195+
; CHECK: // %bb.0:
196+
; CHECK-NEXT: ldr h0, [x0, #2]!
197+
; CHECK-NEXT: ret
198+
%next = getelementptr bfloat, ptr %ptr, i32 1
199+
%val = load bfloat, ptr %next
200+
%res.tmp = insertvalue { bfloat, ptr } undef, bfloat %val, 0
201+
%res = insertvalue { bfloat, ptr } %res.tmp, ptr %next, 1
202+
ret { bfloat, ptr } %res
203+
}
204+
205+
define { <4 x bfloat>, ptr } @test_load_pre_v4bf16(ptr %ptr) {
206+
; CHECK-LABEL: test_load_pre_v4bf16:
207+
; CHECK: // %bb.0:
208+
; CHECK-NEXT: ldr d0, [x0, #8]!
209+
; CHECK-NEXT: ret
210+
%next = getelementptr <4 x bfloat>, ptr %ptr, i32 1
211+
%val = load <4 x bfloat>, ptr %next
212+
%res.tmp = insertvalue { <4 x bfloat>, ptr } undef, <4 x bfloat> %val, 0
213+
%res = insertvalue { <4 x bfloat>, ptr } %res.tmp, ptr %next, 1
214+
ret { <4 x bfloat>, ptr } %res
215+
}
216+
217+
define { <8 x bfloat>, ptr } @test_load_pre_v8bf16(ptr %ptr) {
218+
; CHECK-LABEL: test_load_pre_v8bf16:
219+
; CHECK: // %bb.0:
220+
; CHECK-NEXT: ldr q0, [x0, #16]!
221+
; CHECK-NEXT: ret
222+
%next = getelementptr <8 x bfloat>, ptr %ptr, i32 1
223+
%val = load <8 x bfloat>, ptr %next
224+
%res.tmp = insertvalue { <8 x bfloat>, ptr } undef, <8 x bfloat> %val, 0
225+
%res = insertvalue { <8 x bfloat>, ptr } %res.tmp, ptr %next, 1
111226
ret { <8 x bfloat>, ptr } %res
112227
}
113228

114229
define bfloat @test_bitcast_halftobfloat(half %a) nounwind {
115230
; CHECK-LABEL: test_bitcast_halftobfloat:
116-
; CHECK-NEXT: ret
231+
; CHECK: // %bb.0:
232+
; CHECK-NEXT: ret
117233
%r = bitcast half %a to bfloat
118234
ret bfloat %r
119235
}
120236

121237
define half @test_bitcast_bfloattohalf(bfloat %a) nounwind {
122238
; CHECK-LABEL: test_bitcast_bfloattohalf:
123-
; CHECK-NEXT: ret
239+
; CHECK: // %bb.0:
240+
; CHECK-NEXT: ret
124241
%r = bitcast bfloat %a to half
125242
ret half %r
126243
}

0 commit comments

Comments
 (0)