Skip to content

Commit 8b26c02

Browse files
authored
[LoongArch] Align stack objects passed to memory intrinsics (#101309)
Memcpy, and other memory intrinsics, typically try to use wider load/store if the source and destination addresses are aligned. In CodeGenPrepare, look for calls to memory intrinsics and, if the object is on the stack, align it to 4-byte (32-bit) or 8-byte (64-bit) boundaries if it is large enough that we expect memcpy to use wider load/store instructions to copy it. Fixes #101295
1 parent 7a134f5 commit 8b26c02

File tree

3 files changed

+39
-117
lines changed

3 files changed

+39
-117
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
2626
#include "llvm/CodeGen/SelectionDAGNodes.h"
2727
#include "llvm/IR/IRBuilder.h"
28+
#include "llvm/IR/IntrinsicInst.h"
2829
#include "llvm/IR/IntrinsicsLoongArch.h"
2930
#include "llvm/Support/CodeGen.h"
3031
#include "llvm/Support/Debug.h"
@@ -6162,3 +6163,23 @@ bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
61626163
return false;
61636164
return true;
61646165
}
6166+
6167+
// memcpy and other memory intrinsics typically try to use wider loads/stores
6168+
// if the source/dest is aligned and the copy size is large enough. We therefore
6169+
// want to align such objects passed to memory intrinsics. For such calls this sets MinSize and PrefAlign to 8 on 64-bit subtargets, 4 otherwise, and returns true; any other call returns false.
6170+
bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
6171+
unsigned &MinSize,
6172+
Align &PrefAlign) const {
6173+
if (!isa<MemIntrinsic>(CI))
6174+
return false;
6175+
6176+
if (Subtarget.is64Bit()) {
6177+
MinSize = 8;
6178+
PrefAlign = Align(8);
6179+
} else {
6180+
MinSize = 4;
6181+
PrefAlign = Align(4);
6182+
}
6183+
6184+
return true;
6185+
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,9 @@ class LoongArchTargetLowering : public TargetLowering {
257257
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
258258
bool shouldExtendTypeInLibCall(EVT Type) const override;
259259

260+
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
261+
Align &PrefAlign) const override;
262+
260263
private:
261264
/// Target-specific function used to lower LoongArch calling conventions.
262265
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,

llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll

Lines changed: 15 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -103,65 +103,17 @@ define void @t3() {
103103
; LA32-NEXT: .cfi_def_cfa_offset 64
104104
; LA32-NEXT: pcalau12i $a0, %pc_hi20(.L.str)
105105
; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(.L.str)
106-
; LA32-NEXT: ld.b $a1, $a0, 21
107-
; LA32-NEXT: ld.bu $a2, $a0, 20
108-
; LA32-NEXT: slli.w $a1, $a1, 8
109-
; LA32-NEXT: or $a1, $a1, $a2
106+
; LA32-NEXT: ld.h $a1, $a0, 20
107+
; LA32-NEXT: ld.w $a2, $a0, 16
110108
; LA32-NEXT: st.h $a1, $sp, 20
111-
; LA32-NEXT: ld.bu $a1, $a0, 17
112-
; LA32-NEXT: ld.bu $a2, $a0, 16
113-
; LA32-NEXT: ld.bu $a3, $a0, 18
114-
; LA32-NEXT: ld.bu $a4, $a0, 19
115-
; LA32-NEXT: slli.w $a1, $a1, 8
116-
; LA32-NEXT: or $a1, $a1, $a2
117-
; LA32-NEXT: slli.w $a2, $a3, 16
118-
; LA32-NEXT: slli.w $a3, $a4, 24
119-
; LA32-NEXT: or $a2, $a3, $a2
120-
; LA32-NEXT: or $a1, $a2, $a1
121-
; LA32-NEXT: st.w $a1, $sp, 16
122-
; LA32-NEXT: ld.bu $a1, $a0, 13
123-
; LA32-NEXT: ld.bu $a2, $a0, 12
124-
; LA32-NEXT: ld.bu $a3, $a0, 14
125-
; LA32-NEXT: ld.bu $a4, $a0, 15
126-
; LA32-NEXT: slli.w $a1, $a1, 8
127-
; LA32-NEXT: or $a1, $a1, $a2
128-
; LA32-NEXT: slli.w $a2, $a3, 16
129-
; LA32-NEXT: slli.w $a3, $a4, 24
130-
; LA32-NEXT: or $a2, $a3, $a2
131-
; LA32-NEXT: or $a1, $a2, $a1
109+
; LA32-NEXT: st.w $a2, $sp, 16
110+
; LA32-NEXT: ld.w $a1, $a0, 12
111+
; LA32-NEXT: ld.w $a2, $a0, 8
112+
; LA32-NEXT: ld.w $a3, $a0, 4
113+
; LA32-NEXT: ld.w $a0, $a0, 0
132114
; LA32-NEXT: st.w $a1, $sp, 12
133-
; LA32-NEXT: ld.bu $a1, $a0, 9
134-
; LA32-NEXT: ld.bu $a2, $a0, 8
135-
; LA32-NEXT: ld.bu $a3, $a0, 10
136-
; LA32-NEXT: ld.bu $a4, $a0, 11
137-
; LA32-NEXT: slli.w $a1, $a1, 8
138-
; LA32-NEXT: or $a1, $a1, $a2
139-
; LA32-NEXT: slli.w $a2, $a3, 16
140-
; LA32-NEXT: slli.w $a3, $a4, 24
141-
; LA32-NEXT: or $a2, $a3, $a2
142-
; LA32-NEXT: or $a1, $a2, $a1
143-
; LA32-NEXT: st.w $a1, $sp, 8
144-
; LA32-NEXT: ld.bu $a1, $a0, 5
145-
; LA32-NEXT: ld.bu $a2, $a0, 4
146-
; LA32-NEXT: ld.bu $a3, $a0, 6
147-
; LA32-NEXT: ld.bu $a4, $a0, 7
148-
; LA32-NEXT: slli.w $a1, $a1, 8
149-
; LA32-NEXT: or $a1, $a1, $a2
150-
; LA32-NEXT: slli.w $a2, $a3, 16
151-
; LA32-NEXT: slli.w $a3, $a4, 24
152-
; LA32-NEXT: or $a2, $a3, $a2
153-
; LA32-NEXT: or $a1, $a2, $a1
154-
; LA32-NEXT: st.w $a1, $sp, 4
155-
; LA32-NEXT: ld.bu $a1, $a0, 1
156-
; LA32-NEXT: ld.bu $a2, $a0, 0
157-
; LA32-NEXT: ld.bu $a3, $a0, 2
158-
; LA32-NEXT: ld.bu $a0, $a0, 3
159-
; LA32-NEXT: slli.w $a1, $a1, 8
160-
; LA32-NEXT: or $a1, $a1, $a2
161-
; LA32-NEXT: slli.w $a2, $a3, 16
162-
; LA32-NEXT: slli.w $a0, $a0, 24
163-
; LA32-NEXT: or $a0, $a0, $a2
164-
; LA32-NEXT: or $a0, $a0, $a1
115+
; LA32-NEXT: st.w $a2, $sp, 8
116+
; LA32-NEXT: st.w $a3, $sp, 4
165117
; LA32-NEXT: st.w $a0, $sp, 0
166118
; LA32-NEXT: addi.w $sp, $sp, 64
167119
; LA32-NEXT: ret
@@ -172,67 +124,13 @@ define void @t3() {
172124
; LA64-NEXT: .cfi_def_cfa_offset 64
173125
; LA64-NEXT: pcalau12i $a0, %pc_hi20(.L.str)
174126
; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(.L.str)
175-
; LA64-NEXT: ld.b $a1, $a0, 21
176-
; LA64-NEXT: ld.bu $a2, $a0, 20
177-
; LA64-NEXT: slli.d $a1, $a1, 8
178-
; LA64-NEXT: or $a1, $a1, $a2
127+
; LA64-NEXT: ld.h $a1, $a0, 20
128+
; LA64-NEXT: ld.w $a2, $a0, 16
129+
; LA64-NEXT: ld.d $a3, $a0, 8
130+
; LA64-NEXT: ld.d $a0, $a0, 0
179131
; LA64-NEXT: st.h $a1, $sp, 20
180-
; LA64-NEXT: ld.bu $a1, $a0, 17
181-
; LA64-NEXT: ld.bu $a2, $a0, 16
182-
; LA64-NEXT: ld.bu $a3, $a0, 18
183-
; LA64-NEXT: ld.b $a4, $a0, 19
184-
; LA64-NEXT: slli.d $a1, $a1, 8
185-
; LA64-NEXT: or $a1, $a1, $a2
186-
; LA64-NEXT: slli.d $a2, $a3, 16
187-
; LA64-NEXT: slli.d $a3, $a4, 24
188-
; LA64-NEXT: or $a2, $a3, $a2
189-
; LA64-NEXT: or $a1, $a2, $a1
190-
; LA64-NEXT: st.w $a1, $sp, 16
191-
; LA64-NEXT: ld.bu $a1, $a0, 9
192-
; LA64-NEXT: ld.bu $a2, $a0, 8
193-
; LA64-NEXT: ld.bu $a3, $a0, 10
194-
; LA64-NEXT: ld.bu $a4, $a0, 11
195-
; LA64-NEXT: slli.d $a1, $a1, 8
196-
; LA64-NEXT: or $a1, $a1, $a2
197-
; LA64-NEXT: slli.d $a2, $a3, 16
198-
; LA64-NEXT: slli.d $a3, $a4, 24
199-
; LA64-NEXT: or $a2, $a3, $a2
200-
; LA64-NEXT: or $a1, $a2, $a1
201-
; LA64-NEXT: ld.bu $a2, $a0, 13
202-
; LA64-NEXT: ld.bu $a3, $a0, 12
203-
; LA64-NEXT: ld.bu $a4, $a0, 14
204-
; LA64-NEXT: ld.bu $a5, $a0, 15
205-
; LA64-NEXT: slli.d $a2, $a2, 8
206-
; LA64-NEXT: or $a2, $a2, $a3
207-
; LA64-NEXT: slli.d $a3, $a4, 16
208-
; LA64-NEXT: slli.d $a4, $a5, 24
209-
; LA64-NEXT: or $a3, $a4, $a3
210-
; LA64-NEXT: or $a2, $a3, $a2
211-
; LA64-NEXT: slli.d $a2, $a2, 32
212-
; LA64-NEXT: or $a1, $a2, $a1
213-
; LA64-NEXT: st.d $a1, $sp, 8
214-
; LA64-NEXT: ld.bu $a1, $a0, 1
215-
; LA64-NEXT: ld.bu $a2, $a0, 0
216-
; LA64-NEXT: ld.bu $a3, $a0, 2
217-
; LA64-NEXT: ld.bu $a4, $a0, 3
218-
; LA64-NEXT: slli.d $a1, $a1, 8
219-
; LA64-NEXT: or $a1, $a1, $a2
220-
; LA64-NEXT: slli.d $a2, $a3, 16
221-
; LA64-NEXT: slli.d $a3, $a4, 24
222-
; LA64-NEXT: or $a2, $a3, $a2
223-
; LA64-NEXT: or $a1, $a2, $a1
224-
; LA64-NEXT: ld.bu $a2, $a0, 5
225-
; LA64-NEXT: ld.bu $a3, $a0, 4
226-
; LA64-NEXT: ld.bu $a4, $a0, 6
227-
; LA64-NEXT: ld.bu $a0, $a0, 7
228-
; LA64-NEXT: slli.d $a2, $a2, 8
229-
; LA64-NEXT: or $a2, $a2, $a3
230-
; LA64-NEXT: slli.d $a3, $a4, 16
231-
; LA64-NEXT: slli.d $a0, $a0, 24
232-
; LA64-NEXT: or $a0, $a0, $a3
233-
; LA64-NEXT: or $a0, $a0, $a2
234-
; LA64-NEXT: slli.d $a0, $a0, 32
235-
; LA64-NEXT: or $a0, $a0, $a1
132+
; LA64-NEXT: st.w $a2, $sp, 16
133+
; LA64-NEXT: st.d $a3, $sp, 8
236134
; LA64-NEXT: st.d $a0, $sp, 0
237135
; LA64-NEXT: addi.d $sp, $sp, 64
238136
; LA64-NEXT: ret

0 commit comments

Comments
 (0)