Skip to content

Commit c65b4d6

Browse files
committed
[SelectionDAG] Do not second-guess alignment for alloca
Alignment of an alloca in IR can be lower than the preferred alignment on purpose, but the override in FunctionLoweringInfo::set essentially treats the preferred alignment as the minimum alignment. This patch changes that behavior to always use the specified alignment. If alignment is not set explicitly in LLVM IR, it is set to DL.getPrefTypeAlign(Ty) in computeAllocaDefaultAlign.

Tests are changed as well: explicit alignment is increased to match the preferred alignment if it changes output, or omitted when it is hard to determine the right value (e.g. for pointers, some structs, or weird types).

Differential Revision: https://reviews.llvm.org/D135462
1 parent 5e71ca3 commit c65b4d6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+134
-147
lines changed

llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -128,20 +128,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
128128
for (const Instruction &I : BB) {
129129
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
130130
Type *Ty = AI->getAllocatedType();
131-
Align TyPrefAlign = MF->getDataLayout().getPrefTypeAlign(Ty);
132-
// The "specified" alignment is the alignment written on the alloca,
133-
// or the preferred alignment of the type if none is specified.
134-
//
135-
// (Unspecified alignment on allocas will be going away soon.)
136-
Align SpecifiedAlign = AI->getAlign();
137-
138-
// If the preferred alignment of the type is higher than the specified
139-
// alignment of the alloca, promote the alignment, as long as it doesn't
140-
// require realigning the stack.
141-
//
142-
// FIXME: Do we really want to second-guess the IR in isel?
143-
Align Alignment =
144-
std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign);
131+
Align Alignment = AI->getAlign();
145132

146133
// Static allocas can be folded into the initial stack frame
147134
// adjustment. For targets that don't realign the stack, don't

llvm/test/CodeGen/AArch64/preferred-alignment.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
; Function Attrs: nounwind
44
define i32 @foo() #0 {
55
entry:
6-
%c = alloca i8, align 1
6+
%c = alloca i8
77
; CHECK: add x0, sp, #12
8-
%s = alloca i16, align 2
8+
%s = alloca i16
99
; CHECK-NEXT: add x1, sp, #8
10-
%i = alloca i32, align 4
10+
%i = alloca i32
1111
; CHECK-NEXT: add x2, sp, #4
1212
%call = call i32 @bar(ptr %c, ptr %s, ptr %i)
1313
%0 = load i8, ptr %c, align 1

llvm/test/CodeGen/AArch64/seh-finally.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ entry:
4242
; CHECK: ldur w0, [x29, #-8]
4343
; CHECK: bl foo
4444

45-
%o = alloca %struct.S, align 4
45+
%o = alloca %struct.S, align 8
4646
call void (...) @llvm.localescape(ptr %o)
4747
%0 = load i32, ptr %o, align 4
4848
invoke void @foo(i32 %0) #5

llvm/test/CodeGen/AMDGPU/call-argument-types.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
671671
; GCN-NEXT: s_swappc_b64
672672
; GCN-NOT: [[SP]]
673673
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
674-
%val = alloca { i8, i32 }, align 4, addrspace(5)
674+
%val = alloca { i8, i32 }, align 8, addrspace(5)
675675
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
676676
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
677677
store i8 3, ptr addrspace(5) %gep0
@@ -702,8 +702,8 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
702702
; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
703703
; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off
704704
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
705-
%in.val = alloca { i8, i32 }, align 4, addrspace(5)
706-
%out.val = alloca { i8, i32 }, align 4, addrspace(5)
705+
%in.val = alloca { i8, i32 }, align 8, addrspace(5)
706+
%out.val = alloca { i8, i32 }, align 8, addrspace(5)
707707
%in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
708708
%in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
709709
store i8 3, ptr addrspace(5) %in.gep0

llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ bb5:
289289

290290
; GCN: ds_write_b32 v{{[0-9]+}}, [[PTR]]
291291
define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
292-
%alloca0 = alloca { i8, i32 }, align 4, addrspace(5)
292+
%alloca0 = alloca { i8, i32 }, align 8, addrspace(5)
293293
%cmp = icmp eq i32 %arg0, 0
294294
br i1 %cmp, label %bb, label %ret
295295

llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11098,7 +11098,7 @@ entry:
1109811098
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
1109911099

1110011100
; allocate enough scratch to go beyond 2^12 addressing
11101-
%scratch = alloca <1280 x i32>, align 8, addrspace(5)
11101+
%scratch = alloca <1280 x i32>, align 16, addrspace(5)
1110211102

1110311103
; load VGPR data
1110411104
%aptr = getelementptr <64 x i32>, ptr addrspace(1) %in, i32 %tid

llvm/test/CodeGen/ARM/ssp-data-layout.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,8 +386,8 @@ entry:
386386
; CHECK: bl get_struct_large_char2
387387
; CHECK: strb r0, [sp, #106]
388388
; CHECK: bl end_struct_large_char2
389-
%a = alloca %struct.struct_small_char, align 1
390-
%b = alloca %struct.struct_large_char2, align 1
389+
%a = alloca %struct.struct_small_char, align 4
390+
%b = alloca %struct.struct_large_char2, align 4
391391
%d1 = alloca %struct.struct_large_nonchar, align 8
392392
%d2 = alloca %struct.struct_small_nonchar, align 2
393393
%call = call signext i8 @get_struct_small_char()

llvm/test/CodeGen/BPF/pr57872.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ define void @foo(ptr %g) {
180180
; CHECK-NEXT: call bar
181181
; CHECK-NEXT: exit
182182
entry:
183-
%event = alloca %struct.event, align 1
183+
%event = alloca %struct.event, align 8
184184
%hostname = getelementptr inbounds %struct.event, ptr %event, i64 0, i32 1
185185
%0 = load ptr, ptr %g, align 8
186186
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(84) %hostname, ptr noundef nonnull align 1 dereferenceable(84) %0, i64 84, i1 false)

llvm/test/CodeGen/BPF/undef.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ define i32 @ebpf_filter(ptr nocapture readnone %ebpf_packet) #0 section "socket1
4040
; CHECK: r1 = routing
4141
; CHECK: call bpf_map_lookup_elem
4242
; CHECK: exit
43-
%key = alloca %struct.routing_key_2, align 1
43+
%key = alloca %struct.routing_key_2, align 8
4444
store i8 5, ptr %key, align 1
4545
%1 = getelementptr inbounds %struct.routing_key_2, ptr %key, i64 0, i32 0, i64 1
4646
store i8 6, ptr %1, align 1

llvm/test/CodeGen/Mips/Fast-ISel/fastalloca.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ entry:
1010
; CHECK-LABEL: foobar:
1111
%retval = alloca i32, align 4
1212
%x.addr = alloca i32, align 4
13-
%a = alloca %struct.x, align 4
14-
%c = alloca ptr, align 4
13+
%a = alloca %struct.x, align 8
14+
%c = alloca ptr, align 8
1515
store i32 %x, ptr %x.addr, align 4
1616
%0 = load i32, ptr %x.addr, align 4
1717
store i32 %0, ptr %a, align 4

llvm/test/CodeGen/Mips/atomic64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ define i64 @AtomicSwap64(i64 signext %newval) nounwind {
11451145
; MIPS64EB-NEXT: jr $ra
11461146
; MIPS64EB-NEXT: daddiu $sp, $sp, 16
11471147
entry:
1148-
%newval.addr = alloca i64, align 4
1148+
%newval.addr = alloca i64, align 8
11491149
store i64 %newval, ptr %newval.addr, align 4
11501150
%tmp = load i64, ptr %newval.addr, align 4
11511151
%0 = atomicrmw xchg ptr @x, i64 %tmp monotonic
@@ -1359,7 +1359,7 @@ define i64 @AtomicCmpSwap64(i64 signext %oldval, i64 signext %newval) nounwind {
13591359
; MIPS64EB-NEXT: jr $ra
13601360
; MIPS64EB-NEXT: daddiu $sp, $sp, 16
13611361
entry:
1362-
%newval.addr = alloca i64, align 4
1362+
%newval.addr = alloca i64, align 8
13631363
store i64 %newval, ptr %newval.addr, align 4
13641364
%tmp = load i64, ptr %newval.addr, align 4
13651365
%0 = cmpxchg ptr @x, i64 %oldval, i64 %tmp monotonic monotonic

llvm/test/CodeGen/Mips/cconv/byval.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ define dso_local void @g() #0 {
151151
; N64-NEXT: jr $ra
152152
; N64-NEXT: daddu $sp, $sp, $1
153153
entry:
154-
%a = alloca %struct.S1, align 4
154+
%a = alloca %struct.S1, align 8
155155
call void @f2(ptr byval(%struct.S1) align 4 %a)
156156
ret void
157157
}
@@ -340,8 +340,8 @@ define dso_local void @g2(ptr %a) {
340340
; N64-NEXT: jr $ra
341341
; N64-NEXT: daddu $sp, $sp, $1
342342
entry:
343-
%a.addr = alloca ptr, align 4
344-
%byval-temp = alloca %struct.S1, align 4
343+
%a.addr = alloca ptr
344+
%byval-temp = alloca %struct.S1, align 8
345345
store ptr %a, ptr %a.addr, align 4
346346
%0 = load ptr, ptr %a.addr, align 4
347347
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %byval-temp, ptr align 1 %0, i32 65520, i1 false)
@@ -410,8 +410,8 @@ define dso_local i32 @g3(ptr %a, ptr %b) #0 {
410410
; N64-NEXT: jr $ra
411411
; N64-NEXT: daddiu $sp, $sp, 32
412412
entry:
413-
%a.addr = alloca ptr, align 4
414-
%b.addr = alloca ptr, align 4
413+
%a.addr = alloca ptr
414+
%b.addr = alloca ptr
415415
store ptr %a, ptr %a.addr, align 4
416416
store ptr %b, ptr %b.addr, align 4
417417
%0 = load ptr, ptr %a.addr, align 4

llvm/test/CodeGen/Mips/cconv/return-struct.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ define inreg {i16} @ret_struct_i16() nounwind {
139139
; N64-LE-NEXT: jr $ra
140140
; N64-LE-NEXT: daddiu $sp, $sp, 16
141141
entry:
142-
%retval = alloca {i8,i8}, align 1
142+
%retval = alloca {i8,i8}, align 8
143143
call void @llvm.memcpy.p0.p0.i64(ptr %retval, ptr @struct_2byte, i64 2, i1 false)
144144
%0 = load volatile {i16}, ptr %retval
145145
ret {i16} %0

llvm/test/CodeGen/Mips/largeimmprinting.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ entry:
2424
; 64: daddu $[[R1]], $sp, $[[R1]]
2525
; 64: sd $ra, 24($[[R1]])
2626

27-
%agg.tmp = alloca %struct.S1, align 1
27+
%agg.tmp = alloca %struct.S1, align 8
2828
call void @llvm.memcpy.p0.p0.i32(ptr align 1 %agg.tmp, ptr align 1 @s1, i32 65536, i1 false)
2929
call void @f2(ptr byval(%struct.S1) %agg.tmp) nounwind
3030
ret void

llvm/test/CodeGen/Mips/o32_cc_byval.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ define void @f1() nounwind {
8080
; CHECK-NEXT: jr $ra
8181
; CHECK-NEXT: addiu $sp, $sp, 64
8282
entry:
83-
%agg.tmp10 = alloca %struct.S3, align 4
83+
%agg.tmp10 = alloca %struct.S3, align 8
8484
call void @callee1(float 2.000000e+01, ptr byval(%struct.S1) @f1.s1) nounwind
8585
call void @callee2(ptr byval(%struct.S2) @f1.s2) nounwind
8686
store i8 11, ptr %agg.tmp10, align 4

llvm/test/CodeGen/NVPTX/lower-byval-args.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ bb:
118118

119119
; Verify that if the pointer escapes, then we do fall back onto using a temp copy.
120120
; CHECK-LABEL: .visible .entry pointer_escapes
121-
; CHECK: .local .align 8 .b8 __local_depot{{.*}}
121+
; CHECK: .local .align 4 .b8 __local_depot{{.*}}
122122
; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0]
123123
; CHECK64: add.u64 %[[copy_addr:rd[0-9]+]], %SPL, 0;
124124
; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0]

llvm/test/CodeGen/PowerPC/aix-cc-byval.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ entry:
353353
define void @call_test_byval_4Byte() {
354354
entry:
355355
%s0 = alloca %struct.S0, align 8
356-
%s4a = alloca %struct.S4A, align 4
356+
%s4a = alloca %struct.S4A, align 8
357357
%call = call signext i32 @test_byval_4Byte(ptr byval(%struct.S4) align 1 @gS4, ptr byval(%struct.S0) align 1 %s0, ptr byval(%struct.S4A) align 4 %s4a)
358358
ret void
359359
}
@@ -945,7 +945,7 @@ entry:
945945

946946
define i32 @call_test_byval_homogeneous_float_struct() {
947947
entry:
948-
%s = alloca %struct.F, align 4
948+
%s = alloca %struct.F, align 8
949949
call void @llvm.memset.p0.i32(ptr align 4 %s, i8 0, i32 12, i1 false)
950950
%call = call i32 @test_byval_homogeneous_float_struct(ptr byval(%struct.F) align 4 %s)
951951
ret i32 %call

llvm/test/CodeGen/PowerPC/aix-sret-param.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
define void @test1() {
1919
entry:
20-
%s = alloca %struct.S, align 4
20+
%s = alloca %struct.S, align 8
2121
call void @foo(ptr sret(%struct.S) %s)
2222
ret void
2323
}

llvm/test/CodeGen/PowerPC/byval.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ define dso_local i32 @bar() {
3434
; CHECK-NEXT: mtlr 0
3535
; CHECK-NEXT: blr
3636
entry:
37-
%x = alloca %struct, align 4
37+
%x = alloca %struct, align 8
3838
call void @foo(ptr %x)
3939
%r = call i32 @foo1(ptr byval(%struct) %x)
4040
ret i32 %r

llvm/test/CodeGen/PowerPC/structsinregs.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,13 @@ target triple = "powerpc64-unknown-linux-gnu"
3535

3636
define i32 @caller1() nounwind {
3737
entry:
38-
%p1 = alloca %struct.s1, align 1
39-
%p2 = alloca %struct.s2, align 2
40-
%p3 = alloca %struct.s3, align 2
41-
%p4 = alloca %struct.s4, align 4
42-
%p5 = alloca %struct.s5, align 4
43-
%p6 = alloca %struct.s6, align 4
44-
%p7 = alloca %struct.s7, align 4
38+
%p1 = alloca %struct.s1
39+
%p2 = alloca %struct.s2
40+
%p3 = alloca %struct.s3
41+
%p4 = alloca %struct.s4
42+
%p5 = alloca %struct.s5
43+
%p6 = alloca %struct.s6
44+
%p7 = alloca %struct.s7
4545
call void @llvm.memcpy.p0.p0.i64(ptr %p1, ptr @caller1.p1, i64 1, i1 false)
4646
call void @llvm.memcpy.p0.p0.i64(ptr align 2 %p2, ptr align 2 @caller1.p2, i64 2, i1 false)
4747
call void @llvm.memcpy.p0.p0.i64(ptr align 2 %p3, ptr align 2 @caller1.p3, i64 4, i1 false)
@@ -103,13 +103,13 @@ entry:
103103

104104
define i32 @caller2() nounwind {
105105
entry:
106-
%p1 = alloca %struct.t1, align 1
107-
%p2 = alloca %struct.t2, align 1
108-
%p3 = alloca %struct.t3, align 1
109-
%p4 = alloca %struct.t4, align 1
110-
%p5 = alloca %struct.t5, align 1
111-
%p6 = alloca %struct.t6, align 1
112-
%p7 = alloca %struct.t7, align 1
106+
%p1 = alloca %struct.t1
107+
%p2 = alloca %struct.t2
108+
%p3 = alloca %struct.t3
109+
%p4 = alloca %struct.t4
110+
%p5 = alloca %struct.t5
111+
%p6 = alloca %struct.t6
112+
%p7 = alloca %struct.t7
113113
call void @llvm.memcpy.p0.p0.i64(ptr %p1, ptr @caller2.p1, i64 1, i1 false)
114114
call void @llvm.memcpy.p0.p0.i64(ptr %p2, ptr @caller2.p2, i64 2, i1 false)
115115
call void @llvm.memcpy.p0.p0.i64(ptr %p3, ptr @caller2.p3, i64 3, i1 false)

llvm/test/CodeGen/PowerPC/varargs-struct-float.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "powerpc64-unknown-linux-gnu"
77

88
define void @foo(float inreg %s.coerce) nounwind {
99
entry:
10-
%s = alloca %struct.Sf1, align 4
10+
%s = alloca %struct.Sf1, align 8
1111
store float %s.coerce, ptr %s, align 1
1212
%0 = load float, ptr %s, align 1
1313
call void (i32, ...) @testvaSf1(i32 1, float inreg %0)

llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ define i32 @caller_large_struct() nounwind {
594594
; RV32I-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
595595
; RV32I-WITHFP-NEXT: addi sp, sp, 48
596596
; RV32I-WITHFP-NEXT: ret
597-
%ls = alloca %struct.large, align 4
597+
%ls = alloca %struct.large, align 8
598598
store i32 1, ptr %ls
599599
%b = getelementptr inbounds %struct.large, ptr %ls, i32 0, i32 1
600600
store i32 2, ptr %b

llvm/test/CodeGen/RISCV/frame.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ define i32 @test() nounwind {
4141
; RV32I-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
4242
; RV32I-WITHFP-NEXT: addi sp, sp, 32
4343
; RV32I-WITHFP-NEXT: ret
44-
%key = alloca %struct.key_t, align 4
44+
%key = alloca %struct.key_t, align 8
4545
call void @llvm.memset.p0.i64(ptr align 4 %key, i8 0, i64 20, i1 false)
4646
%1 = getelementptr inbounds %struct.key_t, ptr %key, i64 0, i32 1, i64 0
4747
call void @test1(ptr %1)

llvm/test/CodeGen/RISCV/mem64.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ define void @addi_fold_crash(i64 %arg) nounwind {
368368
; RV64I-NEXT: addi sp, sp, 16
369369
; RV64I-NEXT: ret
370370
bb:
371-
%tmp = alloca %struct.quux, align 4
371+
%tmp = alloca %struct.quux, align 8
372372
%tmp1 = getelementptr inbounds %struct.quux, ptr %tmp, i64 0, i32 1
373373
%tmp2 = getelementptr inbounds %struct.quux, ptr %tmp, i64 0, i32 1, i64 %arg
374374
store i8 0, ptr %tmp2, align 1

0 commit comments

Comments
 (0)