Skip to content

Commit 0f152a5

Browse files
committed
[InferAlignment] Implement InferAlignmentPass
This pass aims to infer alignment for instructions as a separate pass, to reduce redundant work done by InstCombine running multiple times. It runs late in the pipeline, just before the back-end passes where this information is most useful. Differential Revision: https://reviews.llvm.org/D158529
1 parent 3978f37 commit 0f152a5

20 files changed

+250
-117
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//===- InferAlignment.h -----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Infer alignment for loads, stores and other memory operations based on
10+
// trailing zero known bits information.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_TRANSFORMS_SCALAR_INFERALIGNMENT_H
15+
#define LLVM_TRANSFORMS_SCALAR_INFERALIGNMENT_H
16+
17+
#include "llvm/IR/PassManager.h"
18+
19+
namespace llvm {
20+
21+
/// Function pass (new pass manager) that tries to increase the alignment
/// recorded on load and store instructions.
struct InferAlignmentPass : public PassInfoMixin<InferAlignmentPass> {
22+
/// Runs alignment inference on \p F using analyses obtained from \p AM.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
23+
};
24+
25+
} // namespace llvm
26+
27+
#endif // LLVM_TRANSFORMS_SCALAR_INFERALIGNMENT_H

llvm/include/llvm/Transforms/Utils/Local.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,15 @@ AllocaInst *DemoteRegToStack(Instruction &X,
213213
/// deleted and it returns the pointer to the alloca inserted.
214214
AllocaInst *DemotePHIToStack(PHINode *P, Instruction *AllocaPoint = nullptr);
215215

216+
/// If the specified pointer points to an object that we control, try to modify
217+
/// the object's alignment to PrefAlign. Returns a minimum known alignment of
218+
/// the value after the operation, which may be lower than PrefAlign.
219+
///
220+
/// Increasing value alignment isn't often possible though. If alignment is
221+
/// important, a more reliable approach is to simply align all global variables
222+
/// and allocation instructions to their preferred alignment from the beginning.
223+
Align tryEnforceAlignment(Value *V, Align PrefAlign, const DataLayout &DL);
224+
216225
/// Try to ensure that the alignment of \p V is at least \p PrefAlign bytes. If
217226
/// the owning object can be modified and has an alignment less than \p
218227
/// PrefAlign, it will be increased and \p PrefAlign returned. If the alignment

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@
171171
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
172172
#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
173173
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
174+
#include "llvm/Transforms/Scalar/InferAlignment.h"
174175
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
175176
#include "llvm/Transforms/Scalar/JumpThreading.h"
176177
#include "llvm/Transforms/Scalar/LICM.h"

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
#include "llvm/Transforms/Scalar/Float2Int.h"
8989
#include "llvm/Transforms/Scalar/GVN.h"
9090
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
91+
#include "llvm/Transforms/Scalar/InferAlignment.h"
9192
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
9293
#include "llvm/Transforms/Scalar/JumpThreading.h"
9394
#include "llvm/Transforms/Scalar/LICM.h"
@@ -274,6 +275,11 @@ cl::opt<bool> EnableMemProfContextDisambiguation(
274275
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
275276
cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
276277

278+
// Hidden command-line flag, off by default, that gates adding
// InferAlignmentPass to the optimization pipeline.
cl::opt<bool> EnableInferAlignmentPass(
279+
"enable-infer-alignment-pass", cl::init(false), cl::Hidden, cl::ZeroOrMore,
280+
cl::desc("Enable the InferAlignment pass, disabling alignment inference in "
281+
"InstCombine"));
282+
277283
PipelineTuningOptions::PipelineTuningOptions() {
278284
LoopInterleaving = true;
279285
LoopVectorization = true;
@@ -1140,6 +1146,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
11401146
FPM.addPass(LoopVectorizePass(
11411147
LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
11421148

1149+
if (EnableInferAlignmentPass)
1150+
FPM.addPass(InferAlignmentPass());
11431151
if (IsFullLTO) {
11441152
// The vectorizer may have significantly shortened a loop body; unroll
11451153
// again. Unroll small loops to hide loop backedge latency and saturate any
@@ -1257,6 +1265,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
12571265
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
12581266
}
12591267

1268+
if (EnableInferAlignmentPass)
1269+
FPM.addPass(InferAlignmentPass());
12601270
FPM.addPass(InstCombinePass());
12611271

12621272
// This is needed for two reasons:

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ FUNCTION_PASS("gvn-hoist", GVNHoistPass())
337337
FUNCTION_PASS("gvn-sink", GVNSinkPass())
338338
FUNCTION_PASS("helloworld", HelloWorldPass())
339339
FUNCTION_PASS("infer-address-spaces", InferAddressSpacesPass())
340+
FUNCTION_PASS("infer-alignment", InferAlignmentPass())
340341
FUNCTION_PASS("instcount", InstCountPass())
341342
FUNCTION_PASS("instsimplify", InstSimplifyPass())
342343
FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())

llvm/lib/Transforms/Scalar/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ add_llvm_component_library(LLVMScalarOpts
2222
InductiveRangeCheckElimination.cpp
2323
IndVarSimplify.cpp
2424
InferAddressSpaces.cpp
25+
InferAlignment.cpp
2526
InstSimplifyPass.cpp
2627
JumpThreading.cpp
2728
LICM.cpp
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
//===- InferAlignment.cpp -------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Infer alignment for loads, stores and other memory operations based on
10+
// trailing zero known bits information.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "llvm/Transforms/Scalar/InferAlignment.h"
15+
#include "llvm/Analysis/AssumptionCache.h"
16+
#include "llvm/Analysis/ValueTracking.h"
17+
#include "llvm/IR/Instructions.h"
18+
#include "llvm/InitializePasses.h"
19+
#include "llvm/Support/KnownBits.h"
20+
#include "llvm/Transforms/Scalar.h"
21+
#include "llvm/Transforms/Utils/Local.h"
22+
23+
using namespace llvm;
24+
25+
// Try to raise the alignment recorded on a single load or store.
//
// Fn is called with the pointer operand, the instruction's current alignment,
// and the DataLayout's preferred alignment for the accessed type (the loaded
// type for loads, the stored value's type for stores). It returns a candidate
// alignment, which is applied only when it is strictly greater than the
// current one, so this helper never lowers an alignment.
//
// Returns true if the instruction's alignment was changed. Instructions that
// are neither LoadInst nor StoreInst are left untouched and yield false.
static bool tryToImproveAlign(
26+
const DataLayout &DL, Instruction *I,
27+
function_ref<Align(Value *PtrOp, Align OldAlign, Align PrefAlign)> Fn) {
28+
if (auto *LI = dyn_cast<LoadInst>(I)) {
29+
Value *PtrOp = LI->getPointerOperand();
30+
Align OldAlign = LI->getAlign();
31+
Align NewAlign = Fn(PtrOp, OldAlign, DL.getPrefTypeAlign(LI->getType()));
32+
// Only ever increase the alignment.
if (NewAlign > OldAlign) {
33+
LI->setAlignment(NewAlign);
34+
return true;
35+
}
36+
} else if (auto *SI = dyn_cast<StoreInst>(I)) {
37+
Value *PtrOp = SI->getPointerOperand();
38+
Value *ValOp = SI->getValueOperand();
39+
Align OldAlign = SI->getAlign();
40+
// For stores the preferred alignment comes from the type of the stored value.
Align NewAlign = Fn(PtrOp, OldAlign, DL.getPrefTypeAlign(ValOp->getType()));
41+
// Only ever increase the alignment.
if (NewAlign > OldAlign) {
42+
SI->setAlignment(NewAlign);
43+
return true;
44+
}
45+
}
46+
// TODO: Also handle memory intrinsics.
47+
return false;
48+
}
49+
50+
// Run alignment inference over every instruction of F in two sweeps:
//   1. Where the preferred type alignment exceeds the current one, ask
//      tryEnforceAlignment to raise the alignment of the pointed-to object.
//   2. Derive an alignment from the known trailing zero bits of each pointer
//      operand, using AC and DT as context for computeKnownBits.
// Returns true if the alignment of any load or store was increased.
//
// NOTE(review): this function is not declared in InferAlignment.h but has
// external linkage here — confirm whether it should be `static`.
bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
51+
const DataLayout &DL = F.getParent()->getDataLayout();
52+
bool Changed = false;
53+
54+
// Enforce preferred type alignment if possible. We do this as a separate
55+
// pass first, because it may improve the alignments we infer below.
56+
for (BasicBlock &BB : F) {
57+
for (Instruction &I : BB) {
58+
Changed |= tryToImproveAlign(
59+
DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
60+
// Nothing to enforce when the access already meets its preferred alignment.
if (PrefAlign > OldAlign)
61+
// tryEnforceAlignment may return less than PrefAlign; never go below OldAlign.
return std::max(OldAlign,
62+
tryEnforceAlignment(PtrOp, PrefAlign, DL));
63+
return OldAlign;
64+
});
65+
}
66+
}
67+
68+
// Compute alignment from known bits.
69+
for (BasicBlock &BB : F) {
70+
for (Instruction &I : BB) {
71+
Changed |= tryToImproveAlign(
72+
DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
73+
// OldAlign and PrefAlign are unused here; the result depends only on known bits.
KnownBits Known = computeKnownBits(PtrOp, DL, 0, &AC, &I, &DT);
74+
// Cap the exponent at Value::MaxAlignmentExponent. The unary '+' yields a
// temporary so std::min does not bind a reference to the static member.
unsigned TrailZ = std::min(Known.countMinTrailingZeros(),
75+
+Value::MaxAlignmentExponent);
76+
// Also keep the exponent below the pointer's bit width before shifting.
return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
77+
});
78+
}
79+
}
80+
81+
return Changed;
82+
}
83+
84+
// Pass entry point: fetch the assumption cache and dominator tree for F from
// the analysis manager and run alignment inference with them.
PreservedAnalyses InferAlignmentPass::run(Function &F,
85+
FunctionAnalysisManager &AM) {
86+
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
87+
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
88+
// The boolean "changed" result is discarded; every analysis is reported as
// preserved whether or not an alignment was modified.
inferAlignment(F, AC, DT);
89+
// Changes to alignment shouldn't invalidate analyses.
90+
return PreservedAnalyses::all();
91+
}

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,15 +1388,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
13881388
return Changed;
13891389
}
13901390

1391-
/// If the specified pointer points to an object that we control, try to modify
1392-
/// the object's alignment to PrefAlign. Returns a minimum known alignment of
1393-
/// the value after the operation, which may be lower than PrefAlign.
1394-
///
1395-
/// Increating value alignment isn't often possible though. If alignment is
1396-
/// important, a more reliable approach is to simply align all global variables
1397-
/// and allocation instructions to their preferred alignment from the beginning.
1398-
static Align tryEnforceAlignment(Value *V, Align PrefAlign,
1399-
const DataLayout &DL) {
1391+
Align llvm::tryEnforceAlignment(Value *V, Align PrefAlign,
1392+
const DataLayout &DL) {
14001393
V = V->stripPointerCasts();
14011394

14021395
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {

llvm/test/Transforms/InferAlignment/alloca.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt < %s -passes=no-op-function -S | FileCheck %s
2+
; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
33

44
; ------------------------------------------------------------------------------
55
; Scalar type
@@ -8,11 +8,11 @@
88
define void @alloca_local(i8 %x, i32 %y) {
99
; CHECK-LABEL: define void @alloca_local
1010
; CHECK-SAME: (i8 [[X:%.*]], i32 [[Y:%.*]]) {
11-
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 1
12-
; CHECK-NEXT: [[LOAD_I8:%.*]] = load i8, ptr [[ALLOCA]], align 1
13-
; CHECK-NEXT: [[LOAD_I32:%.*]] = load i32, ptr [[ALLOCA]], align 1
14-
; CHECK-NEXT: store i8 [[X]], ptr [[ALLOCA]], align 1
15-
; CHECK-NEXT: store i32 [[Y]], ptr [[ALLOCA]], align 1
11+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4
12+
; CHECK-NEXT: [[LOAD_I8:%.*]] = load i8, ptr [[ALLOCA]], align 4
13+
; CHECK-NEXT: [[LOAD_I32:%.*]] = load i32, ptr [[ALLOCA]], align 4
14+
; CHECK-NEXT: store i8 [[X]], ptr [[ALLOCA]], align 4
15+
; CHECK-NEXT: store i32 [[Y]], ptr [[ALLOCA]], align 4
1616
; CHECK-NEXT: ret void
1717
;
1818
%alloca = alloca i32, align 1
@@ -38,10 +38,10 @@ define void @alloca_struct(i32 %x) {
3838
; CHECK-NEXT: [[ALLOCA_STRUCT:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8
3939
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[STRUCT_PAIR]], ptr [[ALLOCA_STRUCT]], i64 0, i32 1
4040
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { i32, i32 }, ptr [[GEP_0]], i64 0, i32 1
41-
; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, ptr [[GEP_0]], align 1
42-
; CHECK-NEXT: store i32 0, ptr [[GEP_0]], align 1
43-
; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, ptr [[GEP_1]], align 1
44-
; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 1
41+
; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, ptr [[GEP_0]], align 8
42+
; CHECK-NEXT: store i32 0, ptr [[GEP_0]], align 8
43+
; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, ptr [[GEP_1]], align 4
44+
; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 4
4545
; CHECK-NEXT: ret void
4646
;
4747
%alloca.struct = alloca %struct.pair

llvm/test/Transforms/InferAlignment/atomic.ll

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt -S < %s -passes=no-op-function | FileCheck %s
2+
; RUN: opt -S < %s -passes=infer-alignment | FileCheck %s
33

44
; ------------------------------------------------------------------------------
55
; load/store of null
66
; ------------------------------------------------------------------------------
77

88
define void @load_null() {
99
; CHECK-LABEL: define void @load_null() {
10-
; CHECK-NEXT: [[X_0:%.*]] = load atomic i32, ptr null unordered, align 4
11-
; CHECK-NEXT: [[X_1:%.*]] = load atomic i32, ptr null monotonic, align 4
12-
; CHECK-NEXT: [[X_2:%.*]] = load atomic i32, ptr null seq_cst, align 4
10+
; CHECK-NEXT: [[X_0:%.*]] = load atomic i32, ptr null unordered, align 4294967296
11+
; CHECK-NEXT: [[X_1:%.*]] = load atomic i32, ptr null monotonic, align 4294967296
12+
; CHECK-NEXT: [[X_2:%.*]] = load atomic i32, ptr null seq_cst, align 4294967296
1313
; CHECK-NEXT: ret void
1414
;
1515
%x.0 = load atomic i32, ptr null unordered, align 4
@@ -20,9 +20,9 @@ define void @load_null() {
2020

2121
define void @store_null() {
2222
; CHECK-LABEL: define void @store_null() {
23-
; CHECK-NEXT: store atomic i32 0, ptr null unordered, align 4
24-
; CHECK-NEXT: store atomic i32 0, ptr null monotonic, align 4
25-
; CHECK-NEXT: store atomic i32 0, ptr null seq_cst, align 4
23+
; CHECK-NEXT: store atomic i32 0, ptr null unordered, align 4294967296
24+
; CHECK-NEXT: store atomic i32 0, ptr null monotonic, align 4294967296
25+
; CHECK-NEXT: store atomic i32 0, ptr null seq_cst, align 4294967296
2626
; CHECK-NEXT: ret void
2727
;
2828
store atomic i32 0, ptr null unordered, align 4
@@ -38,9 +38,9 @@ define void @store_null() {
3838

3939
define void @load_nonnull() {
4040
; CHECK-LABEL: define void @load_nonnull() {
41-
; CHECK-NEXT: [[X_0:%.*]] = load atomic i32, ptr @c unordered, align 4
42-
; CHECK-NEXT: [[X_1:%.*]] = load atomic i32, ptr @c monotonic, align 4
43-
; CHECK-NEXT: [[X_2:%.*]] = load atomic i32, ptr @c seq_cst, align 4
41+
; CHECK-NEXT: [[X_0:%.*]] = load atomic i32, ptr @c unordered, align 8
42+
; CHECK-NEXT: [[X_1:%.*]] = load atomic i32, ptr @c monotonic, align 8
43+
; CHECK-NEXT: [[X_2:%.*]] = load atomic i32, ptr @c seq_cst, align 8
4444
; CHECK-NEXT: ret void
4545
;
4646
%x.0 = load atomic i32, ptr @c unordered, align 4
@@ -51,9 +51,9 @@ define void @load_nonnull() {
5151

5252
define void @store_nonnull() {
5353
; CHECK-LABEL: define void @store_nonnull() {
54-
; CHECK-NEXT: store atomic i32 0, ptr @c unordered, align 4
55-
; CHECK-NEXT: store atomic i32 0, ptr @c monotonic, align 4
56-
; CHECK-NEXT: store atomic i32 0, ptr @c seq_cst, align 4
54+
; CHECK-NEXT: store atomic i32 0, ptr @c unordered, align 8
55+
; CHECK-NEXT: store atomic i32 0, ptr @c monotonic, align 8
56+
; CHECK-NEXT: store atomic i32 0, ptr @c seq_cst, align 8
5757
; CHECK-NEXT: ret void
5858
;
5959
store atomic i32 0, ptr @c unordered, align 4
@@ -69,9 +69,9 @@ define void @store_nonnull() {
6969
define void @load_alloca() {
7070
; CHECK-LABEL: define void @load_alloca() {
7171
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4
72-
; CHECK-NEXT: [[X_0:%.*]] = load atomic i32, ptr [[ALLOCA]] unordered, align 1
73-
; CHECK-NEXT: [[X_1:%.*]] = load atomic i32, ptr [[ALLOCA]] monotonic, align 1
74-
; CHECK-NEXT: [[X_2:%.*]] = load atomic i32, ptr [[ALLOCA]] seq_cst, align 1
72+
; CHECK-NEXT: [[X_0:%.*]] = load atomic i32, ptr [[ALLOCA]] unordered, align 4
73+
; CHECK-NEXT: [[X_1:%.*]] = load atomic i32, ptr [[ALLOCA]] monotonic, align 4
74+
; CHECK-NEXT: [[X_2:%.*]] = load atomic i32, ptr [[ALLOCA]] seq_cst, align 4
7575
; CHECK-NEXT: ret void
7676
;
7777
%alloca = alloca i32
@@ -84,9 +84,9 @@ define void @load_alloca() {
8484
define void @store_alloca() {
8585
; CHECK-LABEL: define void @store_alloca() {
8686
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4
87-
; CHECK-NEXT: store atomic i32 0, ptr [[ALLOCA]] unordered, align 1
88-
; CHECK-NEXT: store atomic i32 0, ptr [[ALLOCA]] monotonic, align 1
89-
; CHECK-NEXT: store atomic i32 0, ptr [[ALLOCA]] seq_cst, align 1
87+
; CHECK-NEXT: store atomic i32 0, ptr [[ALLOCA]] unordered, align 4
88+
; CHECK-NEXT: store atomic i32 0, ptr [[ALLOCA]] monotonic, align 4
89+
; CHECK-NEXT: store atomic i32 0, ptr [[ALLOCA]] seq_cst, align 4
9090
; CHECK-NEXT: ret void
9191
;
9292
%alloca = alloca i32

llvm/test/Transforms/InferAlignment/attributes.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt < %s -passes=no-op-function -S | FileCheck %s
2+
; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
33

44
define void @attribute(ptr align 32 %a) {
55
; CHECK-LABEL: define void @attribute
66
; CHECK-SAME: (ptr align 32 [[A:%.*]]) {
7-
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A]], align 1
8-
; CHECK-NEXT: store i32 123, ptr [[A]], align 1
7+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A]], align 32
8+
; CHECK-NEXT: store i32 123, ptr [[A]], align 32
99
; CHECK-NEXT: ret void
1010
;
1111
%load = load i32, ptr %a, align 1
@@ -17,8 +17,8 @@ define void @attribute_through_call(ptr align 32 %a) {
1717
; CHECK-LABEL: define void @attribute_through_call
1818
; CHECK-SAME: (ptr align 32 [[A:%.*]]) {
1919
; CHECK-NEXT: [[RES:%.*]] = call ptr @call(ptr [[A]])
20-
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[RES]], align 1
21-
; CHECK-NEXT: store i32 123, ptr [[RES]], align 1
20+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[RES]], align 32
21+
; CHECK-NEXT: store i32 123, ptr [[RES]], align 32
2222
; CHECK-NEXT: ret void
2323
;
2424
%res = call ptr @call(ptr %a)
@@ -31,8 +31,8 @@ define void @attribute_return_value(ptr %a) {
3131
; CHECK-LABEL: define void @attribute_return_value
3232
; CHECK-SAME: (ptr [[A:%.*]]) {
3333
; CHECK-NEXT: [[RES:%.*]] = call align 32 ptr @call(ptr [[A]])
34-
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[RES]], align 1
35-
; CHECK-NEXT: store i32 123, ptr [[RES]], align 1
34+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[RES]], align 32
35+
; CHECK-NEXT: store i32 123, ptr [[RES]], align 32
3636
; CHECK-NEXT: ret void
3737
;
3838
%res = call align 32 ptr @call(ptr %a)

llvm/test/Transforms/InferAlignment/gep-2d.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt < %s -passes=no-op-function -S | FileCheck %s
2+
; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
33

44
; A multi-dimensional array in a nested loop.inner doing vector stores that
55
; aren't yet aligned. InferAlignment can understand the addressing in the
@@ -21,8 +21,8 @@ define void @nested_loop() {
2121
; CHECK: loop.inner:
2222
; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, [[LOOP_OUTER]] ], [ [[J_NEXT:%.*]], [[LOOP_INNER_TAIL:%.*]] ]
2323
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr [1001 x [20000 x double]], ptr @Nice, i64 0, i64 [[I]], i64 [[J]]
24-
; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[GEP_1]], align 8
25-
; CHECK-NEXT: [[LOAD_1:%.*]] = load <2 x double>, ptr [[GEP_1]], align 8
24+
; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[GEP_1]], align 16
25+
; CHECK-NEXT: [[LOAD_1:%.*]] = load <2 x double>, ptr [[GEP_1]], align 16
2626
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr [1001 x [20001 x double]], ptr @Awkward, i64 0, i64 [[I]], i64 [[J]]
2727
; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[GEP_2]], align 8
2828
; CHECK-NEXT: [[LOAD_2:%.*]] = load <2 x double>, ptr [[GEP_2]], align 8

0 commit comments

Comments
 (0)