Skip to content

Commit 63fe80f

Browse files
authored
[SeparateConstOffsetFromGEP] Handle or disjoint flags (#76997)
This commit extends separate-const-offset-from-gep to look at the newly-added `disjoint` flag on `or` instructions so as to preserve additional opportunities for optimization. The tests were pre-committed in #76972.
1 parent a437347 commit 63fe80f

File tree

11 files changed

+59
-62
lines changed

11 files changed

+59
-62
lines changed

llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@
174174
#include "llvm/IR/Function.h"
175175
#include "llvm/IR/GetElementPtrTypeIterator.h"
176176
#include "llvm/IR/IRBuilder.h"
177+
#include "llvm/IR/InstrTypes.h"
177178
#include "llvm/IR/Instruction.h"
178179
#include "llvm/IR/Instructions.h"
179180
#include "llvm/IR/Module.h"
@@ -235,18 +236,16 @@ class ConstantOffsetExtractor {
235236
/// \p UserChainTail Outputs the tail of UserChain so that we can
236237
/// garbage-collect unused instructions in UserChain.
237238
static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
238-
User *&UserChainTail, const DominatorTree *DT);
239+
User *&UserChainTail);
239240

240241
/// Looks for a constant offset from the given GEP index without extracting
241242
/// it. It returns the numeric value of the extracted constant offset (0 if
242243
/// failed). The meanings of the arguments are the same as in Extract.
243-
static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
244-
const DominatorTree *DT);
244+
static int64_t Find(Value *Idx, GetElementPtrInst *GEP);
245245

246246
private:
247-
ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
248-
: IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
249-
}
247+
ConstantOffsetExtractor(Instruction *InsertionPt)
248+
: IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()) {}
250249

251250
/// Searches the expression that computes V for a non-zero constant C s.t.
252251
/// V can be reassociated into the form V' + C. If the searching is
@@ -336,7 +335,6 @@ class ConstantOffsetExtractor {
336335
Instruction *IP;
337336

338337
const DataLayout &DL;
339-
const DominatorTree *DT;
340338
};
341339

342340
/// A pass that tries to split every GEP in the function into a variadic
@@ -519,12 +517,10 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
519517
}
520518

521519
Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
522-
// Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
523-
// don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
524-
// FIXME: this does not appear to be covered by any tests
525-
// (with x86/aarch64 backends at least)
520+
// Do not trace into "or" unless it is equivalent to "add".
521+
// This is the case if the or's disjoint flag is set.
526522
if (BO->getOpcode() == Instruction::Or &&
527-
!haveNoCommonBitsSet(LHS, RHS, SimplifyQuery(DL, DT, /*AC*/ nullptr, BO)))
523+
!cast<PossiblyDisjointInst>(BO)->isDisjoint())
528524
return false;
529525

530526
// FIXME: We don't currently support constants from the RHS of subs,
@@ -778,9 +774,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
778774
}
779775

780776
Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
781-
User *&UserChainTail,
782-
const DominatorTree *DT) {
783-
ConstantOffsetExtractor Extractor(GEP, DT);
777+
User *&UserChainTail) {
778+
ConstantOffsetExtractor Extractor(GEP);
784779
// Find a non-zero constant offset first.
785780
APInt ConstantOffset =
786781
Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
@@ -795,10 +790,9 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
795790
return IdxWithoutConstOffset;
796791
}
797792

798-
int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
799-
const DominatorTree *DT) {
793+
int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
800794
// If Idx is an index of an inbounds GEP, Idx is guaranteed to be non-negative.
801-
return ConstantOffsetExtractor(GEP, DT)
795+
return ConstantOffsetExtractor(GEP)
802796
.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
803797
GEP->isInBounds())
804798
.getSExtValue();
@@ -836,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
836830

837831
// Tries to extract a constant offset from this GEP index.
838832
int64_t ConstantOffset =
839-
ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
833+
ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
840834
if (ConstantOffset != 0) {
841835
NeedsExtraction = true;
842836
// A GEP may have multiple indices. We accumulate the extracted
@@ -1026,7 +1020,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
10261020
Value *OldIdx = GEP->getOperand(I);
10271021
User *UserChainTail;
10281022
Value *NewIdx =
1029-
ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
1023+
ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
10301024
if (NewIdx != nullptr) {
10311025
// Switches to the index with the constant offset removed.
10321026
GEP->setOperand(I, NewIdx);

llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,17 @@ define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, ptr addrspac
1919
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
2020
call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)
2121

22-
%bs2 = or i32 %bs1, 1
22+
%bs2 = or disjoint i32 %bs1, 1
2323
%ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
2424
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
2525
call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)
2626

27-
%bs3 = or i32 %bs1, 2
27+
%bs3 = or disjoint i32 %bs1, 2
2828
%ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
2929
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
3030
call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)
3131

32-
%bs4 = or i32 %bs1, 3
32+
%bs4 = or disjoint i32 %bs1, 3
3333
%ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
3434
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
3535
call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)
@@ -55,17 +55,17 @@ define amdgpu_cs void @test1_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2, i32, p
5555
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
5656
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)
5757

58-
%bs2 = or i32 %bs1, 1
58+
%bs2 = or disjoint i32 %bs1, 1
5959
%ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
6060
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
6161
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)
6262

63-
%bs3 = or i32 %bs1, 2
63+
%bs3 = or disjoint i32 %bs1, 2
6464
%ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
6565
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
6666
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)
6767

68-
%bs4 = or i32 %bs1, 3
68+
%bs4 = or disjoint i32 %bs1, 3
6969
%ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
7070
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
7171
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)
@@ -90,17 +90,17 @@ define amdgpu_cs void @test2(i32 %arg1, <4 x i32> inreg %arg2) {
9090
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
9191
call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)
9292

93-
%bs2 = or i32 %bs1, 1
93+
%bs2 = or disjoint i32 %bs1, 1
9494
%ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
9595
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
9696
call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)
9797

98-
%bs3 = or i32 %bs1, 2
98+
%bs3 = or disjoint i32 %bs1, 2
9999
%ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
100100
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
101101
call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)
102102

103-
%bs4 = or i32 %bs1, 3
103+
%bs4 = or disjoint i32 %bs1, 3
104104
%ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
105105
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
106106
call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)
@@ -125,17 +125,17 @@ define amdgpu_cs void @test2_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2) {
125125
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
126126
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)
127127

128-
%bs2 = or i32 %bs1, 1
128+
%bs2 = or disjoint i32 %bs1, 1
129129
%ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
130130
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
131131
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)
132132

133-
%bs3 = or i32 %bs1, 2
133+
%bs3 = or disjoint i32 %bs1, 2
134134
%ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
135135
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
136136
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)
137137

138-
%bs4 = or i32 %bs1, 3
138+
%bs4 = or disjoint i32 %bs1, 3
139139
%ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
140140
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
141141
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)

llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ main_body:
238238
%25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24, !amdgpu.uniform !0
239239
%26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
240240
%27 = shl i32 %23, 2
241-
%28 = or i32 %27, 3
241+
%28 = or disjoint i32 %27, 3
242242
%29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28, !amdgpu.uniform !0
243243
%30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
244244
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
@@ -270,7 +270,7 @@ main_body:
270270
%25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24
271271
%26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
272272
%27 = shl i32 %23, 2
273-
%28 = or i32 %27, 3
273+
%28 = or disjoint i32 %27, 3
274274
%29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28
275275
%30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
276276
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8

llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,38 +1157,38 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
11571157
; GFX11-NEXT: s_setpc_b64 s[30:31]
11581158
%idx = shl i32 %idxp, 4
11591159

1160-
%i.0 = or i32 %idx, 0
1160+
%i.0 = or disjoint i32 %idx, 0
11611161
%p.0 = getelementptr half, ptr addrspace(3) %p, i32 %i.0
11621162
%x.0 = load i16, ptr addrspace(3) %p.0, align 4
11631163
%v0p = insertelement <8 x i16> poison, i16 %x.0, i32 0
1164-
%i.1 = or i32 %idx, 1
1164+
%i.1 = or disjoint i32 %idx, 1
11651165
%p.1 = getelementptr half, ptr addrspace(3) %p, i32 %i.1
11661166
%x.1 = load i16, ptr addrspace(3) %p.1, align 2
11671167
%v0 = insertelement <8 x i16> %v0p, i16 %x.1, i32 1
11681168

1169-
%i.2 = or i32 %idx, 2
1169+
%i.2 = or disjoint i32 %idx, 2
11701170
%p.2 = getelementptr half, ptr addrspace(3) %p, i32 %i.2
11711171
%x.2 = load i16, ptr addrspace(3) %p.2, align 4
11721172
%v1p = insertelement <8 x i16> poison, i16 %x.2, i32 0
1173-
%i.3 = or i32 %idx, 3
1173+
%i.3 = or disjoint i32 %idx, 3
11741174
%p.3 = getelementptr half, ptr addrspace(3) %p, i32 %i.3
11751175
%x.3 = load i16, ptr addrspace(3) %p.3, align 2
11761176
%v1 = insertelement <8 x i16> %v1p, i16 %x.3, i32 1
11771177

1178-
%i.4 = or i32 %idx, 4
1178+
%i.4 = or disjoint i32 %idx, 4
11791179
%p.4 = getelementptr half, ptr addrspace(3) %p, i32 %i.4
11801180
%x.4 = load i16, ptr addrspace(3) %p.4, align 4
11811181
%v2p = insertelement <8 x i16> poison, i16 %x.4, i32 0
1182-
%i.5 = or i32 %idx, 5
1182+
%i.5 = or disjoint i32 %idx, 5
11831183
%p.5 = getelementptr half, ptr addrspace(3) %p, i32 %i.5
11841184
%x.5 = load i16, ptr addrspace(3) %p.5, align 2
11851185
%v2 = insertelement <8 x i16> %v2p, i16 %x.5, i32 1
11861186

1187-
%i.6 = or i32 %idx, 6
1187+
%i.6 = or disjoint i32 %idx, 6
11881188
%p.6 = getelementptr half, ptr addrspace(3) %p, i32 %i.6
11891189
%x.6 = load i16, ptr addrspace(3) %p.6, align 4
11901190
%v3p = insertelement <8 x i16> poison, i16 %x.6, i32 0
1191-
%i.7 = or i32 %idx, 7
1191+
%i.7 = or disjoint i32 %idx, 7
11921192
%p.7 = getelementptr half, ptr addrspace(3) %p, i32 %i.7
11931193
%x.7 = load i16, ptr addrspace(3) %p.7, align 2
11941194
%v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1

llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
732732
%192 = and i64 %191, 4294967168
733733
%193 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 %192
734734
%194 = shl nuw nsw i32 %178, 5
735-
%195 = or i32 %194, 8
735+
%195 = or disjoint i32 %194, 8
736736
%196 = zext i32 %195 to i64
737737
%197 = getelementptr inbounds i8, ptr addrspace(1) %193, i64 %196
738738
%198 = getelementptr inbounds i8, ptr addrspace(1) %197, i64 -4

llvm/test/CodeGen/NVPTX/vector-loads.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,11 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177
7878
%t3 = shl nuw nsw i32 %t1, 9
7979
%ttile_origin.2 = and i32 %t3, 130560
8080
%tstart_offset_x_mul = shl nuw nsw i32 %t0, 1
81-
%t4 = or i32 %ttile_origin.2, %tstart_offset_x_mul
82-
%t6 = or i32 %t4, 1
83-
%t8 = or i32 %t4, 128
81+
%t4 = or disjoint i32 %ttile_origin.2, %tstart_offset_x_mul
82+
%t6 = or disjoint i32 %t4, 1
83+
%t8 = or disjoint i32 %t4, 128
8484
%t9 = zext i32 %t8 to i64
85-
%t10 = or i32 %t4, 129
85+
%t10 = or disjoint i32 %t4, 129
8686
%t11 = zext i32 %t10 to i64
8787
%t20 = zext i32 %t2 to i64
8888
%t27 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t9

llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ for.body: ; preds = %for.body, %for.body
496496
%idxprom = zext i32 %mul to i64
497497
%arrayidx = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom
498498
%4 = load <16 x i8>, ptr %arrayidx, align 16
499-
%add2 = or i32 %mul, 1
499+
%add2 = or disjoint i32 %mul, 1
500500
%idxprom3 = zext i32 %add2 to i64
501501
%arrayidx4 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom3
502502
%5 = load <16 x i8>, ptr %arrayidx4, align 16

llvm/test/CodeGen/PowerPC/sched-addi.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ entry:
9999

100100
vector.body:
101101
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
102-
%offset.idx = or i64 %index, 1
102+
%offset.idx = or disjoint i64 %index, 1
103103
%0 = getelementptr %_elem_type_of_x, ptr %x_rvo_based_addr_3, i64 %offset.idx, i32 0
104104
%1 = getelementptr %_elem_type_of_a, ptr %a_rvo_based_addr_5, i64 %offset.idx, i32 0
105105
%wide.load = load <4 x double>, ptr %1, align 8

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ main_body:
157157
%25 = getelementptr [0 x <8 x i32>], ptr addrspace(4) %1, i32 0, i32 %24, !amdgpu.uniform !0
158158
%26 = load <8 x i32>, ptr addrspace(4) %25, align 32, !invariant.load !0
159159
%27 = shl i32 %23, 2
160-
%28 = or i32 %27, 3
160+
%28 = or disjoint i32 %27, 3
161161
%29 = getelementptr [0 x <4 x i32>], ptr addrspace(4) %1, i32 0, i32 %28, !amdgpu.uniform !0
162162
%30 = load <4 x i32>, ptr addrspace(4) %29, align 16, !invariant.load !0
163163
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %30, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8

llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ define ptr @sext_or(i64 %a, i32 %b) {
142142
;
143143
entry:
144144
%b1 = shl i32 %b, 2
145-
%b2 = or i32 %b1, 1 ; (b << 2) and 1 have no common bits
145+
%b2 = or disjoint i32 %b1, 1 ; (b << 2) and 1 have no common bits
146146
%b3 = or i32 %b1, 4 ; (b << 2) and 4 may have common bits
147147
%b2.ext = zext i32 %b2 to i64
148148
%b3.ext = sext i32 %b3 to i64
@@ -335,7 +335,7 @@ define ptr @shl_add_or(i64 %a, ptr %ptr) {
335335
entry:
336336
%shl = shl i64 %a, 2
337337
%add = add i64 %shl, 12
338-
%or = or i64 %add, 1
338+
%or = or disjoint i64 %add, 1
339339
; ((a << 2) + 12) and 1 have no common bits. Therefore,
340340
; SeparateConstOffsetFromGEP is able to extract the 12.
341341
; TODO(jingyue): We could reassociate the expression to combine 12 and 1.

llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,17 @@ define void @testOrDoesntSplit(ptr %p) {
2222
ret void
2323
}
2424

25-
define void @testNoBitsInCommonOrSplits(ptr %p) {
26-
; CHECK-LABEL: define void @testNoBitsInCommonOrSplits(
25+
; COM: The check for `or disjoint` removed the old hasNoBitsInCommon()
26+
; COM: check, ensure that failing to annotate an or with disjoint makes
27+
; COM: the optimization fail.
28+
define void @testNoBitsInCommonOrDoesntSplit(ptr %p) {
29+
; CHECK-LABEL: define void @testNoBitsInCommonOrDoesntSplit(
2730
; CHECK-SAME: ptr [[P:%.*]]) {
2831
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
2932
; CHECK-NEXT: [[VAR_HIGH:%.*]] = and i64 [[VAR]], -16
30-
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
31-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR_HIGH]]
32-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
33-
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
34-
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
33+
; CHECK-NEXT: [[OFF:%.*]] = or i64 [[VAR_HIGH]], 10
34+
; CHECK-NEXT: [[Q:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFF]]
35+
; CHECK-NEXT: store i8 0, ptr [[Q]], align 1
3536
; CHECK-NEXT: ret void
3637
;
3738
%var = tail call i64 @foo()
@@ -46,9 +47,11 @@ define void @testDisjointOrSplits(ptr %p) {
4647
; CHECK-LABEL: define void @testDisjointOrSplits(
4748
; CHECK-SAME: ptr [[P:%.*]]) {
4849
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
49-
; CHECK-NEXT: [[OFF:%.*]] = or disjoint i64 [[VAR]], 10
50-
; CHECK-NEXT: [[Q:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFF]]
51-
; CHECK-NEXT: store i8 0, ptr [[Q]], align 1
50+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
51+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
52+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
53+
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
54+
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
5255
; CHECK-NEXT: ret void
5356
;
5457
%var = tail call i64 @foo()

0 commit comments

Comments
 (0)