Skip to content

[SelectionDAG] Change computeAliasing signature from optional<uint64> to LocationSize. #83017

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#ifndef LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H
#define LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cstdint>

Expand Down Expand Up @@ -81,10 +82,8 @@ class BaseIndexOffset {

// Returns true if `Op0` and `Op1` can be proven to alias/not alias, in
// which case `IsAlias` is set to true/false.
static bool computeAliasing(const SDNode *Op0,
const std::optional<int64_t> NumBytes0,
const SDNode *Op1,
const std::optional<int64_t> NumBytes1,
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0,
const SDNode *Op1, const LocationSize NumBytes1,
const SelectionDAG &DAG, bool &IsAlias);

/// Parses tree in N for base, index, offset addresses.
Expand Down
37 changes: 21 additions & 16 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27835,7 +27835,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
std::optional<int64_t> NumBytes;
LocationSize NumBytes;
MachineMemOperand *MMO;
};

Expand All @@ -27853,21 +27853,24 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
LSN->isAtomic(),
LSN->getBasePtr(),
Offset /*base offset*/,
std::optional<int64_t>(Size),
Size != ~UINT64_C(0) ? LocationSize::precise(Size)
: LocationSize::beforeOrAfterPointer(),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
return {false /*isVolatile*/,
/*isAtomic*/ false,
LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? std::optional<int64_t>(LN->getSize())
: std::optional<int64_t>(),
(LN->hasOffset()) ? LocationSize::precise(LN->getSize())
: LocationSize::beforeOrAfterPointer(),
(MachineMemOperand *)nullptr};
// Default.
return {false /*isvolatile*/,
/*isAtomic*/ false, SDValue(),
(int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/,
/*isAtomic*/ false,
SDValue(),
(int64_t)0 /*offset*/,
LocationSize::beforeOrAfterPointer() /*size*/,
(MachineMemOperand *)nullptr};
};

Expand Down Expand Up @@ -27922,18 +27925,20 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
int64_t SrcValOffset1 = MUC1.MMO->getOffset();
Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
auto &Size0 = MUC0.NumBytes;
auto &Size1 = MUC1.NumBytes;
LocationSize Size0 = MUC0.NumBytes;
LocationSize Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
SrcValOffset1 % *Size1 == 0) {
Size0.hasValue() && Size1.hasValue() && Size0 == Size1 &&
OrigAlignment0 > Size0.getValue() &&
SrcValOffset0 % Size0.getValue() == 0 &&
SrcValOffset1 % Size1.getValue() == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();

// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
if ((OffAlign0 + (int64_t)Size0.getValue()) <= OffAlign1 ||
(OffAlign1 + (int64_t)Size1.getValue()) <= OffAlign0)
return false;
}

Expand All @@ -27946,12 +27951,12 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
UseAA = false;
#endif

if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
Size1) {
if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
Size0.hasValue() && Size1.hasValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset;
int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset;
if (AA->isNoAlias(
MemoryLocation(MUC0.MMO->getValue(), Overlap0,
UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
Expand Down
20 changes: 9 additions & 11 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,10 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
}

bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
const std::optional<int64_t> NumBytes0,
const LocationSize NumBytes0,
const SDNode *Op1,
const std::optional<int64_t> NumBytes1,
const LocationSize NumBytes1,
const SelectionDAG &DAG, bool &IsAlias) {

BaseIndexOffset BasePtr0 = match(Op0, DAG);
if (!BasePtr0.getBase().getNode())
return false;
Expand All @@ -105,27 +104,26 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
return false;

int64_t PtrDiff;
if (NumBytes0 && NumBytes1 &&
BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
// If the size of the memory access is unknown, do not use it for analysis.
// One example of an unknown-size memory access is a load/store of a scalable
// vector object on the stack.
// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
// following situations arise:
if (PtrDiff >= 0 &&
*NumBytes0 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
if (PtrDiff >= 0 && NumBytes0.hasValue() && !NumBytes0.isScalable()) {
// [----BasePtr0----]
// [---BasePtr1--]
// ========PtrDiff========>
IsAlias = !(*NumBytes0 <= PtrDiff);
IsAlias = !(static_cast<int64_t>(NumBytes0.getValue().getFixedValue()) <=
PtrDiff);
return true;
}
if (PtrDiff < 0 &&
*NumBytes1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
if (PtrDiff < 0 && NumBytes1.hasValue() && !NumBytes1.isScalable()) {
// [----BasePtr0----]
// [---BasePtr1--]
// =====(-PtrDiff)====>
IsAlias = !((PtrDiff + *NumBytes1) <= 0);
IsAlias = !((PtrDiff + static_cast<int64_t>(
NumBytes1.getValue().getFixedValue())) <= 0);
return true;
}
return false;
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ define void @array_1D(ptr %addr) #0 {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
Expand Down Expand Up @@ -81,18 +81,18 @@ define void @array_2D(ptr %addr) #0 {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #5, mul vl]
; CHECK-NEXT: st1d { z3.d }, p0, [sp, #4, mul vl]
; CHECK-NEXT: st1d { z5.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1d { z4.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #5, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #4, mul vl]
; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1d { z5.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z4.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #6
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ define void @test(ptr %addr) #0 {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ define void @test(ptr %addr) {
; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: vl1re64.v v8, (a2)
; CHECK-NEXT: slli a2, a1, 1
; CHECK-NEXT: vl1re64.v v9, (a0)
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: vl1re64.v v9, (a3)
; CHECK-NEXT: vl1re64.v v10, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v9, (a0)
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vs1r.v v10, (a2)
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: vs1r.v v9, (a2)
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: vs1r.v v10, (a0)
; CHECK-NEXT: csrrs a0, vlenb, zero
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ define <vscale x 1 x double> @test(ptr %addr, i64 %vl) {
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: csrrs a2, vlenb, zero
; CHECK-NEXT: vl1re64.v v8, (a0)
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: vl1re64.v v8, (a3)
; CHECK-NEXT: vl1re64.v v9, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vs1r.v v9, (a2)
; CHECK-NEXT: vs1r.v v8, (a2)
; CHECK-NEXT: vs1r.v v9, (a0)
; CHECK-NEXT: vl1re64.v v8, (a2)
; CHECK-NEXT: vl1re64.v v9, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
Expand Down
Loading