Commit ef8e033

[llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr (#112793)
MSVC has a set of qualifiers that allow using 32-bit signed/unsigned pointers when building 64-bit targets. This is useful for WoW code (i.e., the part of Windows that handles running 32-bit applications on a 64-bit OS). Currently this is supported on x64 using the 270, 271 and 272 address spaces, but it does not work for AArch64 at all.

This change handles pointers in the new address spaces by truncating or extending the value as required. The implementation is modeled after x86.

Note that the initial version of this change, which was never merged (<https://reviews.llvm.org/D158931>), took a much different approach that involved arch-specific handling in the DAG combiner/selector, which didn't feel like the correct approach. That previous approach also used `UBFM` for all 32-bit to 64-bit zero-extensions, which resulted in a lot of `lsr` instructions being added. For example, in the `ptradd.ll` test, it resulted in:

```
%add = add i32 %b, %a
%conv = zext i32 %add to i64
```

being expanded to:

```
add w8, w1, w0
lsr w0, w8, #0
```

where the `lsr` instruction wasn't previously being added. I don't know enough about the exact details of AArch64 to know whether that's a desirable change, so I've left it out of this change.

Backend half of #111879.
1 parent eb0af4e commit ef8e033
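For orientation, a minimal IR sketch of the widening casts this change lowers (not part of the commit; function names are illustrative, and the datalayout is copied from the new AArch64 test below):

```llvm
; Sketch only. The MSVC qualifiers map to address spaces as used here:
;   270 = __ptr32 __sptr (sign-extends to 64 bits)
;   271 = __ptr32 __uptr (zero-extends to 64 bits)
;   272 = __ptr64 (same width as the default AS, so casts are no-ops)
target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-windows-msvc"

define ptr @widen_sptr(ptr addrspace(270) %p) {
  ; expected to lower to a sign extend (sxtw) of the 32-bit value
  %q = addrspacecast ptr addrspace(270) %p to ptr
  ret ptr %q
}

define ptr @widen_uptr(ptr addrspace(271) %p) {
  ; expected to lower to a zero extend (mov wN, wM) of the 32-bit value
  %q = addrspacecast ptr addrspace(271) %p to ptr
  ret ptr %q
}
```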

6 files changed: +282 −13 lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 79 additions & 4 deletions
```diff
@@ -530,6 +530,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::XOR, MVT::i32, Custom);
   setOperationAction(ISD::XOR, MVT::i64, Custom);
 
+  setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
+  setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
+
   // Virtually no operation on f128 is legal, but LLVM can't expand them when
   // there's a valid register class, so we need custom operations in most cases.
   setOperationAction(ISD::FABS, MVT::f128, Expand);
```
```diff
@@ -6880,6 +6883,37 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
                       ST->getBasePtr(), ST->getMemOperand());
 }
 
+static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
+  SDLoc dl(Op);
+  SDValue Src = Op.getOperand(0);
+  MVT DestVT = Op.getSimpleValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
+
+  unsigned SrcAS = N->getSrcAddressSpace();
+  unsigned DestAS = N->getDestAddressSpace();
+  assert(SrcAS != DestAS &&
+         "addrspacecast must be between different address spaces");
+  assert(TLI.getTargetMachine().getPointerSize(SrcAS) !=
+             TLI.getTargetMachine().getPointerSize(DestAS) &&
+         "addrspacecast must be between different ptr sizes");
+
+  if (SrcAS == ARM64AS::PTR32_SPTR) {
+    return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src,
+                       DAG.getTargetConstant(0, dl, DestVT));
+  } else if (SrcAS == ARM64AS::PTR32_UPTR) {
+    return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src,
+                       DAG.getTargetConstant(0, dl, DestVT));
+  } else if ((DestAS == ARM64AS::PTR32_SPTR) ||
+             (DestAS == ARM64AS::PTR32_UPTR)) {
+    SDValue Ext = DAG.getAnyExtOrTrunc(Src, dl, DestVT);
+    SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, DestVT);
+    return Trunc;
+  } else {
+    return Src;
+  }
+}
+
 // Custom lowering for any store, vector or scalar and/or default or with
 // a truncate operations. Currently only custom lower truncate operation
 // from vector v4i16 to v4i8 or volatile stores of i128.
```
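The last two branches handle narrowing; a hedged sketch of that case (illustrative function name; assumes the aarch64-windows-msvc datalayout/triple from the sketch above):

```llvm
; Sketch only: narrowing a default-AS pointer to __ptr32 any-extends or
; truncates to i32, then zero-extends in-register, i.e. it keeps the low
; 32 bits (the new test's test_trunc stores just w1 for this pattern).
define ptr addrspace(270) @narrow_to_sptr(ptr %p) {
  %q = addrspacecast ptr %p to ptr addrspace(270)
  ret ptr addrspace(270) %q
}
```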
```diff
@@ -7541,6 +7575,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
     return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
+  case ISD::ADDRSPACECAST:
+    return LowerADDRSPACECAST(Op, DAG);
   case ISD::SIGN_EXTEND_INREG: {
     // Only custom lower when ExtraVT has a legal byte based element type.
     EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
```
```diff
@@ -23555,6 +23591,26 @@ static SDValue performLOADCombine(SDNode *N,
     performTBISimplification(N->getOperand(1), DCI, DAG);
 
   LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT RegVT = LD->getValueType(0);
+  EVT MemVT = LD->getMemoryVT();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(LD);
+
+  // Cast ptr32 and ptr64 pointers to the default address space before a load.
+  unsigned AddrSpace = LD->getAddressSpace();
+  if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
+      AddrSpace == ARM64AS::PTR32_UPTR) {
+    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+    if (PtrVT != LD->getBasePtr().getSimpleValueType()) {
+      SDValue Cast =
+          DAG.getAddrSpaceCast(DL, PtrVT, LD->getBasePtr(), AddrSpace, 0);
+      return DAG.getExtLoad(LD->getExtensionType(), DL, RegVT, LD->getChain(),
+                            Cast, LD->getPointerInfo(), MemVT,
+                            LD->getOriginalAlign(),
+                            LD->getMemOperand()->getFlags());
+    }
+  }
+
   if (LD->isVolatile() || !Subtarget->isLittleEndian())
     return SDValue(N, 0);
 
@@ -23564,13 +23620,11 @@ static SDValue performLOADCombine(SDNode *N,
   if (!LD->isNonTemporal())
     return SDValue(N, 0);
 
-  EVT MemVT = LD->getMemoryVT();
   if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
       MemVT.getSizeInBits() % 256 == 0 ||
       256 % MemVT.getScalarSizeInBits() != 0)
     return SDValue(N, 0);
 
-  SDLoc DL(LD);
   SDValue Chain = LD->getChain();
   SDValue BasePtr = LD->getBasePtr();
   SDNodeFlags Flags = LD->getFlags();
```
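To see what the load-side combine acts on, a hedged IR sketch (illustrative name; mirrors `test_zero_ext_store_load` in the new test):

```llvm
; Sketch only: the base pointer is i32 (AS 271), so the combine wraps it
; in an addrspacecast to AS 0 and re-issues the load through a normal
; 64-bit address (zero extend, then ldr, per the test's CHECK lines).
define i32 @load_via_uptr(ptr addrspace(271) %p) {
  %v = load i32, ptr addrspace(271) %p, align 4
  ret i32 %v
}
```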
```diff
@@ -23830,12 +23884,28 @@ static SDValue performSTORECombine(SDNode *N,
   SDValue Value = ST->getValue();
   SDValue Ptr = ST->getBasePtr();
   EVT ValueVT = Value.getValueType();
+  EVT MemVT = ST->getMemoryVT();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(ST);
 
   auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
     EVT EltVT = VT.getVectorElementType();
     return EltVT == MVT::f32 || EltVT == MVT::f64;
   };
 
+  // Cast ptr32 and ptr64 pointers to the default address space before a store.
+  unsigned AddrSpace = ST->getAddressSpace();
+  if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
+      AddrSpace == ARM64AS::PTR32_UPTR) {
+    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+    if (PtrVT != Ptr.getSimpleValueType()) {
+      SDValue Cast = DAG.getAddrSpaceCast(DL, PtrVT, Ptr, AddrSpace, 0);
+      return DAG.getStore(Chain, DL, Value, Cast, ST->getPointerInfo(),
+                          ST->getOriginalAlign(),
+                          ST->getMemOperand()->getFlags(), ST->getAAInfo());
+    }
+  }
+
   if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
     return Res;
 
@@ -23849,8 +23919,8 @@ static SDValue performSTORECombine(SDNode *N,
       ValueVT.isFixedLengthVector() &&
       ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
       hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType()))
-    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
-                             ST->getMemoryVT(), ST->getMemOperand());
+    return DAG.getTruncStore(Chain, DL, Value.getOperand(0), Ptr, MemVT,
+                             ST->getMemOperand());
 
   if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
     return Split;
```
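The store-side twin, sketched the same way (illustrative name, same assumed datalayout):

```llvm
; Sketch only: as with loads, the ptr32 base is cast to the default
; address space before the str is emitted.
define void @store_via_sptr(ptr addrspace(270) %p, i32 %v) {
  store i32 %v, ptr addrspace(270) %p, align 4
  ret void
}
```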
```diff
@@ -27391,6 +27461,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
     ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
     return;
   }
+  case ISD::ADDRSPACECAST: {
+    SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
+    Results.push_back(V);
+    return;
+  }
   case ISD::ATOMIC_LOAD:
   case ISD::LOAD: {
     MemSDNode *LoadNode = cast<MemSDNode>(N);
```

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 17 additions & 5 deletions
```diff
@@ -563,6 +563,10 @@ const unsigned StackProbeMaxLoopUnroll = 4;
 
 } // namespace AArch64
 
+namespace ARM64AS {
+enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
+}
+
 class AArch64Subtarget;
 
 class AArch64TargetLowering : public TargetLowering {
@@ -594,11 +598,19 @@ class AArch64TargetLowering : public TargetLowering {
                          unsigned Depth) const override;
 
   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
-    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
-    // *DAG* representation of pointers will always be 64-bits. They will be
-    // truncated and extended when transferred to memory, but the 64-bit DAG
-    // allows us to use AArch64's addressing modes much more easily.
-    return MVT::getIntegerVT(64);
+    if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) {
+      // These are 32-bit pointers created using the `__ptr32` extension or
+      // similar. They are handled by marking them as being in a different
+      // address space, and will be extended to 64-bits when used as the target
+      // of a load or store operation, or cast to a 64-bit pointer type.
+      return MVT::i32;
+    } else {
+      // Returning i64 unconditionally here (i.e. even for ILP32) means that the
+      // *DAG* representation of pointers will always be 64-bits. They will be
+      // truncated and extended when transferred to memory, but the 64-bit DAG
+      // allows us to use AArch64's addressing modes much more easily.
+      return MVT::i64;
+    }
   }
 
   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
```

llvm/lib/Target/AArch64/AArch64TargetMachine.h

Lines changed: 1 addition & 2 deletions
```diff
@@ -68,8 +68,7 @@ class AArch64TargetMachine : public CodeGenTargetMachineImpl {
 
   /// Returns true if a cast between SrcAS and DestAS is a noop.
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
-    // Addrspacecasts are always noops.
-    return true;
+    return getPointerSize(SrcAS) == getPointerSize(DestAS);
   }
 
 private:
```
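One consequence of the new predicate (my reading, not stated in the commit): a cast between two address spaces of equal pointer width is still treated as a no-op, so only 32-bit/64-bit mixes reach the custom lowering. For example:

```llvm
; Sketch only: AS 270 and AS 271 are both 32-bit under the Windows
; datalayout, so isNoopAddrSpaceCast returns true and this cast folds
; away; the sign/zero-extension difference only matters when widening.
define ptr addrspace(271) @sptr_to_uptr(ptr addrspace(270) %p) {
  %q = addrspacecast ptr addrspace(270) %p to ptr addrspace(271)
  ret ptr addrspace(271) %q
}
```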

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 1 addition & 2 deletions
```diff
@@ -2999,9 +2999,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
     LLT PtrTy = MRI.getType(LdSt.getPointerReg());
 
+    // Can only handle AddressSpace 0, 64-bit pointers.
     if (PtrTy != LLT::pointer(0, 64)) {
-      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
-                        << ", expected: " << LLT::pointer(0, 64) << '\n');
       return false;
     }
```
Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@

```llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
; RUN: llc --fast-isel < %s | FileCheck %s
; RUN: llc --global-isel --global-isel-abort=2 < %s | FileCheck %s

; Source to regenerate:
; struct Foo {
;   int * __ptr32 p32;
;   int * __ptr64 p64;
;   __attribute__((address_space(9))) int *p_other;
; };
; extern "C" void use_foo(Foo *f);
; extern "C" int use_int(int i);
; extern "C" void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
;   f->p64 = i;
;   use_foo(f);
; }
; extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) {
;   *i = use_int(*i);
; }
; extern "C" void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
;   f->p64 = i;
;   use_foo(f);
; }
; extern "C" void test_zero_ext_store_load(int * __ptr32 __uptr i) {
;   *i = use_int(*i);
; }
; extern "C" void test_trunc(Foo *f, int * __ptr64 i) {
;   f->p32 = i;
;   use_foo(f);
; }
; extern "C" void test_noop1(Foo *f, int * __ptr32 i) {
;   f->p32 = i;
;   use_foo(f);
; }
; extern "C" void test_noop2(Foo *f, int * __ptr64 i) {
;   f->p64 = i;
;   use_foo(f);
; }
; extern "C" void test_null_arg(Foo *f, int * __ptr32 i) {
;   test_noop1(f, 0);
; }
; extern "C" void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
;   f->p32 = (int * __ptr32)i;
;   use_foo(f);
; }
;
; $ clang --target=aarch64-windows-msvc -fms-extensions -O2 -S -emit-llvm t.cpp

target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-windows-msvc"

; Function Attrs: mustprogress uwtable
define dso_local void @test_sign_ext(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_sign_ext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT:    sxtw x8, w1
; CHECK-NEXT:    str x8, [x0, #8]
; CHECK-NEXT:    b use_foo
entry:
  %0 = addrspacecast ptr addrspace(270) %i to ptr
  %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
  store ptr %0, ptr %p64, align 8
  tail call void @use_foo(ptr noundef %f)
  ret void
}

declare dso_local void @use_foo(ptr noundef) local_unnamed_addr #1

; Function Attrs: mustprogress uwtable
define dso_local void @test_sign_ext_store_load(ptr addrspace(270) nocapture noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_sign_ext_store_load:
; CHECK:       // %bb.0: // %entry
; CHECK:         sxtw x19, w0
; CHECK-NEXT:    ldr w0, [x19]
; CHECK-NEXT:    bl use_int
; CHECK-NEXT:    str w0, [x19]
entry:
  %0 = load i32, ptr addrspace(270) %i, align 4
  %call = tail call i32 @use_int(i32 noundef %0)
  store i32 %call, ptr addrspace(270) %i, align 4
  ret void
}

declare dso_local i32 @use_int(i32 noundef) local_unnamed_addr #1

; Function Attrs: mustprogress uwtable
define dso_local void @test_zero_ext(ptr noundef %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_zero_ext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, w1
; CHECK-NEXT:    str x8, [x0, #8]
; CHECK-NEXT:    b use_foo
entry:
  %0 = addrspacecast ptr addrspace(271) %i to ptr
  %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
  store ptr %0, ptr %p64, align 8
  tail call void @use_foo(ptr noundef %f)
  ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_zero_ext_store_load(ptr addrspace(271) nocapture noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_zero_ext_store_load:
; CHECK:       // %bb.0: // %entry
; CHECK:         mov w19, w0
; CHECK-NEXT:    ldr w0, [x19]
; CHECK-NEXT:    bl use_int
; CHECK-NEXT:    str w0, [x19]
entry:
  %0 = load i32, ptr addrspace(271) %i, align 4
  %call = tail call i32 @use_int(i32 noundef %0)
  store i32 %call, ptr addrspace(271) %i, align 4
  ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_trunc:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str w1, [x0]
; CHECK-NEXT:    b use_foo
entry:
  %0 = addrspacecast ptr %i to ptr addrspace(270)
  store ptr addrspace(270) %0, ptr %f, align 8
  tail call void @use_foo(ptr noundef nonnull %f)
  ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_noop1(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_noop1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str w1, [x0]
; CHECK-NEXT:    b use_foo
entry:
  store ptr addrspace(270) %i, ptr %f, align 8
  tail call void @use_foo(ptr noundef nonnull %f)
  ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_noop2(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_noop2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x1, [x0, #8]
; CHECK-NEXT:    b use_foo
entry:
  %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
  store ptr %i, ptr %p64, align 8
  tail call void @use_foo(ptr noundef %f)
  ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_null_arg(ptr noundef %f, ptr addrspace(270) nocapture noundef readnone %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_null_arg:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str wzr, [x0]
; CHECK-NEXT:    b use_foo
entry:
  store ptr addrspace(270) null, ptr %f, align 8
  tail call void @use_foo(ptr noundef nonnull %f)
  ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_unrecognized(ptr noundef %f, ptr addrspace(14) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_unrecognized:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str w1, [x0]
; CHECK-NEXT:    b use_foo
entry:
  %0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270)
  store ptr addrspace(270) %0, ptr %f, align 8
  tail call void @use_foo(ptr noundef nonnull %f)
  ret void
}

attributes #0 = { mustprogress uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
```

llvm/test/CodeGen/X86/mixed-ptr-sizes.ll

Lines changed: 2 additions & 0 deletions
```diff
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s | FileCheck %s --check-prefixes=ALL,CHECK
 ; RUN: llc -O0 < %s | FileCheck %s --check-prefixes=ALL,CHECK-O0
+; RUN: llc --fast-isel < %s | FileCheck %s --check-prefixes=ALL,CHECK
+; RUN: llc --global-isel --global-isel-abort=2 < %s | FileCheck %s --check-prefixes=ALL,CHECK
 
 ; Source to regenerate:
 ; struct Foo {
```
