Skip to content

Commit 4928093

Browse files
authored
[CIR] Upstream support for address of and dereference (llvm#134317)
This adds support for handling the address of and dereference unary operations in ClangIR code generation. This also adds handling for nullptr and proper initialization via the NullToPointer cast.
1 parent 9bfb4b8 commit 4928093

File tree

12 files changed

+365
-7
lines changed

12 files changed

+365
-7
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,22 +400,29 @@ def LoadOp : CIR_Op<"load", [
400400
let summary = "Load value from memory address";
401401
let description = [{
402402
`cir.load` reads a value (lvalue to rvalue conversion) given an address
403-
backed up by a `cir.ptr` type.
403+
backed up by a `cir.ptr` type. A unit attribute `deref` can be used to
404+
mark the resulting value as used by another operation to dereference
405+
a pointer.
404406

405407
Example:
406408

407409
```mlir
408410

409411
// Read from local variable, address in %0.
410412
%1 = cir.load %0 : !cir.ptr<i32>, i32
413+
414+
// Load address from memory at address %0. %3 is used by at least one
415+
// operation that dereferences a pointer.
416+
%3 = cir.load deref %0 : !cir.ptr<!cir.ptr<i32>>, !cir.ptr<i32>
411417
```
412418
}];
413419

414420
let arguments = (ins Arg<CIR_PointerType, "the address to load from",
415-
[MemRead]>:$addr);
421+
[MemRead]>:$addr, UnitAttr:$isDeref);
416422
let results = (outs CIR_AnyType:$result);
417423

418424
let assemblyFormat = [{
425+
(`deref` $isDeref^)?
419426
$addr `:` qualified(type($addr)) `,` type($result) attr-dict
420427
}];
421428

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ struct MissingFeatures {
107107
static bool cgFPOptionsRAII() { return false; }
108108
static bool metaDataNode() { return false; }
109109
static bool fastMathFlags() { return false; }
110+
static bool lvalueBaseInfo() { return false; }
111+
static bool alignCXXRecordDecl() { return false; }
112+
static bool setNonGC() { return false; }
110113

111114
// Missing types
112115
static bool dataMemberType() { return false; }

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

Lines changed: 160 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,147 @@ using namespace clang;
2525
using namespace clang::CIRGen;
2626
using namespace cir;
2727

28+
/// Given an expression of pointer type, try to
29+
/// derive a more accurate bound on the alignment of the pointer.
30+
Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr) {
31+
// We allow this with ObjC object pointers because of fragile ABIs.
32+
assert(expr->getType()->isPointerType() ||
33+
expr->getType()->isObjCObjectPointerType());
34+
expr = expr->IgnoreParens();
35+
36+
// Casts:
37+
if (auto const *ce = dyn_cast<CastExpr>(expr)) {
38+
if (auto const *ece = dyn_cast<ExplicitCastExpr>(ce)) {
39+
cgm.errorNYI(expr->getSourceRange(),
40+
"emitPointerWithAlignment: explicit cast");
41+
return Address::invalid();
42+
}
43+
44+
switch (ce->getCastKind()) {
45+
// Non-converting casts (but not C's implicit conversion from void*).
46+
case CK_BitCast:
47+
case CK_NoOp:
48+
case CK_AddressSpaceConversion: {
49+
cgm.errorNYI(expr->getSourceRange(),
50+
"emitPointerWithAlignment: noop cast");
51+
return Address::invalid();
52+
} break;
53+
54+
// Array-to-pointer decay. TODO(cir): BaseInfo and TBAAInfo.
55+
case CK_ArrayToPointerDecay: {
56+
cgm.errorNYI(expr->getSourceRange(),
57+
"emitPointerWithAlignment: array-to-pointer decay");
58+
return Address::invalid();
59+
}
60+
61+
case CK_UncheckedDerivedToBase:
62+
case CK_DerivedToBase: {
63+
cgm.errorNYI(expr->getSourceRange(),
64+
"emitPointerWithAlignment: derived-to-base cast");
65+
return Address::invalid();
66+
}
67+
68+
case CK_AnyPointerToBlockPointerCast:
69+
case CK_BaseToDerived:
70+
case CK_BaseToDerivedMemberPointer:
71+
case CK_BlockPointerToObjCPointerCast:
72+
case CK_BuiltinFnToFnPtr:
73+
case CK_CPointerToObjCPointerCast:
74+
case CK_DerivedToBaseMemberPointer:
75+
case CK_Dynamic:
76+
case CK_FunctionToPointerDecay:
77+
case CK_IntegralToPointer:
78+
case CK_LValueToRValue:
79+
case CK_LValueToRValueBitCast:
80+
case CK_NullToMemberPointer:
81+
case CK_NullToPointer:
82+
case CK_ReinterpretMemberPointer:
83+
// Common pointer conversions, nothing to do here.
84+
// TODO: Is there any reason to treat base-to-derived conversions
85+
// specially?
86+
break;
87+
88+
case CK_ARCConsumeObject:
89+
case CK_ARCExtendBlockObject:
90+
case CK_ARCProduceObject:
91+
case CK_ARCReclaimReturnedObject:
92+
case CK_AtomicToNonAtomic:
93+
case CK_BooleanToSignedIntegral:
94+
case CK_ConstructorConversion:
95+
case CK_CopyAndAutoreleaseBlockObject:
96+
case CK_Dependent:
97+
case CK_FixedPointCast:
98+
case CK_FixedPointToBoolean:
99+
case CK_FixedPointToFloating:
100+
case CK_FixedPointToIntegral:
101+
case CK_FloatingCast:
102+
case CK_FloatingComplexCast:
103+
case CK_FloatingComplexToBoolean:
104+
case CK_FloatingComplexToIntegralComplex:
105+
case CK_FloatingComplexToReal:
106+
case CK_FloatingRealToComplex:
107+
case CK_FloatingToBoolean:
108+
case CK_FloatingToFixedPoint:
109+
case CK_FloatingToIntegral:
110+
case CK_HLSLAggregateSplatCast:
111+
case CK_HLSLArrayRValue:
112+
case CK_HLSLElementwiseCast:
113+
case CK_HLSLVectorTruncation:
114+
case CK_IntToOCLSampler:
115+
case CK_IntegralCast:
116+
case CK_IntegralComplexCast:
117+
case CK_IntegralComplexToBoolean:
118+
case CK_IntegralComplexToFloatingComplex:
119+
case CK_IntegralComplexToReal:
120+
case CK_IntegralRealToComplex:
121+
case CK_IntegralToBoolean:
122+
case CK_IntegralToFixedPoint:
123+
case CK_IntegralToFloating:
124+
case CK_LValueBitCast:
125+
case CK_MatrixCast:
126+
case CK_MemberPointerToBoolean:
127+
case CK_NonAtomicToAtomic:
128+
case CK_ObjCObjectLValueCast:
129+
case CK_PointerToBoolean:
130+
case CK_PointerToIntegral:
131+
case CK_ToUnion:
132+
case CK_ToVoid:
133+
case CK_UserDefinedConversion:
134+
case CK_VectorSplat:
135+
case CK_ZeroToOCLOpaqueType:
136+
llvm_unreachable("unexpected cast for emitPointerWithAlignment");
137+
}
138+
}
139+
140+
// Unary &
141+
if (const UnaryOperator *uo = dyn_cast<UnaryOperator>(expr)) {
142+
// TODO(cir): maybe we should use cir.unary for pointers here instead.
143+
if (uo->getOpcode() == UO_AddrOf) {
144+
cgm.errorNYI(expr->getSourceRange(), "emitPointerWithAlignment: unary &");
145+
return Address::invalid();
146+
}
147+
}
148+
149+
// std::addressof and variants.
150+
if (auto const *call = dyn_cast<CallExpr>(expr)) {
151+
switch (call->getBuiltinCallee()) {
152+
default:
153+
break;
154+
case Builtin::BIaddressof:
155+
case Builtin::BI__addressof:
156+
case Builtin::BI__builtin_addressof: {
157+
cgm.errorNYI(expr->getSourceRange(),
158+
"emitPointerWithAlignment: builtin addressof");
159+
return Address::invalid();
160+
}
161+
}
162+
}
163+
164+
// Otherwise, use the alignment of the type.
165+
return makeNaturalAddressForPointer(
166+
emitScalarExpr(expr), expr->getType()->getPointeeType(), CharUnits());
167+
}
168+
28169
void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
29170
bool isInit) {
30171
if (!dst.isSimple()) {
@@ -193,8 +334,25 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *e) {
193334

194335
switch (op) {
195336
case UO_Deref: {
196-
cgm.errorNYI(e->getSourceRange(), "UnaryOp dereference");
197-
return LValue();
337+
QualType t = e->getSubExpr()->getType()->getPointeeType();
338+
assert(!t.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type");
339+
340+
assert(!cir::MissingFeatures::lvalueBaseInfo());
341+
assert(!cir::MissingFeatures::opTBAA());
342+
Address addr = emitPointerWithAlignment(e->getSubExpr());
343+
344+
// Tag 'load' with deref attribute.
345+
// FIXME: This misses some dereference cases and has problematic interactions
346+
// with other operators.
347+
if (auto loadOp =
348+
dyn_cast<cir::LoadOp>(addr.getPointer().getDefiningOp())) {
349+
loadOp.setIsDerefAttr(mlir::UnitAttr::get(&getMLIRContext()));
350+
}
351+
352+
LValue lv = LValue::makeAddr(addr, t);
353+
assert(!cir::MissingFeatures::addressSpace());
354+
assert(!cir::MissingFeatures::setNonGC());
355+
return lv;
198356
}
199357
case UO_Real:
200358
case UO_Imag: {

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
161161
return VisitCastExpr(e);
162162
}
163163

164+
/// Emit a `nullptr` literal as the null constant of the literal's own type,
/// located at the literal's source range.
mlir::Value VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *e) {
  mlir::Location loc = cgf.getLoc(e->getSourceRange());
  return cgf.cgm.emitNullConstant(e->getType(), loc);
}
168+
164169
/// Perform a pointer to boolean conversion.
165170
mlir::Value emitPointerToBoolConversion(mlir::Value v, QualType qt) {
166171
// TODO(cir): comparing the ptr to null is done when lowering CIR to LLVM.
@@ -444,6 +449,22 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
444449
llvm_unreachable("Unexpected signed overflow behavior kind");
445450
}
446451

452+
/// Emit unary `&`. The common case evaluates the operand as an lvalue and
/// returns its pointer. Taking the address of a member is not implemented
/// yet: it is reported through errorNYI and a null pointer of the converted
/// result type is returned as a placeholder.
mlir::Value VisitUnaryAddrOf(const UnaryOperator *e) {
  if (!llvm::isa<MemberPointerType>(e->getType()))
    return cgf.emitLValue(e->getSubExpr()).getPointer();

  cgf.cgm.errorNYI(e->getSourceRange(), "Address of member pointer");
  return builder.getNullPtr(cgf.convertType(e->getType()),
                            cgf.getLoc(e->getExprLoc()));
}
461+
462+
/// Emit unary `*` as an rvalue. A dereference whose result type is void has
/// nothing to load, so only the operand is visited (the value produced
/// should be unused); otherwise the expression is loaded through its lvalue.
mlir::Value VisitUnaryDeref(const UnaryOperator *e) {
  const bool yieldsVoid = e->getType()->isVoidType();
  return yieldsVoid ? Visit(e->getSubExpr()) : emitLoadOfLValue(e);
}
467+
447468
mlir::Value VisitUnaryPlus(const UnaryOperator *e) {
448469
return emitUnaryPlusOrMinus(e, cir::UnaryOpKind::Plus);
449470
}
@@ -937,9 +958,11 @@ mlir::Value CIRGenFunction::emitPromotedScalarExpr(const Expr *e,
937958
}
938959

939960
[[maybe_unused]] static bool mustVisitNullValue(const Expr *e) {
940-
// If a null pointer expression's type is the C++0x nullptr_t, then
941-
// it's not necessarily a simple constant and it must be evaluated
961+
// If a null pointer expression's type is the C++0x nullptr_t and
962+
// the expression is not a simple literal, it must be evaluated
942963
// for its potential side effects.
964+
if (isa<IntegerLiteral>(e) || isa<CXXNullPtrLiteralExpr>(e))
965+
return false;
943966
return e->getType()->isNullPtrType();
944967
}
945968

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,17 @@ class CIRGenFunction : public CIRGenTypeCache {
222222
// TODO: Add symbol table support
223223
}
224224

225+
/// Construct an address with the natural alignment of T. If a pointer to T
226+
/// is expected to be signed, the pointer passed to this function must have
227+
/// been signed, and the returned Address will have the pointer authentication
228+
/// information needed to authenticate the signed pointer.
229+
Address makeNaturalAddressForPointer(mlir::Value ptr, QualType t,
230+
CharUnits alignment) {
231+
if (alignment.isZero())
232+
alignment = cgm.getNaturalTypeAlignment(t);
233+
return Address(ptr, convertTypeForMem(t), alignment);
234+
}
235+
225236
cir::FuncOp generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
226237
cir::FuncType funcType);
227238

@@ -468,6 +479,18 @@ class CIRGenFunction : public CIRGenTypeCache {
468479
/// FIXME: document this function better.
469480
LValue emitLValue(const clang::Expr *e);
470481

482+
/// Given an expression with a pointer type, emit the value and compute our
483+
/// best estimate of the alignment of the pointee.
484+
///
485+
/// One reasonable way to use this information is when there's a language
486+
/// guarantee that the pointer must be aligned to some stricter value, and
487+
/// we're simply trying to ensure that sufficiently obvious uses of under-
488+
/// aligned objects don't get miscompiled; for example, a placement new
489+
/// into the address of a local variable. In such a case, it's quite
490+
/// reasonable to just ignore the returned alignment when it isn't from an
491+
/// explicit source.
492+
Address emitPointerWithAlignment(const clang::Expr *expr);
493+
471494
mlir::LogicalResult emitReturnStmt(const clang::ReturnStmt &s);
472495

473496
/// Emit a conversion from the specified type to the specified destination

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,57 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
7474
builder.getStringAttr(getTriple().str()));
7575
}
7676

77+
/// Compute the alignment to assume for an object of type \p t when nothing
/// more specific is known.
///
/// The result is, in order of precedence:
///   1. the maximum alignment declared on a typedef, when \p t is a typedef
///      type carrying one;
///   2. one, when the base element type of \p t is incomplete;
///   3. one, when the base element type is qualified as unaligned;
///   4. the AST context's alignment for the base element type, capped to the
///      MaxTypeAlign language option unless alignment is required on the
///      type.
CharUnits CIRGenModule::getNaturalTypeAlignment(QualType t) {
  assert(!cir::MissingFeatures::opTBAA());

  // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But
  // that doesn't return the information we need to compute BaseInfo.

  // Alignment attributes on a typedef are honored even for incomplete types,
  // and also straight for C++ class types, even as pointees; there's an
  // expressivity gap here.
  if (const auto *typedefTy = t->getAs<TypedefType>()) {
    const unsigned declaredBits = typedefTy->getDecl()->getMaxAlignment();
    if (declaredBits != 0) {
      assert(!cir::MissingFeatures::lvalueBaseInfo());
      return astContext.toCharUnitsFromBits(declaredBits);
    }
  }

  // Look at the base element type so that incomplete array types do not
  // confuse the analysis below.
  t = astContext.getBaseElementType(t);

  if (t->isIncompleteType()) {
    // We could try to replicate the logic from
    // ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the
    // type is incomplete, so it's impossible to test. We could try to reuse
    // getTypeAlignIfKnown, but that doesn't return the information we need
    // to set BaseInfo. So just ignore the possibility that the alignment is
    // greater than one.
    assert(!cir::MissingFeatures::lvalueBaseInfo());
    return CharUnits::One();
  }

  assert(!cir::MissingFeatures::lvalueBaseInfo());

  // Start from the unaligned answer and refine it only when the type is not
  // qualified as unaligned.
  CharUnits alignment = CharUnits::One();
  if (!t.getQualifiers().hasUnaligned()) {
    assert(!cir::MissingFeatures::alignCXXRecordDecl());
    alignment = astContext.getTypeAlignInChars(t);
  }

  // Cap to the global maximum type alignment unless the alignment
  // was somehow explicit on the type. A zero option value disables the cap.
  const unsigned maxAlign = astContext.getLangOpts().MaxTypeAlign;
  if (maxAlign != 0 && alignment.getQuantity() > maxAlign &&
      !astContext.isAlignmentRequired(t))
    alignment = CharUnits::fromQuantity(maxAlign);

  return alignment;
}
127+
77128
mlir::Location CIRGenModule::getLoc(SourceLocation cLoc) {
78129
assert(cLoc.isValid() && "expected valid source location");
79130
const SourceManager &sm = astContext.getSourceManager();

clang/lib/CIR/CodeGen/CIRGenModule.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ class CIRGenModule : public CIRGenTypeCache {
8989
mlir::Location getLoc(clang::SourceLocation cLoc);
9090
mlir::Location getLoc(clang::SourceRange cRange);
9191

92+
/// FIXME: this could likely be a common helper and not necessarily related
93+
/// with codegen.
94+
clang::CharUnits getNaturalTypeAlignment(clang::QualType t);
95+
9296
void emitTopLevelDecl(clang::Decl *decl);
9397

9498
bool verifyModule() const;

clang/lib/CIR/CodeGen/CIRGenTypes.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,14 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
183183
resultType = cgm.SInt32Ty;
184184
break;
185185

186+
case BuiltinType::NullPtr:
187+
// Add proper CIR type for it? this looks mostly useful for sema related
188+
// things (like for overloads accepting void), for now, given that
189+
// `sizeof(std::nullptr_t)` is equal to `sizeof(void *)`, model
190+
// std::nullptr_t as !cir.ptr<!void>
191+
resultType = builder.getVoidPtrTy();
192+
break;
193+
186194
default:
187195
cgm.errorNYI(SourceLocation(), "processing of built-in type", type);
188196
resultType = cgm.SInt32Ty;

clang/lib/CIR/CodeGen/CIRGenTypes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class CIRGenTypes {
7474

7575
/// Return whether a type can be zero-initialized (in the C++ sense) with an
7676
/// LLVM zeroinitializer.
77-
bool isZeroInitializable(clang::QualType t);
77+
bool isZeroInitializable(clang::QualType ty);
7878
};
7979

8080
} // namespace clang::CIRGen

0 commit comments

Comments
 (0)