-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[mlir][llvm] support -new-struct-path-tbaa #119698
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-llvm Author: PikachuHy (PikachuHyA) ChangesFull diff: https://github.com/llvm/llvm-project/pull/119698.diff 5 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index e8eeafd09a9cba..198e1f8982ef14 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -1080,8 +1080,84 @@ def LLVM_TBAATagAttr : LLVM_Attr<"TBAATag", "tbaa_tag"> {
let assemblyFormat = "`<` struct(params) `>`";
}
+def LLVM_TBAAStructFieldAttr : LLVM_Attr<"TBAAStructField", "tbaa_struct_field"> {
+ let parameters = (ins
+ "TBAANodeAttr":$typeDesc,
+ "int64_t":$offset,
+ "int64_t":$size
+ );
+ let assemblyFormat = "`<` struct(params) `>`";
+}
+
+
+def LLVM_TBAAStructFieldAttrArray : ArrayRefParameter<"TBAAStructFieldAttr"> {
+ let printer = [{
+ $_printer << '{';
+ llvm::interleaveComma($_self, $_printer, [&](TBAAStructFieldAttr attr) {
+ $_printer.printStrippedAttrOrType(attr);
+ });
+ $_printer << '}';
+ }];
+
+ let parser = [{
+ [&]() -> FailureOr<SmallVector<TBAAStructFieldAttr>> {
+ using Result = SmallVector<TBAAStructFieldAttr>;
+ if ($_parser.parseLBrace())
+ return failure();
+ FailureOr<Result> result = FieldParser<Result>::parse($_parser);
+ if (failed(result))
+ return failure();
+ if ($_parser.parseRBrace())
+ return failure();
+ return result;
+ }()
+ }];
+}
+
+def LLVM_TBAATypeNodeAttr : LLVM_Attr<"TBAATypeNode", "tbaa_type_node", [], "TBAANodeAttr"> {
+ let parameters = (ins
+ "TBAANodeAttr":$parent,
+ "int64_t":$size,
+ StringRefParameter<>:$id,
+ LLVM_TBAAStructFieldAttrArray:$fields
+ );
+ let assemblyFormat = "`<` struct(params) `>`";
+}
+
+def LLVM_TBAAAccessTagAttr : LLVM_Attr<"TBAAAccessTag", "tbaa_access_tag"> {
+ let parameters = (ins
+ "TBAATypeNodeAttr":$base_type,
+ "TBAATypeNodeAttr":$access_type,
+ "int64_t":$offset,
+ "int64_t":$size
+ );
+ let builders = [
+ AttrBuilderWithInferredContext<(ins "TBAATypeNodeAttr":$baseType,
+ "TBAATypeNodeAttr":$accessType,
+ "int64_t":$offset,
+ "int64_t":$size), [{
+ return $_get(baseType.getContext(), baseType, accessType, offset, size);
+ }]>
+ ];
+ let assemblyFormat = "`<` struct(params) `>`";
+}
+
+def LLVM_TBAAAccessTagArrayAttr
+ : TypedArrayAttrBase<LLVM_TBAAAccessTagAttr,
+ LLVM_TBAAAccessTagAttr.summary # " array"> {
+ let constBuilderCall = ?;
+}
+
+// def LLVM_TBAATagAttr2 : AnyAttrOf<[
+// LLVM_TBAATagAttr,
+// LLVM_TBAAAccessTagAttr
+// ]>;
+
def LLVM_TBAATagArrayAttr
- : TypedArrayAttrBase<LLVM_TBAATagAttr,
+ : TypedArrayAttrBase<AnyAttrOf<[
+ LLVM_TBAATagAttr,
+ LLVM_TBAAAccessTagAttr
+]>,
LLVM_TBAATagAttr.summary # " array"> {
let constBuilderCall = ?;
}
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index ffeeeae57ae952..c7a79aa330d3da 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -323,7 +323,7 @@ class ModuleTranslation {
/// Returns the LLVM metadata corresponding to the given mlir LLVM dialect
/// TBAATagAttr.
- llvm::MDNode *getTBAANode(TBAATagAttr tbaaAttr) const;
+ llvm::MDNode *getTBAANode(Attribute tbaaAttr) const;
/// Process tbaa LLVM Metadata operations and create LLVM
/// metadata nodes for them.
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 6b2d8943bf4885..b2b0b9b331e0b4 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -3401,7 +3401,8 @@ struct LLVMOpAsmDialectInterface : public OpAsmDialectInterface {
LoopVectorizeAttr, LoopInterleaveAttr, LoopUnrollAttr,
LoopUnrollAndJamAttr, LoopLICMAttr, LoopDistributeAttr,
LoopPipelineAttr, LoopPeeledAttr, LoopUnswitchAttr, TBAARootAttr,
- TBAATagAttr, TBAATypeDescriptorAttr>([&](auto attr) {
+ TBAATagAttr, TBAATypeDescriptorAttr, TBAAAccessTagAttr,
+ TBAATypeNodeAttr>([&](auto attr) {
os << decltype(attr)::getMnemonic();
return AliasResult::OverridableAlias;
})
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
index cff16afc73af3f..6a9395b1f4a26e 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
@@ -58,7 +58,15 @@ mlir::LLVM::detail::verifyAliasAnalysisOpInterface(Operation *op) {
ArrayAttr tags = iface.getTBAATagsOrNull();
if (!tags)
return success();
-
+ if (tags.size() > 0) {
+ if (mlir::isa<TBAATagAttr>(tags[0])) {
+ return isArrayOf<TBAATagAttr>(op, tags);
+ }
+
+ if (mlir::isa<TBAAAccessTagAttr>(tags[0])) {
+ return isArrayOf<TBAAAccessTagAttr>(op, tags);
+ }
+ }
return isArrayOf<TBAATagAttr>(op, tags);
}
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index ceb8ba3b33818b..6a6c29869ba805 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1766,7 +1766,8 @@ void ModuleTranslation::setAliasScopeMetadata(AliasAnalysisOpInterface op,
llvm::LLVMContext::MD_noalias);
}
-llvm::MDNode *ModuleTranslation::getTBAANode(TBAATagAttr tbaaAttr) const {
+// llvm::MDNode *ModuleTranslation::getTBAANode(TBAATagAttr tbaaAttr) const {
+llvm::MDNode *ModuleTranslation::getTBAANode(Attribute tbaaAttr) const {
return tbaaMetadataMapping.lookup(tbaaAttr);
}
@@ -1786,7 +1787,8 @@ void ModuleTranslation::setTBAAMetadata(AliasAnalysisOpInterface op,
return;
}
- llvm::MDNode *node = getTBAANode(cast<TBAATagAttr>(tagRefs[0]));
+ // llvm::MDNode *node = getTBAANode(cast<TBAATagAttr>(tagRefs[0]));
+ llvm::MDNode *node = getTBAANode(tagRefs[0]);
inst->setMetadata(llvm::LLVMContext::MD_tbaa, node);
}
@@ -1806,6 +1808,7 @@ void ModuleTranslation::setBranchWeightsMetadata(BranchWeightOpInterface op) {
LogicalResult ModuleTranslation::createTBAAMetadata() {
llvm::LLVMContext &ctx = llvmModule->getContext();
llvm::IntegerType *offsetTy = llvm::IntegerType::get(ctx, 64);
+ llvm::IntegerType *sizeTy = llvm::IntegerType::get(ctx, 64);
// Walk the entire module and create all metadata nodes for the TBAA
// attributes. The code below relies on two invariants of the
@@ -1833,6 +1836,23 @@ LogicalResult ModuleTranslation::createTBAAMetadata() {
tbaaMetadataMapping.insert({descriptor, llvm::MDNode::get(ctx, operands)});
});
+ walker.addWalk([&](TBAATypeNodeAttr descriptor) {
+ SmallVector<llvm::Metadata *> operands;
+ operands.push_back(tbaaMetadataMapping.lookup(descriptor.getParent()));
+ operands.push_back(llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(sizeTy, descriptor.getSize())));
+ operands.push_back(llvm::MDString::get(ctx, descriptor.getId()));
+ for (auto field : descriptor.getFields()) {
+ operands.push_back(tbaaMetadataMapping.lookup(field.getTypeDesc()));
+ operands.push_back(llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(offsetTy, field.getOffset())));
+ operands.push_back(llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(sizeTy, field.getSize())));
+ }
+
+ tbaaMetadataMapping.insert({descriptor, llvm::MDNode::get(ctx, operands)});
+ });
+
walker.addWalk([&](TBAATagAttr tag) {
SmallVector<llvm::Metadata *> operands;
@@ -1848,6 +1868,20 @@ LogicalResult ModuleTranslation::createTBAAMetadata() {
tbaaMetadataMapping.insert({tag, llvm::MDNode::get(ctx, operands)});
});
+ walker.addWalk([&](TBAAAccessTagAttr tag) {
+ SmallVector<llvm::Metadata *> operands;
+
+ operands.push_back(tbaaMetadataMapping.lookup(tag.getBaseType()));
+ operands.push_back(tbaaMetadataMapping.lookup(tag.getAccessType()));
+
+ operands.push_back(llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(offsetTy, tag.getOffset())));
+ operands.push_back(llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(sizeTy, tag.getSize())));
+
+ tbaaMetadataMapping.insert({tag, llvm::MDNode::get(ctx, operands)});
+ });
+
mlirModule->walk([&](AliasAnalysisOpInterface analysisOpInterface) {
if (auto attr = analysisOpInterface.getTBAATagsOrNull())
walker.walk(attr);
|
0db13b8
to
a18a074
Compare
a18a074
to
df5635f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
The direction looks good.
With regards to the attribute names are they derived from LLVM? Does the new format somehow have a name that we could potentially use to prefix/postfix things. The names Tag and AccessTag for almost the same thing are a bit confusing.
For now I mostly looked at the attribute definitions. Once they are set it may make sense to proceed with some tests similar to:
- tbaa-rountrip.mlir
- tbaa-invalid.mlir
In addition, some export tests will make sense as well.
}]; | ||
} | ||
|
||
def LLVM_TBAATypeNodeAttr : LLVM_Attr<"TBAATypeNode", "tbaa_type_node", [], "TBAANodeAttr"> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe if the class should derive from TBAANodeAttr
it also needs to be added here:
return llvm::isa<TBAATypeDescriptorAttr, TBAARootAttr>(attr); |
@@ -1084,8 +1084,79 @@ def LLVM_TBAATagAttr : LLVM_Attr<"TBAATag", "tbaa_tag"> { | |||
let assemblyFormat = "`<` struct(params) `>`"; | |||
} | |||
|
|||
def LLVM_TBAAStructFieldAttr : LLVM_Attr<"TBAAStructField", "tbaa_struct_field"> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
def LLVM_TBAAStructFieldAttr : LLVM_Attr<"TBAAStructField", "tbaa_struct_field"> { | |
//===----------------------------------------------------------------------===// | |
// TBAAStructFieldAttr | |
//===----------------------------------------------------------------------===// | |
def LLVM_TBAAStructFieldAttr : LLVM_Attr<"TBAAStructField", "tbaa_struct_field"> { |
nit: let's use banners to structure the file a bit.
"int64_t":$offset, | ||
"int64_t":$size | ||
); | ||
let assemblyFormat = "`<` struct(params) `>`"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you use let description / let summary to provide some doc/examples for the relevant attributes.
|
||
def LLVM_TBAATypeNodeAttr : LLVM_Attr<"TBAATypeNode", "tbaa_type_node", [], "TBAANodeAttr"> { | ||
let parameters = (ins | ||
"TBAANodeAttr":$parent, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are new and old format mixed? Or in other words can this be a TBAATypeDescriptorAttr
? If not we may want to have an attribute verifier that ensures the parent is either a root or another LLVM_TBAATypeNodeAttr.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are new and old format mixed? Or in other words can this be a
TBAATypeDescriptorAttr
?
No.
The only other TBAANodeAttr
used by LLVM_TBAATypeNodeAttr
is LLVM_TBAARootAttr
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense.
If we want to verify this we have sort of two options:
- we add a verifier to attribute that checks that parent is not a
TBAATypeDescriptorAttr
. - We implement a separate base class for the new TBBAA format which could be called
TBAAAccessNodeAttr
if we follow the naming scheme of the access tag. However, this also requires
aTBAAAccessRootAttr
since an attribute can only inherit from one base class AFAIK.
I do not have a clear preference.
"TBAATypeNodeAttr":$base_type, | ||
"TBAATypeNodeAttr":$access_type, | ||
"int64_t":$offset, | ||
"int64_t":$size |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It feels like the difference to TBAATagAttr is minimal (i.e mainly the size). However, I guess it still makes sense to separate them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another option is to add an optional "int64_t": $size
in TBAATagAttr
. However, this approach carries the risk of mixing the old and new formats.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think given LLVM proper has a counter part and since the two TBAA variants cannot be mixed, I am actually in favor of having a separate LLVM_TBAAAccessTagAttr!
let assemblyFormat = "`<` struct(params) `>`"; | ||
} | ||
|
||
def LLVM_TBAAAccessTagArrayAttr |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this used?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no.
The array of LLVM_TBAAAccessTagAttr
is used by the following definition:
def LLVM_TBAATagArrayAttr
: TypedArrayAttrBase<AnyAttrOf<[
LLVM_TBAATagAttr,
LLVM_TBAAAccessTagAttr
]>
Therefore, the LLVM_TBAAAccessTagArrayAttr
is no longer needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed.
@@ -58,7 +58,15 @@ mlir::LLVM::detail::verifyAliasAnalysisOpInterface(Operation *op) { | |||
ArrayAttr tags = iface.getTBAATagsOrNull(); | |||
if (!tags) | |||
return success(); | |||
|
|||
if (tags.size() > 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't it just be:
return isArrayOf<TBAATagAttr>(op, tags) || isArrayOf<TBAAAccessTagAttr>(op, tags);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The isArrayOf<xxx>
reports an error if any element in the provided array is not type of xxx.
and the result type LogicalResult
dose not support the ||
operator.
/// Verifies that all elements of `array` are instances of `Attr`.
template <class AttrT>
static LogicalResult isArrayOf(Operation *op, ArrayAttr array) {
for (Attribute iter : array)
if (!isa<AttrT>(iter))
return op->emitOpError("expected op to return array of ")
<< AttrT::getMnemonic() << " attributes";
return success();
}
see
https://github.com/llvm/llvm-project/blob/main/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp#L19-L27
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok I understand. I would suggest to extend the AliasAnalysisOpInterface. It currently has one function that returns the TBAA tags. How about implementing two functions:
iface.getTBBATagsOrNull()
and
iface.getTBBAAccessTagsOrNull()
This could potentially be implemented by having two arrays stored on the alias operations. Or by checking if the array contains the old or the new kind of TBAA attributes. That way the verifier can use the existing logic for the two cases separately. And to users of the exiting TBAA attributes don't need to understand the new kind of TBAA metadata.
WDYT?
} | ||
|
||
|
||
def LLVM_TBAAStructFieldAttrArray : ArrayRefParameter<"TBAAStructFieldAttr"> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder why we implemented our own array attributes at the time. @vzakhari do you remember? Potentially we could just use:
OptionalArrayRefParameter<"TBAAStructFieldAttr">:$fields
In LLVM_TBAATypeNodeAttr.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I do not remember. It would probably work either way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@PikachuHyA It is quite a lot of custom printing / parsing that could be saved with OptionalArrayRefParameter<"TBAAStructFieldAttr">:$fields
. So maybe we want to give it a shot.
hi @gysit , thanks for your comments. I've encountered a crash, so I need more time to troubleshoot the issue.
Regarding your question about the attribute names: they are derived from function names and class name in CodeGenTBAA.cpp the name of
the name of https://github.com/llvm/llvm-project/blob/main/clang/lib/CodeGen/CodeGenTBAA.cpp#L636-L639 the name of https://github.com/llvm/llvm-project/blob/main/clang/lib/CodeGen/CodeGenTBAA.cpp#L508-L513 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the naming link. I believe it makes sense to follow this naming convention then. I would have liked some consistent prefix such as TBAA_NEW_tag, TBAA_NEW_type_descriptor etc. But then it is probably unclear what a good choice for NEW would be. In any case, we should stick to what LLVM uses.
"TBAATypeNodeAttr":$base_type, | ||
"TBAATypeNodeAttr":$access_type, | ||
"int64_t":$offset, | ||
"int64_t":$size |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think given LLVM proper has a counter part and since the two TBAA variants cannot be mixed, I am actually in favor of having a separate LLVM_TBAAAccessTagAttr!
|
||
def LLVM_TBAATypeNodeAttr : LLVM_Attr<"TBAATypeNode", "tbaa_type_node", [], "TBAANodeAttr"> { | ||
let parameters = (ins | ||
"TBAANodeAttr":$parent, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense.
If we want to verify this we have sort of two options:
- we add a verifier to attribute that checks that parent is not a
TBAATypeDescriptorAttr
. - We implement a separate base class for the new TBBAA format which could be called
TBAAAccessNodeAttr
if we follow the naming scheme of the access tag. However, this also requires
aTBAAAccessRootAttr
since an attribute can only inherit from one base class AFAIK.
I do not have a clear preference.
@@ -58,7 +58,15 @@ mlir::LLVM::detail::verifyAliasAnalysisOpInterface(Operation *op) { | |||
ArrayAttr tags = iface.getTBAATagsOrNull(); | |||
if (!tags) | |||
return success(); | |||
|
|||
if (tags.size() > 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok I understand. I would suggest to extend the AliasAnalysisOpInterface. It currently has one function that returns the TBAA tags. How about implementing two functions:
iface.getTBBATagsOrNull()
and
iface.getTBBAAccessTagsOrNull()
This could potentially be implemented by having two arrays stored on the alias operations. Or by checking if the array contains the old or the new kind of TBAA attributes. That way the verifier can use the existing logic for the two cases separately. And to users of the exiting TBAA attributes don't need to understand the new kind of TBAA metadata.
WDYT?
This patch introduces attributes such as
LLVM_TBAAStructFieldAttr
,LLVM_TBAATypeNodeAttr
, andLLVM_TBAAAccessTagAttr
that are specifically designed to handle the new struct path TBAA.