Skip to content

[DebugInfo] Use DW_op_bit_piece for structured bindings of bitfields #85665

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 9 additions & 43 deletions clang/lib/CodeGen/CGDebugInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4777,40 +4777,6 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
return D;
}

llvm::DIType *CGDebugInfo::CreateBindingDeclType(const BindingDecl *BD) {
llvm::DIFile *Unit = getOrCreateFile(BD->getLocation());

// If the declaration is bound to a bitfield struct field, its type may have a
// size that is different from its deduced declaration type's.
if (const MemberExpr *ME = dyn_cast<MemberExpr>(BD->getBinding())) {
if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
if (FD->isBitField()) {
ASTContext &Context = CGM.getContext();
const CGRecordLayout &RL =
CGM.getTypes().getCGRecordLayout(FD->getParent());
const CGBitFieldInfo &Info = RL.getBitFieldInfo(FD);

// Find an integer type with the same bitwidth as the bitfield size. If
// no suitable type is present in the target, give up on producing debug
// information as it would be wrong. It is certainly possible to produce
// correct debug info, but the logic isn't currently implemented.
uint64_t BitfieldSizeInBits = Info.Size;
QualType IntTy =
Context.getIntTypeForBitwidth(BitfieldSizeInBits, Info.IsSigned);
if (IntTy.isNull())
return nullptr;
Qualifiers Quals = BD->getType().getQualifiers();
QualType FinalTy = Context.getQualifiedType(IntTy, Quals);
llvm::DIType *Ty = getOrCreateType(FinalTy, Unit);
assert(Ty);
return Ty;
}
}
}

return getOrCreateType(BD->getType(), Unit);
}

llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
llvm::Value *Storage,
std::optional<unsigned> ArgNo,
Expand All @@ -4825,7 +4791,8 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
if (isa<DeclRefExpr>(BD->getBinding()))
return nullptr;

llvm::DIType *Ty = CreateBindingDeclType(BD);
llvm::DIFile *Unit = getOrCreateFile(BD->getLocation());
llvm::DIType *Ty = getOrCreateType(BD->getType(), Unit);

// If there is no debug info for this type then do not emit debug info
// for this variable.
Expand All @@ -4851,7 +4818,6 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
unsigned Column = getColumnNumber(BD->getLocation());
StringRef Name = BD->getName();
auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
llvm::DIFile *Unit = getOrCreateFile(BD->getLocation());
// Create the descriptor for the variable.
llvm::DILocalVariable *D = DBuilder.createAutoVariable(
Scope, Name, Unit, Line, Ty, CGM.getLangOpts().Optimize,
Expand All @@ -4865,13 +4831,13 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
const ASTRecordLayout &layout =
CGM.getContext().getASTRecordLayout(parent);
const uint64_t fieldOffset = layout.getFieldOffset(fieldIndex);

if (fieldOffset != 0) {
// Currently if the field offset is not a multiple of byte, the produced
// location would not be accurate. Therefore give up.
if (fieldOffset % CGM.getContext().getCharWidth() != 0)
return nullptr;

if (FD->isBitField()) {
Expr.push_back(llvm::dwarf::DW_OP_bit_piece);
Expr.push_back(FD->getBitWidthValue(CGM.getContext()));
Expr.push_back(fieldOffset);
} else if (fieldOffset != 0) {
assert(fieldOffset % CGM.getContext().getCharWidth() == 0 &&
"Unexpected non-bitfield with non-byte-aligned offset");
Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
Expr.push_back(
CGM.getContext().toCharUnitsFromBits(fieldOffset).getQuantity());
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/CodeGen/CGDebugInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,6 @@ class CGDebugInfo {
llvm::DIScope *RecordTy,
const RecordDecl *RD);

/// Create type for binding declarations.
llvm::DIType *CreateBindingDeclType(const BindingDecl *BD);

/// Create an anonnymous zero-size separator for bit-field-decl if needed on
/// the target.
llvm::DIDerivedType *createBitFieldSeparatorIfNeeded(
Expand Down
118 changes: 56 additions & 62 deletions clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp

Large diffs are not rendered by default.

14 changes: 10 additions & 4 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6178,10 +6178,16 @@ The current supported opcode vocabulary is limited:
the last entry from the second last entry and appends the result to the
expression stack.
- ``DW_OP_plus_uconst, 93`` adds ``93`` to the working expression.
- ``DW_OP_LLVM_fragment, 16, 8`` specifies the offset and size (``16`` and ``8``
here, respectively) of the variable fragment from the working expression. Note
that contrary to DW_OP_bit_piece, the offset is describing the location
within the described source variable.
- ``DW_OP_LLVM_fragment, 16, 8`` specifies that the top of the expression stack
is a fragment of the source language variable with the given offset and size
(``16`` and ``8`` here, respectively). Note that the offset and size are the
opposite way around to ``DW_OP_bit_piece``, and the offset is within the
source language variable.
- ``DW_OP_bit_piece, 8, 16`` specifies that the source language variable can be
found in the sequence of bits at the given size and offset (``8`` and ``16``
here, respectively) within the top of the expression stack. Note that the
offset and size are the opposite way around to ``DW_OP_LLVM_fragment``, and the
offset is within the LLVM variable (if that's at the top of the stack).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing that is missing right now is discussion whether DW_OP_bit_piece can be used inside of a DW_OP_LLVM_fragment expression.

  • If a DW_OP_bit_piece generates a fragment on its own, then combining the two should be made illegal in the verifier, and then it needs to be handled everywhere we handle DW_OP_LLVM_fragment.
  • If the two can be combined then DWARFExpression needs handle that case, and we probably need a few more testcases for all the edge cases.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we need more testcases for the edge cases regardless of which way we decide.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's already a verifier check that DW_OP_LLVM_fragment is the last expression in the list, so I've made DW_OP_bit_piece do the same.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So that effectively implements option (1) that I outlined above. I think this is a reasonable choice and consistent with how DWARF uses them. So if we do this (and I think that's fine) we need make sure that the bit pieces are correctly reported by DIExpression::getFragmentInfo() or else they will get incorrectly treated by every pass that needs to determine overlapping fragments.
At the very least there should be tests that have LLVM IR that mix & matches DW_OP_bit_piece and DW_OP_LLVM_fragment for the same variable and confirm that this gets lowered into valid DWARF.
Alternatively, if you want to make quick progress, you can also say that we don't support this for now and actively reject mixing the two within the same variable in the Verifyer. The you'd need to make sure that passes like SROA that introduce new fragments deal with this gracefully by creating undef locations.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So that effectively implements option (1) that I outlined above. I think this is a reasonable choice and consistent with how DWARF uses them. So if we do this (and I think that's fine) we need make sure that the bit pieces are correctly reported by DIExpression::getFragmentInfo() or else they will get incorrectly treated by every pass that needs to determine overlapping fragments.

I'm handling this in the next part, which is SROA support (https://github.com/john-brawn-arm/llvm-project/tree/sroa_bit_piece), though it turns out in most of the places that getFragmentInfo is used it actually is specific to DW_OP_LLVM_fragment and does the wrong thing for DW_OP_bit_piece, so I think adding a separate getBitPieceInfo is simpler.

At the very least there should be tests that have LLVM IR that mix & matches DW_OP_bit_piece and DW_OP_LLVM_fragment for the same variable and confirm that this gets lowered into valid DWARF. Alternatively, if you want to make quick progress, you can also say that we don't support this for now and actively reject mixing the two within the same variable in the Verifyer. The you'd need to make sure that passes like SROA that introduce new fragments deal with this gracefully by creating undef locations.

Currently if the same variable has a DW_OP_LLVM_fragment and anything else this causes assertion failures in DebugLocEntry::finalize in llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp.

Copy link
Collaborator

@adrian-prantl adrian-prantl May 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it turns out in most of the places that getFragmentInfo is used it actually is specific to DW_OP_LLVM_fragment and does the wrong thing for DW_OP_bit_piece, so I think adding a separate getBitPieceInfo is simpler.

I am mostly worried about the places that want to answer questions like DIExpression::fragmentsOverlap() in order to determine whether a fragment (or bit_piece) should replace or augment another dbg.value or DBG_VALUE for the same variable.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DW_OP_bit_piece is like every other standard dwarf expression: if we see a new location for a variable then the variable is now at that location. The current behaviour for DIExpression::fragmentsOverlap() is that it returns true when the expression isn't a fragment, and that's what we want for DW_OP_bit_piece.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DW_OP_bit_piece is like every other standard dwarf expression

Not it's not. It's a composite location description delimiter and cannot appear inside a DWARF expression. (See DWARF 5 section 2.6.1.2). That's why it's important that all LLVM algorithms that need to reason about fragments (the LLVM equivalent of composite location descriptions) are aware of them.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see, I hadn't noticed that (I'd just read the description of DW_OP_bit_piece and hadn't read the general description of composite location descriptions). I had been understanding something like "DW_OP_reg0 RAX, DW_OP_bit_piece 0x3 0x0" to mean "this variable can be found in the 3 bits at offset 0 in RAX", but it actually means something closer to "the bottom 3 bits of this variable can be found in the 3 bits at offset 0 in RAX".

I think this doesn't make much difference though if we restrict DW_OP_bit_piece in the llvm.dbg intrinsics to use it only for its ability to describe a value stored in part of a register, with the other bits not being described, as I think that's what the behaviour I've currently implemented is. Though maybe it would be better to instead use DW_OP_and and DW_OP_shr to extract the bits from the register. I'll think about this some more.

- ``DW_OP_LLVM_convert, 16, DW_ATE_signed`` specifies a bit size and encoding
(``16`` and ``DW_ATE_signed`` here, respectively) to which the top of the
expression stack is to be converted. Maps into a ``DW_OP_convert`` operation
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,

bool HasComplexExpression = false;
auto Op = ExprCursor.peek();
if (Op && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
if (Op && Op->getOp() != dwarf::DW_OP_LLVM_fragment &&
Op->getOp() != dwarf::DW_OP_bit_piece)
HasComplexExpression = true;

// If the register can only be described by a complex expression (i.e.,
Expand Down Expand Up @@ -314,7 +315,8 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// operation would emit an OpPiece anyway.
auto NextOp = ExprCursor.peek();
if (SubRegisterSizeInBits && NextOp &&
(NextOp->getOp() != dwarf::DW_OP_LLVM_fragment))
NextOp->getOp() != dwarf::DW_OP_LLVM_fragment &&
NextOp->getOp() != dwarf::DW_OP_bit_piece)
maskSubRegister();
return true;
}
Expand Down Expand Up @@ -382,7 +384,8 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// operation would emit an OpPiece anyway.
auto NextOp = ExprCursor.peek();
if (SubRegisterSizeInBits && NextOp &&
(NextOp->getOp() != dwarf::DW_OP_LLVM_fragment))
NextOp->getOp() != dwarf::DW_OP_LLVM_fragment &&
NextOp->getOp() != dwarf::DW_OP_bit_piece)
maskSubRegister();

return true;
Expand Down Expand Up @@ -473,6 +476,7 @@ static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
switch (Op->getOp()) {
case dwarf::DW_OP_deref:
case dwarf::DW_OP_LLVM_fragment:
case dwarf::DW_OP_bit_piece:
break;
default:
return false;
Expand Down Expand Up @@ -645,6 +649,9 @@ bool DwarfExpression::addExpression(
emitUnsigned(Op->getArg(0));
emitSigned(Op->getArg(1));
break;
case dwarf::DW_OP_bit_piece:
addOpPiece(Op->getArg(0), Op->getArg(1));
break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/IR/DebugInfoMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,7 @@ unsigned DIExpression::ExprOperand::getSize() const {
case dwarf::DW_OP_LLVM_convert:
case dwarf::DW_OP_LLVM_fragment:
case dwarf::DW_OP_bregx:
case dwarf::DW_OP_bit_piece:
return 3;
case dwarf::DW_OP_constu:
case dwarf::DW_OP_consts:
Expand Down Expand Up @@ -1426,7 +1427,8 @@ bool DIExpression::isValid() const {
default:
return false;
case dwarf::DW_OP_LLVM_fragment:
// A fragment operator must appear at the end.
case dwarf::DW_OP_bit_piece:
// A fragment or bit piece operator must appear at the end.
return I->get() + I->getSize() == E->get();
case dwarf::DW_OP_stack_value: {
// Must be the last one or followed by a DW_OP_LLVM_fragment.
Expand Down
75 changes: 75 additions & 0 deletions llvm/test/DebugInfo/X86/DW_OP_bit_piece.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o %t -filetype=obj
; RUN: llvm-dwarfdump --debug-info %t | FileCheck %s

%struct.struct_t = type { i8 }

@g = dso_local global %struct.struct_t zeroinitializer, align 1, !dbg !0

; CHECK-LABEL: DW_TAG_subprogram
; CHECK: DW_AT_name ("test1")
; CHECK: DW_TAG_variable
; CHECK: DW_AT_location (DW_OP_fbreg -1, DW_OP_bit_piece 0x3 0x0)
; CHECK: DW_AT_name ("x")
; CHECK: DW_TAG_variable
; CHECK: DW_AT_location (DW_OP_fbreg -1, DW_OP_bit_piece 0x4 0x3)
; CHECK: DW_AT_name ("y")

define i32 @test1() !dbg !13 {
entry:
%0 = alloca %struct.struct_t, align 1
tail call void @llvm.dbg.declare(metadata ptr %0, metadata !17, metadata !DIExpression(DW_OP_bit_piece, 3, 0)), !dbg !18
tail call void @llvm.dbg.declare(metadata ptr %0, metadata !19, metadata !DIExpression(DW_OP_bit_piece, 4, 3)), !dbg !20
ret i32 0, !dbg !21
}

; CHECK-LABEL: DW_TAG_subprogram
; CHECK: DW_AT_name ("test2")
; CHECK: DW_TAG_variable
; CHECK: DW_AT_location (DW_OP_reg0 RAX, DW_OP_bit_piece 0x3 0x0)
; CHECK: DW_AT_name ("x")
; CHECK: DW_TAG_variable
; CHECK: DW_AT_location (DW_OP_reg0 RAX, DW_OP_bit_piece 0x4 0x3)
; CHECK: DW_AT_name ("y")

define i8 @test2() !dbg !22 {
entry:
%0 = load i8, ptr @g, align 1
tail call void @llvm.dbg.value(metadata i8 %0, metadata !23, metadata !DIExpression(DW_OP_bit_piece, 3, 0)), !dbg !24
tail call void @llvm.dbg.value(metadata i8 %0, metadata !25, metadata !DIExpression(DW_OP_bit_piece, 4, 3)), !dbg !26
ret i8 %0, !dbg !27
}

declare void @llvm.dbg.declare(metadata, metadata, metadata)
declare void @llvm.dbg.value(metadata, metadata, metadata)

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!11, !12}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 6, type: !5, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
!3 = !DIFile(filename: "DW_OP_bit_piece.cpp", directory: "./")
!4 = !{!0}
!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "struct_t", file: !3, line: 4, baseType: !6)
!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !3, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !7, identifier: "_ZTS8struct_t")
!7 = !{!8, !10}
!8 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !6, file: !3, line: 2, baseType: !9, size: 3, flags: DIFlagBitField, extraData: i64 0)
!9 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
!10 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !6, file: !3, line: 3, baseType: !9, size: 4, offset: 3, flags: DIFlagBitField, extraData: i64 0)
!11 = !{i32 7, !"Dwarf Version", i32 5}
!12 = !{i32 2, !"Debug Info Version", i32 3}
!13 = distinct !DISubprogram(name: "test1", linkageName: "test1", scope: !3, file: !3, line: 8, type: !14, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !16)
!14 = !DISubroutineType(types: !15)
!15 = !{!9}
!16 = !{}
!17 = !DILocalVariable(name: "x", scope: !13, file: !3, line: 9, type: !9)
!18 = !DILocation(line: 9, column: 9, scope: !13)
!19 = !DILocalVariable(name: "y", scope: !13, file: !3, line: 9, type: !9)
!20 = !DILocation(line: 9, column: 12, scope: !13)
!21 = !DILocation(line: 10, column: 3, scope: !13)
!22 = distinct !DISubprogram(name: "test2", linkageName: "test2", scope: !3, file: !3, line: 8, type: !14, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !16)
!23 = !DILocalVariable(name: "x", scope: !22, file: !3, line: 9, type: !9)
!24 = !DILocation(line: 9, column: 9, scope: !22)
!25 = !DILocalVariable(name: "y", scope: !22, file: !3, line: 9, type: !9)
!26 = !DILocation(line: 9, column: 12, scope: !22)
!27 = !DILocation(line: 10, column: 3, scope: !22)
23 changes: 23 additions & 0 deletions llvm/test/Verifier/diexpression-last.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
; RUN: not llvm-as -disable-output < %s 2>&1 | FileCheck %s

; Check that we give an error for expressions that are required to be the last
; in the list.

!named = !{!0, !1, !2, !3, !4, !5}
; CHECK: invalid expression
; CHECK-NEXT: !DIExpression({{.+}}, 0, 10, {{.+}})
!0 = !DIExpression(DW_OP_LLVM_fragment, 0, 10, DW_OP_deref)
; CHECK: invalid expression
; CHECK-NEXT: !DIExpression({{.+}}, 0, 20, {{.+}})
!1 = !DIExpression(DW_OP_bit_piece, 0, 20, DW_OP_deref)
; CHECK: invalid expression
; CHECK-NEXT: !DIExpression({{.+}}, 0, 30, {{.+}}, 3, 0)
!2 = !DIExpression(DW_OP_LLVM_fragment, 0, 30, DW_OP_bit_piece, 3, 0)
; CHECK: invalid expression
; CHECK-NEXT: !DIExpression({{.+}}, 40, 0, {{.+}}, 0, 4)
!3 = !DIExpression(DW_OP_bit_piece, 40, 0, DW_OP_LLVM_fragment, 0, 4)

; We shouldn't give an error if other expressions come earlier
; CHECK-NOT: invalid expression
!4 = !DIExpression(DW_OP_deref, DW_OP_LLVM_fragment, 0, 50)
!5 = !DIExpression(DW_OP_deref, DW_OP_bit_piece, 0, 60)
Loading