Skip to content

Commit 84faa48

Browse files
committed
Be more systematic in serialising GEP instructions.
getelementptr is full of semantic details. This change makes our lowering much more defensive in an attempt to get things right (tm). This was the result of a full day of reading/musing, starting with: https://llvm.org/docs/LangRef.html#getelementptr-instruction and: https://llvm.org/docs/GetElementPtr.html and: https://llvm.org/docs/LangRef.html#poisonvalues We still can't handle everything that could theoretically be possible, but a) for now those cases are unlikely to arise for us, and b) I've asserted them, so we can catch them early if they do arise. I hope I've got it right!
1 parent 820c5d1 commit 84faa48

File tree

1 file changed

+68
-11
lines changed

1 file changed

+68
-11
lines changed

llvm/lib/YkIR/YkIRWriter.cpp

Lines changed: 68 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -636,12 +636,14 @@ class YkIRWriter {
636636
// We yet don't support:
637637
// - the `inrange` keyword.
638638
// - the vector variant.
639+
// - exotic (non-zero) address spaces.
639640
//
640641
// It appears that `inrange` can't appear in a GEP *instruction* (only a
641642
// GEP expression, inline in another instruction), but we check for it
642643
// anyway.
643644
if ((cast<GEPOperator>(I)->getInRangeIndex() != nullopt) ||
644-
(I->getPointerOperand()->getType()->isVectorTy())) {
645+
(I->getPointerOperand()->getType()->isVectorTy()) ||
646+
(I->getPointerAddressSpace() != 0)) {
645647
serialiseUnimplementedInstruction(I, FLCtxt, BBIdx, InstIdx);
646648
return;
647649
}
@@ -654,32 +656,82 @@ class YkIRWriter {
654656
serialiseOperand(I, FLCtxt, I->getPointerOperand());
655657

656658
// GetElementPtrInst::collectOffset() reduces the GEP to:
657-
// - a static offset and
658-
// - zero or more dynamic offsets of the form `elem_count * elem_size`,
659-
// where `elem_count` is not known statically.
659+
// - a static byte offset and
660+
// - zero or more dynamic byte offsets of the form `elem_count *
661+
// elem_size`, where `elem_count` is not known statically.
660662
//
661-
// We lower this information directly into a Yk PtrAdd instruction.
663+
// We encode this information into our Yk AOT `PtrAdd` instruction, but
664+
// there are some semantics of LLVM's GEP that we have to be very careful to
665+
// preserve. At the time of writing, these are the things that it's
666+
// important to know:
662667
//
668+
// 1. LLVM integer types are neither signed nor unsigned. They are a bit
669+
// pattern that can be interpreted (by LLVM instructions) as a signed
670+
// or unsigned integer.
671+
//
672+
// 2. a dynamic index cannot be applied to a struct, because struct fields
673+
// can have different types and you wouldn't be able to statically
674+
// determine the type of the field being selected.
675+
//
676+
// 3. When indexing a struct, the index is interpreted as unsigned
677+
// (because a negative field index makes no sense).
678+
//
679+
// 4. When indexing anything but a struct, the index is interpreted as
680+
// signed, to allow (e.g.) negative array indices, or negative
681+
// offsetting pointers.
682+
//
683+
// 5. Index operands to `getelementptr` can have arbitrary bit-width
684+
// (although struct indexing must use i32). Index types with a different
685+
// bit-width than the "pointer indexing type" for the address space in
686+
// question must be extended or truncated (and if it's a signed index,
687+
// then that's a sign extend!). To get the indexing type, you use
688+
// `DataLayout:getIndexSizeInBits()`.
689+
//
690+
// 6. We can ignore the `inbounds` keyword on GEPs. When an `inbounds` GEP
691+
// is out of bounds, a poison value is generated. Since a poison value
692+
// represents (deferred) undefined behaviour (UB), we are free to
693+
// compute any value we want, including the out of bounds offset.
694+
//
695+
// To simplify things a bit, we assume (as is that case for "regular"
696+
// hardware/software platforms) that the LLVM pointer indexing type is the
697+
// same size as a pointer. Just in case, let's assert it though:
698+
unsigned IdxBitWidth = DL.getIndexSizeInBits(I->getPointerAddressSpace());
699+
assert(sizeof(void *) * 8 == IdxBitWidth);
700+
//// And since we are going to use `get{S,Z}ExtValue()`, which return
701+
//// `uint64_t` and `int64_t`, we should also check:
702+
static_assert(sizeof(size_t) <= sizeof(uint64_t));
703+
704+
APInt ConstOff(IdxBitWidth, 0);
705+
MapVector<Value *, APInt> DynOffs;
663706
// Note: the width of the collected constant offset must be the same as the
664707
// index type bit-width.
665-
unsigned BitWidth = DL.getIndexSizeInBits(I->getPointerAddressSpace());
666-
APInt ConstOff(BitWidth, 0);
667-
MapVector<Value *, APInt> DynOffs;
668-
bool CollectRes = I->collectOffset(DL, BitWidth, DynOffs, ConstOff);
708+
bool CollectRes = I->collectOffset(DL, IdxBitWidth, DynOffs, ConstOff);
669709
assert(CollectRes);
670710

671711
// const_off:
672-
OutStreamer.emitSizeT(ConstOff.getZExtValue());
712+
//
713+
// This is always signed and we can statically sign-extend now.
714+
//
715+
// FIXME: We can't deal with static offsets that don't fit in a ssize_t.
716+
assert(ConstOff.sle(APInt(sizeof(size_t) * 8, SSIZE_MAX)));
717+
OutStreamer.emitSizeT(ConstOff.getSExtValue());
673718
// num_dyn_offs:
674719
size_t NumDyn = DynOffs.size();
675720
OutStreamer.emitSizeT(NumDyn);
676721
// dyn_elem_counts:
722+
//
723+
// These are interpreted as signed.
677724
for (auto &KV : DynOffs) {
678725
serialiseOperand(I, FLCtxt, std::get<0>(KV));
679726
}
680727
// dyn_elem_sizes:
728+
//
729+
// These are unsigned and we can statically zero-extend now.
681730
for (auto &KV : DynOffs) {
682-
OutStreamer.emitSizeT(std::get<1>(KV).getZExtValue());
731+
APInt DS = std::get<1>(KV);
732+
// FIXME: We can't deal with element sizes that don't fit in a size_t.
733+
assert(DS.ule(APInt(sizeof(size_t) * 8, SIZE_MAX)));
734+
OutStreamer.emitSizeT(DS.getZExtValue());
683735
}
684736

685737
FLCtxt.updateVLMap(I, InstIdx);
@@ -1063,6 +1115,11 @@ class YkIRWriter {
10631115
OutStreamer.emitInt32(Magic);
10641116
OutStreamer.emitInt32(Version);
10651117

1118+
// ptr_off_bitsize:
1119+
unsigned IdxBitWidth = DL.getIndexSizeInBits(0);
1120+
assert(IdxBitWidth <= 0xff);
1121+
OutStreamer.emitInt8(IdxBitWidth);
1122+
10661123
// num_funcs:
10671124
OutStreamer.emitSizeT(M.size());
10681125
// funcs:

0 commit comments

Comments
 (0)