Skip to content

Commit 0c1f0d1

Browse files
authored
Merge pull request rust-lang#154 from vext01/gep-refinements
Be more systematic in serialising GEP instructions.
2 parents 820c5d1 + 84faa48 commit 0c1f0d1

File tree

1 file changed

+68
-11
lines changed

1 file changed

+68
-11
lines changed

llvm/lib/YkIR/YkIRWriter.cpp

Lines changed: 68 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -636,12 +636,14 @@ class YkIRWriter {
636636
// We yet don't support:
637637
// - the `inrange` keyword.
638638
// - the vector variant.
639+
// - exotic (non-zero) address spaces.
639640
//
640641
// It appears that `inrange` can't appear in a GEP *instruction* (only a
641642
// GEP expression, inline in another instruction), but we check for it
642643
// anyway.
643644
if ((cast<GEPOperator>(I)->getInRangeIndex() != nullopt) ||
644-
(I->getPointerOperand()->getType()->isVectorTy())) {
645+
(I->getPointerOperand()->getType()->isVectorTy()) ||
646+
(I->getPointerAddressSpace() != 0)) {
645647
serialiseUnimplementedInstruction(I, FLCtxt, BBIdx, InstIdx);
646648
return;
647649
}
@@ -654,32 +656,82 @@ class YkIRWriter {
654656
serialiseOperand(I, FLCtxt, I->getPointerOperand());
655657

656658
// GetElementPtrInst::collectOffset() reduces the GEP to:
657-
// - a static offset and
658-
// - zero or more dynamic offsets of the form `elem_count * elem_size`,
659-
// where `elem_count` is not known statically.
659+
// - a static byte offset and
660+
// - zero or more dynamic byte offsets of the form `elem_count *
661+
// elem_size`, where `elem_count` is not known statically.
660662
//
661-
// We lower this information directly into a Yk PtrAdd instruction.
663+
// We encode this information into our Yk AOT `PtrAdd` instruction, but
664+
// there are some semantics of LLVM's GEP that we have to be very careful to
665+
// preserve. At the time of writing, these are the things that it's
666+
// important to know:
662667
//
668+
// 1. LLVM integer types are neither signed nor unsigned. They are a bit
669+
// pattern that can be interpreted (by LLVM instructions) as a signed
670+
// or unsigned integer.
671+
//
672+
// 2. a dynamic index cannot be applied to a struct, because struct fields
673+
// can have different types and you wouldn't be able to statically
674+
// determine the type of the field being selected.
675+
//
676+
// 3. When indexing a struct, the index is interpreted as unsigned
677+
// (because a negative field index makes no sense).
678+
//
679+
// 4. When indexing anything but a struct, the index is interpreted as
680+
// signed, to allow (e.g.) negative array indices, or negative
681+
// offsetting pointers.
682+
//
683+
// 5. Index operands to `getelementptr` can have arbitrary bit-width
684+
// (although struct indexing must use i32). Index types with a different
685+
// bit-width than the "pointer indexing type" for the address space in
686+
// question must be extended or truncated (and if it's a signed index,
687+
// then that's a sign extend!). To get the indexing type, you use
688+
// `DataLayout:getIndexSizeInBits()`.
689+
//
690+
// 6. We can ignore the `inbounds` keyword on GEPs. When an `inbounds` GEP
691+
// is out of bounds, a poison value is generated. Since a poison value
692+
// represents (deferred) undefined behaviour (UB), we are free to
693+
// compute any value we want, including the out of bounds offset.
694+
//
695+
// To simplify things a bit, we assume (as is that case for "regular"
696+
// hardware/software platforms) that the LLVM pointer indexing type is the
697+
// same size as a pointer. Just in case, let's assert it though:
698+
unsigned IdxBitWidth = DL.getIndexSizeInBits(I->getPointerAddressSpace());
699+
assert(sizeof(void *) * 8 == IdxBitWidth);
700+
//// And since we are going to use `get{S,Z}ExtValue()`, which return
701+
//// `uint64_t` and `int64_t`, we should also check:
702+
static_assert(sizeof(size_t) <= sizeof(uint64_t));
703+
704+
APInt ConstOff(IdxBitWidth, 0);
705+
MapVector<Value *, APInt> DynOffs;
663706
// Note: the width of the collected constant offset must be the same as the
664707
// index type bit-width.
665-
unsigned BitWidth = DL.getIndexSizeInBits(I->getPointerAddressSpace());
666-
APInt ConstOff(BitWidth, 0);
667-
MapVector<Value *, APInt> DynOffs;
668-
bool CollectRes = I->collectOffset(DL, BitWidth, DynOffs, ConstOff);
708+
bool CollectRes = I->collectOffset(DL, IdxBitWidth, DynOffs, ConstOff);
669709
assert(CollectRes);
670710

671711
// const_off:
672-
OutStreamer.emitSizeT(ConstOff.getZExtValue());
712+
//
713+
// This is always signed and we can statically sign-extend now.
714+
//
715+
// FIXME: We can't deal with static offsets that don't fit in a ssize_t.
716+
assert(ConstOff.sle(APInt(sizeof(size_t) * 8, SSIZE_MAX)));
717+
OutStreamer.emitSizeT(ConstOff.getSExtValue());
673718
// num_dyn_offs:
674719
size_t NumDyn = DynOffs.size();
675720
OutStreamer.emitSizeT(NumDyn);
676721
// dyn_elem_counts:
722+
//
723+
// These are interpreted as signed.
677724
for (auto &KV : DynOffs) {
678725
serialiseOperand(I, FLCtxt, std::get<0>(KV));
679726
}
680727
// dyn_elem_sizes:
728+
//
729+
// These are unsigned and we can statically zero-extend now.
681730
for (auto &KV : DynOffs) {
682-
OutStreamer.emitSizeT(std::get<1>(KV).getZExtValue());
731+
APInt DS = std::get<1>(KV);
732+
// FIXME: We can't deal with element sizes that don't fit in a size_t.
733+
assert(DS.ule(APInt(sizeof(size_t) * 8, SIZE_MAX)));
734+
OutStreamer.emitSizeT(DS.getZExtValue());
683735
}
684736

685737
FLCtxt.updateVLMap(I, InstIdx);
@@ -1063,6 +1115,11 @@ class YkIRWriter {
10631115
OutStreamer.emitInt32(Magic);
10641116
OutStreamer.emitInt32(Version);
10651117

1118+
// ptr_off_bitsize:
1119+
unsigned IdxBitWidth = DL.getIndexSizeInBits(0);
1120+
assert(IdxBitWidth <= 0xff);
1121+
OutStreamer.emitInt8(IdxBitWidth);
1122+
10661123
// num_funcs:
10671124
OutStreamer.emitSizeT(M.size());
10681125
// funcs:

0 commit comments

Comments
 (0)