@@ -636,12 +636,14 @@ class YkIRWriter {
636
636
// We yet don't support:
637
637
// - the `inrange` keyword.
638
638
// - the vector variant.
639
+ // - exotic (non-zero) address spaces.
639
640
//
640
641
// It appears that `inrange` can't appear in a GEP *instruction* (only a
641
642
// GEP expression, inline in another instruction), but we check for it
642
643
// anyway.
643
644
if ((cast<GEPOperator>(I)->getInRangeIndex () != nullopt) ||
644
- (I->getPointerOperand ()->getType ()->isVectorTy ())) {
645
+ (I->getPointerOperand ()->getType ()->isVectorTy ()) ||
646
+ (I->getPointerAddressSpace () != 0 )) {
645
647
serialiseUnimplementedInstruction (I, FLCtxt, BBIdx, InstIdx);
646
648
return ;
647
649
}
@@ -654,32 +656,82 @@ class YkIRWriter {
654
656
serialiseOperand (I, FLCtxt, I->getPointerOperand ());
655
657
656
658
// GetElementPtrInst::collectOffset() reduces the GEP to:
657
- // - a static offset and
658
- // - zero or more dynamic offsets of the form `elem_count * elem_size`,
659
- // where `elem_count` is not known statically.
659
+ // - a static byte offset and
660
+ // - zero or more dynamic byte offsets of the form `elem_count *
661
+ // elem_size`, where `elem_count` is not known statically.
660
662
//
661
- // We lower this information directly into a Yk PtrAdd instruction.
663
+ // We encode this information into our Yk AOT `PtrAdd` instruction, but
664
+ // there are some semantics of LLVM's GEP that we have to be very careful to
665
+ // preserve. At the time of writing, these are the things that it's
666
+ // important to know:
662
667
//
668
+ // 1. LLVM integer types are neither signed nor unsigned. They are a bit
669
+ // pattern that can be interpreted (by LLVM instructions) as a signed
670
+ // or unsigned integer.
671
+ //
672
+ // 2. a dynamic index cannot be applied to a struct, because struct fields
673
+ // can have different types and you wouldn't be able to statically
674
+ // determine the type of the field being selected.
675
+ //
676
+ // 3. When indexing a struct, the index is interpreted as unsigned
677
+ // (because a negative field index makes no sense).
678
+ //
679
+ // 4. When indexing anything but a struct, the index is interpreted as
680
+ // signed, to allow (e.g.) negative array indices, or negative
681
+ // offsetting pointers.
682
+ //
683
+ // 5. Index operands to `getelementptr` can have arbitrary bit-width
684
+ // (although struct indexing must use i32). Index types with a different
685
+ // bit-width than the "pointer indexing type" for the address space in
686
+ // question must be extended or truncated (and if it's a signed index,
687
+ // then that's a sign extend!). To get the indexing type, you use
688
+ // `DataLayout:getIndexSizeInBits()`.
689
+ //
690
+ // 6. We can ignore the `inbounds` keyword on GEPs. When an `inbounds` GEP
691
+ // is out of bounds, a poison value is generated. Since a poison value
692
+ // represents (deferred) undefined behaviour (UB), we are free to
693
+ // compute any value we want, including the out of bounds offset.
694
+ //
695
+ // To simplify things a bit, we assume (as is that case for "regular"
696
+ // hardware/software platforms) that the LLVM pointer indexing type is the
697
+ // same size as a pointer. Just in case, let's assert it though:
698
+ unsigned IdxBitWidth = DL.getIndexSizeInBits (I->getPointerAddressSpace ());
699
+ assert (sizeof (void *) * 8 == IdxBitWidth);
700
+ // // And since we are going to use `get{S,Z}ExtValue()`, which return
701
+ // // `uint64_t` and `int64_t`, we should also check:
702
+ static_assert (sizeof (size_t ) <= sizeof (uint64_t ));
703
+
704
+ APInt ConstOff (IdxBitWidth, 0 );
705
+ MapVector<Value *, APInt> DynOffs;
663
706
// Note: the width of the collected constant offset must be the same as the
664
707
// index type bit-width.
665
- unsigned BitWidth = DL.getIndexSizeInBits (I->getPointerAddressSpace ());
666
- APInt ConstOff (BitWidth, 0 );
667
- MapVector<Value *, APInt> DynOffs;
668
- bool CollectRes = I->collectOffset (DL, BitWidth, DynOffs, ConstOff);
708
+ bool CollectRes = I->collectOffset (DL, IdxBitWidth, DynOffs, ConstOff);
669
709
assert (CollectRes);
670
710
671
711
// const_off:
672
- OutStreamer.emitSizeT (ConstOff.getZExtValue ());
712
+ //
713
+ // This is always signed and we can statically sign-extend now.
714
+ //
715
+ // FIXME: We can't deal with static offsets that don't fit in a ssize_t.
716
+ assert (ConstOff.sle (APInt (sizeof (size_t ) * 8 , SSIZE_MAX)));
717
+ OutStreamer.emitSizeT (ConstOff.getSExtValue ());
673
718
// num_dyn_offs:
674
719
size_t NumDyn = DynOffs.size ();
675
720
OutStreamer.emitSizeT (NumDyn);
676
721
// dyn_elem_counts:
722
+ //
723
+ // These are interpreted as signed.
677
724
for (auto &KV : DynOffs) {
678
725
serialiseOperand (I, FLCtxt, std::get<0 >(KV));
679
726
}
680
727
// dyn_elem_sizes:
728
+ //
729
+ // These are unsigned and we can statically zero-extend now.
681
730
for (auto &KV : DynOffs) {
682
- OutStreamer.emitSizeT (std::get<1 >(KV).getZExtValue ());
731
+ APInt DS = std::get<1 >(KV);
732
+ // FIXME: We can't deal with element sizes that don't fit in a size_t.
733
+ assert (DS.ule (APInt (sizeof (size_t ) * 8 , SIZE_MAX)));
734
+ OutStreamer.emitSizeT (DS.getZExtValue ());
683
735
}
684
736
685
737
FLCtxt.updateVLMap (I, InstIdx);
@@ -1063,6 +1115,11 @@ class YkIRWriter {
1063
1115
OutStreamer.emitInt32 (Magic);
1064
1116
OutStreamer.emitInt32 (Version);
1065
1117
1118
+ // ptr_off_bitsize:
1119
+ unsigned IdxBitWidth = DL.getIndexSizeInBits (0 );
1120
+ assert (IdxBitWidth <= 0xff );
1121
+ OutStreamer.emitInt8 (IdxBitWidth);
1122
+
1066
1123
// num_funcs:
1067
1124
OutStreamer.emitSizeT (M.size ());
1068
1125
// funcs:
0 commit comments