@@ -53,6 +53,7 @@ enum Op {
53
53
ADDI_W = 0x02800000 ,
54
54
ADDI_D = 0x02c00000 ,
55
55
ANDI = 0x03400000 ,
56
+ PCADDI = 0x18000000 ,
56
57
PCADDU12I = 0x1c000000 ,
57
58
LD_W = 0x28800000 ,
58
59
LD_D = 0x28c00000 ,
@@ -131,6 +132,10 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
131
132
return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1 )) - 1 )) >> end;
132
133
}
133
134
135
+ static uint32_t getD5 (uint64_t v) { return extractBits (v, 4 , 0 ); }
136
+
137
+ static uint32_t getJ5 (uint64_t v) { return extractBits (v, 9 , 5 ); }
138
+
134
139
static uint32_t setD5k16 (uint32_t insn, uint32_t imm) {
135
140
uint32_t immLo = extractBits (imm, 15 , 0 );
136
141
uint32_t immHi = extractBits (imm, 20 , 16 );
@@ -743,6 +748,88 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
743
748
}
744
749
}
745
750
751
+ static bool relaxable (ArrayRef<Relocation> relocs, size_t i) {
752
+ return i + 1 < relocs.size () && relocs[i + 1 ].type == R_LARCH_RELAX;
753
+ }
754
+
755
+ static bool isPairRelaxable (ArrayRef<Relocation> relocs, size_t i) {
756
+ return relaxable (relocs, i) && relaxable (relocs, i + 2 ) &&
757
+ relocs[i].offset + 4 == relocs[i + 2 ].offset ;
758
+ }
759
+
760
+ // Relax code sequence.
761
+ // From:
762
+ // pcalau12i $a0, %pc_hi20(sym)
763
+ // addi.w/d $a0, $a0, %pc_lo12(sym)
764
+ // To:
765
+ // pcaddi $a0, %pc_lo12(sym)
766
+ //
767
+ // From:
768
+ // pcalau12i $a0, %got_pc_hi20(sym_got)
769
+ // ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
770
+ // To:
771
+ // pcaddi $a0, %got_pc_hi20(sym_got)
772
+ static void relaxPCHi20Lo12 (Ctx &ctx, const InputSection &sec, size_t i,
773
+ uint64_t loc, Relocation &rHi20, Relocation &rLo12,
774
+ uint32_t &remove) {
775
+ // check if the relocations are relaxable sequences.
776
+ if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
777
+ rLo12.type == R_LARCH_PCALA_LO12) ||
778
+ (rHi20.type == R_LARCH_GOT_PC_HI20 &&
779
+ rLo12.type == R_LARCH_GOT_PC_LO12)))
780
+ return ;
781
+
782
+ // GOT references to absolute symbols can't be relaxed to use pcaddi in
783
+ // position-independent code, because these instructions produce a relative
784
+ // address.
785
+ // Meanwhile skip undefined, preemptible and STT_GNU_IFUNC symbols, because
786
+ // these symbols may be resolve in runtime.
787
+ if (rHi20.type == R_LARCH_GOT_PC_HI20 &&
788
+ (!rHi20.sym ->isDefined () || rHi20.sym ->isPreemptible ||
789
+ rHi20.sym ->isGnuIFunc () ||
790
+ (ctx.arg .isPic && !cast<Defined>(*rHi20.sym ).section )))
791
+ return ;
792
+
793
+ uint64_t dest = 0 ;
794
+ if (rHi20.expr == RE_LOONGARCH_PLT_PAGE_PC)
795
+ dest = rHi20.sym ->getPltVA (ctx);
796
+ else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
797
+ rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
798
+ dest = rHi20.sym ->getVA (ctx);
799
+ else {
800
+ Err (ctx) << getErrorLoc (ctx, (const uint8_t *)loc) << " unknown expr ("
801
+ << rHi20.expr << " ) against symbol " << rHi20.sym
802
+ << " in relaxPCHi20Lo12" ;
803
+ return ;
804
+ }
805
+ dest += rHi20.addend ;
806
+
807
+ const int64_t displace = dest - loc;
808
+ // Check if the displace aligns 4 bytes or exceeds the range of pcaddi.
809
+ if ((displace & 0x3 ) != 0 || !isInt<22 >(displace))
810
+ return ;
811
+
812
+ // Note: If we can ensure that the .o files generated by LLVM only contain
813
+ // relaxable instruction sequences with R_LARCH_RELAX, then we do not need to
814
+ // decode instructions. The relaxable instruction sequences imply the
815
+ // following constraints:
816
+ // * For relocation pairs related to got_pc, the opcodes of instructions
817
+ // must be pcalau12i + ld.w/d. In other cases, the opcodes must be pcalau12i +
818
+ // addi.w/d.
819
+ // * The destination register of pcalau12i is guaranteed to be used only by
820
+ // the immediately following instruction.
821
+ const uint32_t currInsn = read32le (sec.content ().data () + rHi20.offset );
822
+ const uint32_t nextInsn = read32le (sec.content ().data () + rLo12.offset );
823
+ // Check if use the same register.
824
+ if (getD5 (currInsn) != getJ5 (nextInsn) || getJ5 (nextInsn) != getD5 (nextInsn))
825
+ return ;
826
+
827
+ sec.relaxAux ->relocTypes [i] = R_LARCH_RELAX;
828
+ sec.relaxAux ->relocTypes [i + 2 ] = R_LARCH_PCREL20_S2;
829
+ sec.relaxAux ->writes .push_back (insn (PCADDI, getD5 (nextInsn), 0 , 0 ));
830
+ remove = 4 ;
831
+ }
832
+
746
833
static bool relax (Ctx &ctx, InputSection &sec) {
747
834
const uint64_t secAddr = sec.getVA ();
748
835
const MutableArrayRef<Relocation> relocs = sec.relocs ();
@@ -781,6 +868,12 @@ static bool relax(Ctx &ctx, InputSection &sec) {
781
868
}
782
869
break ;
783
870
}
871
+ case R_LARCH_PCALA_HI20:
872
+ case R_LARCH_GOT_PC_HI20:
873
+ // The overflow check for i+2 will be carried out in isPairRelaxable.
874
+ if (isPairRelaxable (relocs, i))
875
+ relaxPCHi20Lo12 (ctx, sec, i, loc, r, relocs[i + 2 ], remove );
876
+ break ;
784
877
}
785
878
786
879
// For all anchors whose offsets are <= r.offset, they are preceded by
@@ -851,6 +944,7 @@ void LoongArch::finalizeRelax(int passes) const {
851
944
MutableArrayRef<Relocation> rels = sec->relocs ();
852
945
ArrayRef<uint8_t > old = sec->content ();
853
946
size_t newSize = old.size () - aux.relocDeltas [rels.size () - 1 ];
947
+ size_t writesIdx = 0 ;
854
948
uint8_t *p = ctx.bAlloc .Allocate <uint8_t >(newSize);
855
949
uint64_t offset = 0 ;
856
950
int64_t delta = 0 ;
@@ -867,11 +961,29 @@ void LoongArch::finalizeRelax(int passes) const {
867
961
continue ;
868
962
869
963
// Copy from last location to the current relocated location.
870
- const Relocation &r = rels[i];
964
+ Relocation &r = rels[i];
871
965
uint64_t size = r.offset - offset;
872
966
memcpy (p, old.data () + offset, size);
873
967
p += size;
874
- offset = r.offset + remove ;
968
+
969
+ int64_t skip = 0 ;
970
+ if (RelType newType = aux.relocTypes [i]) {
971
+ switch (newType) {
972
+ case R_LARCH_RELAX:
973
+ break ;
974
+ case R_LARCH_PCREL20_S2:
975
+ skip = 4 ;
976
+ write32le (p, aux.writes [writesIdx++]);
977
+ // RelExpr is needed for relocating.
978
+ r.expr = r.sym ->hasFlag (NEEDS_PLT) ? R_PLT_PC : R_PC;
979
+ break ;
980
+ default :
981
+ llvm_unreachable (" unsupported type" );
982
+ }
983
+ }
984
+
985
+ p += skip;
986
+ offset = r.offset + skip + remove ;
875
987
}
876
988
memcpy (p, old.data () + offset, old.size () - offset);
877
989
0 commit comments