[lld][AArch64] Fix getImplicitAddend in big-endian mode. (#107845)

statham-arm · web-flow · commit daf208598b06 · 2024-09-10T12:38:32.000+01:00
In AArch64, the endianness of instruction encodings is always little,
whereas the endianness of data swaps between LE and BE modes. So
getImplicitAddend must use the right one of read32() and read32le(), for
data and code respectively. It was using read32() throughout, causing
instructions to be read as big-endian in BE mode, getting the wrong
addend.

Fixed, and updated the existing test to check both endiannesses. The
expected results for data must be byte-swapped, but the ones for code
need no adjustment.
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
@@ -268,13 +268,13 @@ int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_AARCH64_MOVW_UABS_G2:
   case R_AARCH64_MOVW_UABS_G2_NC:
   case R_AARCH64_MOVW_UABS_G3:
-    return SignExtend64<16>(getBits(read32(buf), 5, 20));
+    return SignExtend64<16>(getBits(read32le(buf), 5, 20));
 
     // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
     // has a 14-bit offset measured in instructions, i.e. shifted left
     // by 2.
   case R_AARCH64_TSTBR14:
-    return SignExtend64<16>(getBits(read32(buf), 5, 18) << 2);
+    return SignExtend64<16>(getBits(read32le(buf), 5, 18) << 2);
 
     // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
     // which has a 19-bit offset measured in instructions.
@@ -285,13 +285,13 @@ int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
     // R_AARCH64_CONDBR19.
   case R_AARCH64_CONDBR19:
   case R_AARCH64_LD_PREL_LO19:
-    return SignExtend64<21>(getBits(read32(buf), 5, 23) << 2);
+    return SignExtend64<21>(getBits(read32le(buf), 5, 23) << 2);
 
     // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
     // immediate can optionally be shifted left by 12 bits, but this
     // relocation is intended for the case where it is not.
   case R_AARCH64_ADD_ABS_LO12_NC:
-    return SignExtend64<12>(getBits(read32(buf), 10, 21));
+    return SignExtend64<12>(getBits(read32le(buf), 10, 21));
 
     // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
     // 21-bit immediate is split between two bits high up in the word
@@ -305,14 +305,14 @@ int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_AARCH64_ADR_PREL_LO21:
   case R_AARCH64_ADR_PREL_PG_HI21:
   case R_AARCH64_ADR_PREL_PG_HI21_NC:
-    return SignExtend64<21>((getBits(read32(buf), 5, 23) << 2) |
-                            getBits(read32(buf), 29, 30));
+    return SignExtend64<21>((getBits(read32le(buf), 5, 23) << 2) |
+                            getBits(read32le(buf), 29, 30));
 
     // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
     // 26-bit offset measured in instructions.
   case R_AARCH64_JUMP26:
   case R_AARCH64_CALL26:
-    return SignExtend64<28>(getBits(read32(buf), 0, 25) << 2);
+    return SignExtend64<28>(getBits(read32le(buf), 0, 25) << 2);
 
   default:
     internalLinkerError(getErrorLocation(buf),
diff --git a/lld/test/ELF/aarch64-reloc-implicit-addend.test b/lld/test/ELF/aarch64-reloc-implicit-addend.test
@@ -11,15 +11,25 @@ REQUIRES: aarch64
 ## .reloc directives containing no addend, this succeeds.
 
 # RUN: rm -rf %t && split-file %s %t && cd %t
+
 # RUN: llvm-mc -filetype=obj -triple=aarch64 relocs.s -o rela.o
 # RUN: obj2yaml rela.o -o rela.yaml
 # RUN: sed "s/\.rela/\.rel/;s/SHT_RELA/SHT_REL/" rela.yaml > rel.yaml
 # RUN: yaml2obj rel.yaml -o rel.o
 # RUN: llvm-mc -filetype=obj -triple=aarch64 symbols.s -o symbols.o
 # RUN: ld.lld rel.o symbols.o -o a.out --section-start=.data=0x100000 --section-start=.text=0x200000
-# RUN: llvm-objdump -s a.out | FileCheck %s --check-prefix=DATA
+# RUN: llvm-objdump -s a.out | FileCheck %s --check-prefix=DATALE
 # RUN: llvm-objdump -d a.out | FileCheck %s --check-prefix=CODE
 
+# RUN: llvm-mc -filetype=obj -triple=aarch64_be relocs.s -o rela_be.o
+# RUN: obj2yaml rela_be.o -o rela_be.yaml
+# RUN: sed "s/\.rela/\.rel/;s/SHT_RELA/SHT_REL/" rela_be.yaml > rel_be.yaml
+# RUN: yaml2obj rel_be.yaml -o rel_be.o
+# RUN: llvm-mc -filetype=obj -triple=aarch64_be symbols.s -o symbols_be.o
+# RUN: ld.lld -EB rel_be.o symbols_be.o -o be.out --section-start=.data=0x100000 --section-start=.text=0x200000
+# RUN: llvm-objdump -s be.out | FileCheck %s --check-prefix=DATABE
+# RUN: llvm-objdump -d be.out | FileCheck %s --check-prefix=CODE
+
 #--- symbols.s
 
 // Source file containing the values of target symbols for the relocations. If
@@ -53,7 +63,8 @@ REQUIRES: aarch64
 // Source file containing the test instructions and their relocations, with the
 // FileCheck comments interleaved.
 
-// DATA: Contents of section .data:
+// DATALE: Contents of section .data:
+// DATABE: Contents of section .data:
 .data
 
 // First test absolute data relocations. For each one I show the expected
@@ -72,7 +83,8 @@ REQUIRES: aarch64
         .reloc ., R_AARCH64_ABS16, abs16
         .hword 0x1234
 
-        // DATA-NEXT:  100000 98badcfe efcdab89 a9cbbc9a cdab
+        // DATALE-NEXT:  100000 98badcfe efcdab89 a9cbbc9a cdab
+        // DATABE-NEXT:  100000 89abcdef fedcba98 9abccba9 abcd
 
         .balign 16
 
@@ -91,7 +103,8 @@ REQUIRES: aarch64
         .reloc ., R_AARCH64_PREL16, data
         .hword 0x1234
 
-        // DATA-NEXT:  100010 11436587 78563412 09433412 1812
+        // DATALE-NEXT:  100010 11436587 78563412 09433412 1812
+        // DATABE-NEXT:  100010 12345678 87654311 12344309 1218
 
 // CODE: 0000000000200000 <_start>:
 .text
@@ -123,7 +136,7 @@ _start:
 
         .reloc ., R_AARCH64_MOVW_UABS_G0_NC, big64
         movz x0, #0x1234
-        // CODE-NEXT:  200010: d2823560      mov     x0, #0x11ab            
+        // CODE-NEXT:  200010: d2823560      mov     x0, #0x11ab
         .reloc ., R_AARCH64_MOVW_UABS_G1_NC, big64
         movk x0, #0x1234, lsl #16
         // CODE-NEXT:  200014: f2a00000      movk    x0, #0x0, lsl #16