[ubsan] Display correct runtime messages for negative _BitInt (llvm#93612)

earnol · Vladislav Aranov · AaronBallman · AlexisPerry · commit abf62ab882d7 · 2024-06-27T14:46:37.000-06:00
Without this patch, the compiler-rt ubsan library has a bug that displays incorrect values for variables of the _BitInt (previously called _ExtInt) type. This patch affects both the generation of metadata inside the code generator and the runtime part. The runtime part is provided only for the i386 and x86_64 runtimes. Other runtimes should be updated to take full benefit of this patch. The patch is constructed in a way that is backward compatible: int and float type runtime diagnostics should be unaffected for runtimes that have not yet been updated. This patch fixes issue: llvm#64100. Co-authored-by: Vladislav Aranov <vladislav.aranov@ericsson.com> Co-authored-by: Aaron Ballman <aaron@aaronballman.com>
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Passes/OptimizationLevel.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -64,6 +65,22 @@ static llvm::cl::opt<bool> ClSanitizeGuardChecks(
     "ubsan-guard-checks", llvm::cl::Optional,
     llvm::cl::desc("Guard UBSAN checks with `llvm.allow.ubsan.check()`."));
 
+//===--------------------------------------------------------------------===//
+//                        Defines for metadata
+//===--------------------------------------------------------------------===//
+
+// These values must stay the SAME as in the ubsan runtime library.
+enum VariableTypeDescriptorKind : uint16_t {
+  /// An integer type.
+  TK_Integer = 0x0000,
+  /// A floating-point type.
+  TK_Float = 0x0001,
+  /// An _BitInt(N) type.
+  TK_BitInt = 0x0002,
+  /// Any other type. The value representation is unspecified.
+  TK_Unknown = 0xffff
+};
+
 //===--------------------------------------------------------------------===//
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
@@ -3298,22 +3315,40 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
 ///   { i16 TypeKind, i16 TypeInfo }
 /// \endcode
 ///
-/// followed by an array of i8 containing the type name. TypeKind is 0 for an
-/// integer, 1 for a floating point value, and -1 for anything else.
+/// followed by an array of i8 containing the type name with extra information
+/// for BitInt. TypeKind is TK_Integer(0) for an integer, TK_Float(1) for a
+/// floating point value, TK_BitInt(2) for BitInt and TK_Unknown(0xFFFF) for
+/// anything else.
 llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
   // Only emit each type's descriptor once.
   if (llvm::Constant *C = CGM.getTypeDescriptorFromMap(T))
     return C;
 
-  uint16_t TypeKind = -1;
+  uint16_t TypeKind = TK_Unknown;
   uint16_t TypeInfo = 0;
+  bool IsBitInt = false;
 
   if (T->isIntegerType()) {
-    TypeKind = 0;
+    TypeKind = TK_Integer;
     TypeInfo = (llvm::Log2_32(getContext().getTypeSize(T)) << 1) |
                (T->isSignedIntegerType() ? 1 : 0);
+    // Follow suggestion from https://github.com/llvm/llvm-project/issues/64100
+    // So we can write the exact amount of bits in TypeName after '\0'
+    // making it <diagnostic-like type name>.'\0'.<32-bit width>.
+    if (T->isSignedIntegerType() && T->getAs<BitIntType>()) {
+      // Do sanity checks, as we are using a 32-bit type to store bit length.
+      assert((getContext().getTypeSize(T) > 0) &&
+             " non positive amount of bits in __BitInt type");
+      assert((getContext().getTypeSize(T) <= 0xFFFFFFFF) &&
+             " too many bits in __BitInt type");
+
+      // Redefine TypeKind with the actual __BitInt type if we have signed
+      // BitInt.
+      TypeKind = TK_BitInt;
+      IsBitInt = true;
+    }
   } else if (T->isFloatingType()) {
-    TypeKind = 1;
+    TypeKind = TK_Float;
     TypeInfo = getContext().getTypeSize(T);
   }
 
@@ -3324,6 +3359,20 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
       DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), StringRef(),
       StringRef(), std::nullopt, Buffer, std::nullopt);
 
+  if (IsBitInt) {
+    // The structure is: a zero byte to end the string, a 32-bit unsigned
+    // integer in target endianness, and a trailing zero byte.
+    char S[6] = {'\0', '\0', '\0', '\0', '\0', '\0'};
+    const auto *EIT = T->castAs<BitIntType>();
+    uint32_t Bits = EIT->getNumBits();
+    llvm::support::endian::write32(S + 1, Bits,
+                                   getTarget().isBigEndian()
+                                       ? llvm::endianness::big
+                                       : llvm::endianness::little);
+    StringRef str = StringRef(S, sizeof(S) / sizeof(decltype(S[0])));
+    Buffer.append(str);
+  }
+
   llvm::Constant *Components[] = {
     Builder.getInt16(TypeKind), Builder.getInt16(TypeInfo),
     llvm::ConstantDataArray::getString(getLLVMContext(), Buffer)
diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp
@@ -67,18 +67,21 @@ const char *__ubsan::getObjCClassName(ValueHandle Pointer) {
 
 SIntMax Value::getSIntValue() const {
   CHECK(getType().isSignedIntegerTy());
+  // Val was zero-extended to ValueHandle. Sign-extend from original width
+  // to SIntMax.
+  const unsigned ExtraBits =
+      sizeof(SIntMax) * 8 - getType().getIntegerBitCount();
   if (isInlineInt()) {
-    // Val was zero-extended to ValueHandle. Sign-extend from original width
-    // to SIntMax.
-    const unsigned ExtraBits =
-      sizeof(SIntMax) * 8 - getType().getIntegerBitWidth();
     return SIntMax(UIntMax(Val) << ExtraBits) >> ExtraBits;
   }
-  if (getType().getIntegerBitWidth() == 64)
-    return *reinterpret_cast<s64*>(Val);
+  if (getType().getIntegerBitWidth() == 64) {
+    return SIntMax(UIntMax(*reinterpret_cast<s64 *>(Val)) << ExtraBits) >>
+           ExtraBits;
+  }
 #if HAVE_INT128_T
   if (getType().getIntegerBitWidth() == 128)
-    return *reinterpret_cast<s128*>(Val);
+    return SIntMax(UIntMax(*reinterpret_cast<s128 *>(Val)) << ExtraBits) >>
+           ExtraBits;
 #else
   if (getType().getIntegerBitWidth() == 128)
     UNREACHABLE("libclang_rt.ubsan was built without __int128 support");
diff --git a/compiler-rt/lib/ubsan/ubsan_value.h b/compiler-rt/lib/ubsan/ubsan_value.h
@@ -103,6 +103,13 @@ class TypeDescriptor {
     /// representation is that of bitcasting the floating-point value to an
     /// integer type.
     TK_Float = 0x0001,
+    /// An _BitInt(N) type. Lowest bit is 1 for a signed value, 0 for an
+    /// unsigned value. Remaining bits are log_2(bit_width). The value
+    /// representation is the integer itself if it fits into a ValueHandle, and
+    /// a pointer to the integer otherwise. For a signed _BitInt(N) type, the
+    /// true width of the type is stored as a 32-bit unsigned integer right
+    /// after the zero byte that terminates TypeName.
+    TK_BitInt = 0x0002,
     /// Any other type. The value representation is unspecified.
     TK_Unknown = 0xffff
   };
@@ -113,10 +120,15 @@ class TypeDescriptor {
     return static_cast<Kind>(TypeKind);
   }
 
-  bool isIntegerTy() const { return getKind() == TK_Integer; }
+  bool isIntegerTy() const {
+    return getKind() == TK_Integer || getKind() == TK_BitInt;
+  }
+  bool isBitIntTy() const { return getKind() == TK_BitInt; }
+
   bool isSignedIntegerTy() const {
     return isIntegerTy() && (TypeInfo & 1);
   }
+  bool isSignedBitIntTy() const { return isBitIntTy() && (TypeInfo & 1); }
   bool isUnsignedIntegerTy() const {
     return isIntegerTy() && !(TypeInfo & 1);
   }
@@ -125,6 +137,26 @@ class TypeDescriptor {
     return 1 << (TypeInfo >> 1);
   }
 
+  const char *getBitIntBitCountPointer() const {
+    CHECK(isBitIntTy());
+    CHECK(isSignedBitIntTy());
+    // Scan Name for zero and return the next address
+    const char *p = getTypeName();
+    while (*p != '\0') {
+      ++p;
+    }
+    // Return the next address
+    return p + 1;
+  }
+
+  unsigned getIntegerBitCount() const {
+    CHECK(isIntegerTy());
+    if (isSignedBitIntTy())
+      return *reinterpret_cast<const u32 *>(getBitIntBitCountPointer());
+    else
+      return getIntegerBitWidth();
+  }
+
   bool isFloatTy() const { return getKind() == TK_Float; }
   unsigned getFloatBitWidth() const {
     CHECK(isFloatTy());
diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c
@@ -0,0 +1,39 @@
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -O0 -fsanitize=alignment,array-bounds,bool,float-cast-overflow,implicit-integer-sign-change,implicit-signed-integer-truncation,implicit-unsigned-integer-truncation,integer-divide-by-zero,nonnull-attribute,null,nullability-arg,nullability-assign,nullability-return,pointer-overflow,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,unsigned-integer-overflow,unsigned-shift-base,vla-bound %s -o %t1 && %run %t1 2>&1 | FileCheck %s
+
+#include <stdint.h>
+#include <stdio.h>
+
+// In this test there is an expectation of assignment of _BitInt not producing any output.
+uint32_t nullability_arg(_BitInt(37) *_Nonnull x)
+    __attribute__((no_sanitize("address"))) {
+  _BitInt(37) y = *(_BitInt(37) *)&x;
+  return y;
+}
+
+// In this test there is an expectation of ubsan not triggering on returning a random address that is inside the address space of the process.
+_BitInt(37) nonnull_attribute(__attribute__((nonnull)) _BitInt(37) * x)
+    __attribute__((no_sanitize("address"))) {
+  return *(_BitInt(37) *)&x;
+}
+
+// In this test the assignment of a uint32_t from an "invalid" _BitInt is expected not to produce any output.
+uint32_t nullability_assign(_BitInt(7) * x)
+    __attribute__((no_sanitize("address"))) {
+  _BitInt(7) *_Nonnull y = x;
+  int32_t r = *(_BitInt(7) *)&y;
+  return (r > 0) ? r : 0;
+}
+
+// In these examples the file is expected to compile and run with no diagnostics
+// CHECK-NOT: runtime error:
+
+int main(int argc, char **argv) {
+  // clang-format off
+  uint64_t result =
+      1ULL +
+      nullability_arg((_BitInt(37) *)argc) +
+      ((uint64_t)nonnull_attribute((_BitInt(37) *)argc) & 0xFFFFFFFF) +
+      nullability_assign((_BitInt(7) *)argc);
+  // clang-format on
+  printf("%u\n", (uint32_t)(result & 0xFFFFFFFF));
+}
diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c
@@ -0,0 +1,169 @@
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -O0 -fsanitize=alignment,array-bounds,bool,float-cast-overflow,implicit-integer-sign-change,implicit-signed-integer-truncation,implicit-unsigned-integer-truncation,integer-divide-by-zero,nonnull-attribute,null,nullability-arg,nullability-assign,nullability-return,pointer-overflow,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,unsigned-integer-overflow,unsigned-shift-base,vla-bound %s -o %t1 && %run %t1 2>&1 | FileCheck %s --check-prefix=RUNTIME
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O0 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=IR
+
+#include <stdint.h>
+#include <stdio.h>
+
+uint32_t float_divide_by_zero() {
+  float f = 1.0f / 0.0f;
+  // IR: constant { i16, i16, [8 x i8] } { i16 1, i16 32, [8 x i8] c"'float'\00" }
+  _BitInt(37) r = (_BitInt(37))f;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:19: runtime error: inf is outside the range of representable values of type
+  // IR: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(37)'\00%\00\00\00\00\00" }
+  return r;
+}
+
+uint32_t integer_divide_by_zero() __attribute__((no_sanitize("memory"))) {
+  _BitInt(37) x = 1 / 0;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:21: runtime error: division by zero
+  // IR: constant { i16, i16, [32 x i8] } { i16 0, i16 10, [32 x i8] c"'uint32_t' (aka 'unsigned int')\00" }
+  return x;
+}
+
+uint32_t implicit_unsigned_integer_truncation() {
+  unsigned _BitInt(37) x = 2U;
+  x += float_divide_by_zero();
+  x += integer_divide_by_zero();
+  x = x + 0xFFFFFFFFFFFFFFFFULL;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:9: runtime error: unsigned integer overflow:
+  // IR: constant { i16, i16, [23 x i8] } { i16 0, i16 12, [23 x i8] c"'unsigned _BitInt(37)'\00" }
+  uint32_t r = x & 0xFFFFFFFF;
+  return r;
+}
+
+uint32_t pointer_overflow() __attribute__((no_sanitize("address"))) {
+  _BitInt(37) *x = (_BitInt(37) *)1;
+  _BitInt(37) *y = x - 1;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:22: runtime error: pointer index expression with base
+  uint32_t r = *(_BitInt(37) *)&y;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:16: runtime error: implicit conversion from type
+  return r;
+}
+
+uint32_t vla_bound(_BitInt(37) x) {
+  _BitInt(37) a[x - 1];
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:17: runtime error: variable length array bound evaluates to non-positive value
+  return 0;
+}
+
+uint32_t unsigned_shift_base() {
+  unsigned _BitInt(37) x = ~0U << 1;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:32: runtime error: left shift of 4294967295 by 1 places cannot be represented in type
+  return x;
+}
+
+uint32_t array_bounds() {
+  _BitInt(37) x[4];
+  _BitInt(37) y = x[10];
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:19: runtime error: index 10 out of bounds for type
+  // IR: constant { i16, i16, [17 x i8] } { i16 -1, i16 0, [17 x i8] c"'_BitInt(37)[4]'\00" }
+  return (uint32_t)y;
+}
+
+uint32_t float_cast_overflow() {
+  float a = 100000000.0f;
+  _BitInt(7) b = (_BitInt(7))a;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:18: runtime error: 1e+08 is outside the range of representable values of type
+  // IR: constant { i16, i16, [19 x i8] } { i16 2, i16 7, [19 x i8] c"'_BitInt(7)'\00\07\00\00\00\00\00" }
+  return b;
+}
+
+uint32_t implicit_integer_sign_change(unsigned _BitInt(37) x) {
+  _BitInt(37) r = x;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:19: runtime error: implicit conversion from type '{{[^']+}}' of value
+  return r & 0xFFFFFFFF;
+}
+
+_BitInt(13) implicit_signed_integer_truncation() {
+  _BitInt(73) x = (_BitInt(73)) ~((~0UL) >> 1);
+  return x;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:10: runtime error: implicit conversion from type
+  // IR: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(73)'\00I\00\00\00\00\00" }
+  // IR: constant { i16, i16, [20 x i8] } { i16 2, i16 9, [20 x i8] c"'_BitInt(13)'\00\0D\00\00\00\00\00" }
+}
+
+_BitInt(37) shift_exponent() {
+  _BitInt(37) x = 1 << (-1);
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:21: runtime error: shift exponent -1 is negative
+  return x;
+}
+
+_BitInt(37) shift_base() {
+  _BitInt(37) x = (-1) << 1;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:24: runtime error: left shift of negative value -1
+  return x;
+}
+
+uint32_t negative_shift1(unsigned _BitInt(37) x) {
+  _BitInt(9) c = -2;
+  return x >> c;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+  // IR: constant { i16, i16, [19 x i8] } { i16 2, i16 9, [19 x i8] c"'_BitInt(9)'\00\09\00\00\00\00\00" }
+}
+
+uint32_t negative_shift2(unsigned _BitInt(37) x) {
+  _BitInt(17) c = -2;
+  return x >> c;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+  // IR: constant { i16, i16, [20 x i8] } { i16 2, i16 11, [20 x i8] c"'_BitInt(17)'\00\11\00\00\00\00\00" }
+}
+
+uint32_t negative_shift3(unsigned _BitInt(37) x) {
+  _BitInt(34) c = -2;
+  return x >> c;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+  // IR: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(34)'\00\22\00\00\00\00\00" }
+}
+
+uint32_t negative_shift4(unsigned _BitInt(37) x) {
+  int64_t c = -2;
+  return x >> c;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+}
+
+uint32_t negative_shift5(unsigned _BitInt(37) x) {
+  _BitInt(68) c = -2;
+  return x >> c;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+  // CHECK-IR: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(68)'\00D\00\00\00\00\00" }
+}
+
+uint32_t unsigned_integer_overflow() {
+  unsigned _BitInt(37) x = ~0U;
+  ++x;
+  return x;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:10: runtime error: implicit conversion from type
+}
+
+// In this test no run-time overflow is expected, so there are no diagnostics here, but there should be a conversion error from the negative number on return.
+uint32_t signed_integer_overflow() {
+  _BitInt(37) x = (_BitInt(37)) ~((0x8FFFFFFFFFFFFFFFULL) >> 1);
+  --x;
+  return x;
+  // RUNTIME: {{.*}}bit-int.c:[[@LINE-1]]:10: runtime error: implicit conversion from type
+}
+
+int main(int argc, char **argv) {
+  // clang-format off
+  uint64_t result =
+      1ULL +
+      implicit_unsigned_integer_truncation() +
+      pointer_overflow() +
+      vla_bound(argc) +
+      unsigned_shift_base() +
+      (uint32_t)array_bounds() +
+      float_cast_overflow() +
+      implicit_integer_sign_change((unsigned _BitInt(37))(argc - 2)) +
+      (uint64_t)implicit_signed_integer_truncation() +
+      shift_exponent() +
+      (uint32_t)shift_base() +
+      negative_shift1(5) +
+      negative_shift2(5) +
+      negative_shift3(5) +
+      negative_shift4(5) +
+      negative_shift5(5) +
+      unsigned_integer_overflow() +
+      signed_integer_overflow();
+  // clang-format on
+  printf("%u\n", (uint32_t)(result & 0xFFFFFFFF));
+}