[ubsan] Display correct runtime messages for negative _BitInt (#96240)

earnol · Eänolituri Lómitaurë · AaronBallman · web-flow · commit 75cb7de404ee · 2024-08-15T10:15:27.000-04:00
Without this patch compiler-rt ubsan library has a bug displaying incorrect values for variables of the _BitInt (previously called _ExtInt) type. This patch affects affects both: generation of metadata inside code generator and runtime part. The runtime part provided only for i386 and x86_64 runtimes. Other runtimes should be updated to take full benefit of this patch. The patch is constructed the way to be backward compatible and int and float type runtime diagnostics should be unaffected for not yet updated runtimes. This patch fixes issue #64100. Co-authored-by: Eänolituri Lómitaurë <vladislav.aranov@ericsson.com> Co-authored-by: Aaron Ballman <aaron@aaronballman.com> Co-authored-by: Paul Kirth <paulkirth@google.com>
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
@@ -42,6 +42,7 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Passes/OptimizationLevel.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -65,6 +66,22 @@ static llvm::cl::opt<bool> ClSanitizeGuardChecks(
     "ubsan-guard-checks", llvm::cl::Optional,
     llvm::cl::desc("Guard UBSAN checks with `llvm.allow.ubsan.check()`."));
 
+//===--------------------------------------------------------------------===//
+//                        Defines for metadata
+//===--------------------------------------------------------------------===//
+
+// Those values are crucial to be the SAME as in ubsan runtime library.
+enum VariableTypeDescriptorKind : uint16_t {
+  /// An integer type.
+  TK_Integer = 0x0000,
+  /// A floating-point type.
+  TK_Float = 0x0001,
+  /// An _BitInt(N) type.
+  TK_BitInt = 0x0002,
+  /// Any other type. The value representation is unspecified.
+  TK_Unknown = 0xffff
+};
+
 //===--------------------------------------------------------------------===//
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
@@ -3288,22 +3305,40 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
 ///   { i16 TypeKind, i16 TypeInfo }
 /// \endcode
 ///
-/// followed by an array of i8 containing the type name. TypeKind is 0 for an
-/// integer, 1 for a floating point value, and -1 for anything else.
+/// followed by an array of i8 containing the type name with extra information
+/// for BitInt. TypeKind is TK_Integer(0) for an integer, TK_Float(1) for a
+/// floating point value, TK_BitInt(2) for BitInt and TK_Unknown(0xFFFF) for
+/// anything else.
 llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
   // Only emit each type's descriptor once.
   if (llvm::Constant *C = CGM.getTypeDescriptorFromMap(T))
     return C;
 
-  uint16_t TypeKind = -1;
+  uint16_t TypeKind = TK_Unknown;
   uint16_t TypeInfo = 0;
+  bool IsBitInt = false;
 
   if (T->isIntegerType()) {
-    TypeKind = 0;
+    TypeKind = TK_Integer;
     TypeInfo = (llvm::Log2_32(getContext().getTypeSize(T)) << 1) |
                (T->isSignedIntegerType() ? 1 : 0);
+    // Follow suggestion from discussion of issue 64100.
+    // So we can write the exact amount of bits in TypeName after '\0'
+    // making it <diagnostic-like type name>.'\0'.<32-bit width>.
+    if (T->isSignedIntegerType() && T->getAs<BitIntType>()) {
+      // Do a sanity checks as we are using 32-bit type to store bit length.
+      assert(getContext().getTypeSize(T) > 0 &&
+             " non positive amount of bits in __BitInt type");
+      assert(getContext().getTypeSize(T) <= 0xFFFFFFFF &&
+             " too many bits in __BitInt type");
+
+      // Redefine TypeKind with the actual __BitInt type if we have signed
+      // BitInt.
+      TypeKind = TK_BitInt;
+      IsBitInt = true;
+    }
   } else if (T->isFloatingType()) {
-    TypeKind = 1;
+    TypeKind = TK_Float;
     TypeInfo = getContext().getTypeSize(T);
   }
 
@@ -3314,6 +3349,20 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
       DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), StringRef(),
       StringRef(), std::nullopt, Buffer, std::nullopt);
 
+  if (IsBitInt) {
+    // The Structure is: 0 to end the string, 32 bit unsigned integer in target
+    // endianness, zero.
+    char S[6] = {'\0', '\0', '\0', '\0', '\0', '\0'};
+    const auto *EIT = T->castAs<BitIntType>();
+    uint32_t Bits = EIT->getNumBits();
+    llvm::support::endian::write32(S + 1, Bits,
+                                   getTarget().isBigEndian()
+                                       ? llvm::endianness::big
+                                       : llvm::endianness::little);
+    StringRef Str = StringRef(S, sizeof(S) / sizeof(decltype(S[0])));
+    Buffer.append(Str);
+  }
+
   llvm::Constant *Components[] = {
     Builder.getInt16(TypeKind), Builder.getInt16(TypeInfo),
     llvm::ConstantDataArray::getString(getLLVMContext(), Buffer)
diff --git a/clang/test/CodeGen/bit-int-ubsan.c b/clang/test/CodeGen/bit-int-ubsan.c
@@ -0,0 +1,96 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O0 -S -emit-llvm -o - %s | FileCheck %s
+
+// The runtime test checking the _BitInt ubsan feature is located in compiler-rt/test/ubsan/TestCases/Integer/bit-int.c
+
+#include <stdint.h>
+#include <stdio.h>
+
+uint32_t float_divide_by_zero() {
+  float f = 1.0f / 0.0f;
+  // CHECK: constant { i16, i16, [8 x i8] } { i16 1, i16 32, [8 x i8] c"'float'\00" }
+  _BitInt(37) r = (_BitInt(37))f;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(37)'\00%\00\00\00\00\00" }
+  return r;
+}
+
+uint32_t integer_divide_by_zero() __attribute__((no_sanitize("memory"))) {
+  _BitInt(37) x = 1 / 0;
+  // CHECK: constant { i16, i16, [32 x i8] } { i16 0, i16 10, [32 x i8] c"'uint32_t' (aka 'unsigned int')\00" }
+  return x;
+}
+
+uint32_t implicit_unsigned_integer_truncation() {
+  unsigned _BitInt(37) x = 2U;
+  x += float_divide_by_zero();
+  x += integer_divide_by_zero();
+  x = x + 0xFFFFFFFFFFFFFFFFULL;
+  // CHECK: constant { i16, i16, [23 x i8] } { i16 0, i16 12, [23 x i8] c"'unsigned _BitInt(37)'\00" }
+  uint32_t r = x & 0xFFFFFFFF;
+  return r;
+}
+
+uint32_t array_bounds() {
+  _BitInt(37) x[4];
+  _BitInt(37) y = x[10];
+  // CHECK: constant { i16, i16, [17 x i8] } { i16 -1, i16 0, [17 x i8] c"'_BitInt(37)[4]'\00" }
+  return (uint32_t)y;
+}
+
+uint32_t float_cast_overflow() {
+  float a = 100000000.0f;
+  _BitInt(7) b = (_BitInt(7))a;
+  // CHECK: constant { i16, i16, [19 x i8] } { i16 2, i16 7, [19 x i8] c"'_BitInt(7)'\00\07\00\00\00\00\00" }
+  return b;
+}
+
+_BitInt(13) implicit_signed_integer_truncation() {
+  _BitInt(73) x = (_BitInt(73)) ~((~0UL) >> 1);
+  return x;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(73)'\00I\00\00\00\00\00" }
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 9, [20 x i8] c"'_BitInt(13)'\00\0D\00\00\00\00\00" }
+}
+
+uint32_t negative_shift1(unsigned _BitInt(37) x)
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(9) c = -2;
+  return x >> c;
+  // CHECK: constant { i16, i16, [19 x i8] } { i16 2, i16 9, [19 x i8] c"'_BitInt(9)'\00\09\00\00\00\00\00" }
+}
+
+uint32_t negative_shift2(unsigned _BitInt(37) x)
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(17) c = -2;
+  return x >> c;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 11, [20 x i8] c"'_BitInt(17)'\00\11\00\00\00\00\00" }
+}
+
+uint32_t negative_shift3(unsigned _BitInt(37) x)
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(34) c = -2;
+  return x >> c;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(34)'\00\22\00\00\00\00\00" }
+}
+
+uint32_t negative_shift5(unsigned _BitInt(37) x)
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(68) c = -2;
+  return x >> c;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(68)'\00D\00\00\00\00\00" }
+}
+
+int main(int argc, char **argv) {
+  // clang-format off
+  uint64_t result =
+      1ULL +
+      implicit_unsigned_integer_truncation() +
+      (uint32_t)array_bounds() +
+      float_cast_overflow() +
+      (uint64_t)implicit_signed_integer_truncation() +
+      negative_shift1(5) +
+      negative_shift2(5) +
+      negative_shift3(5) +
+      negative_shift5(5);
+  // clang-format on
+  printf("%u\n", (uint32_t)(result & 0xFFFFFFFF));
+}
diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp
@@ -67,18 +67,21 @@ const char *__ubsan::getObjCClassName(ValueHandle Pointer) {
 
 SIntMax Value::getSIntValue() const {
   CHECK(getType().isSignedIntegerTy());
+  // Val was zero-extended to ValueHandle. Sign-extend from original width
+  // to SIntMax.
+  const unsigned ExtraBits =
+      sizeof(SIntMax) * 8 - getType().getIntegerBitCount();
   if (isInlineInt()) {
-    // Val was zero-extended to ValueHandle. Sign-extend from original width
-    // to SIntMax.
-    const unsigned ExtraBits =
-      sizeof(SIntMax) * 8 - getType().getIntegerBitWidth();
     return SIntMax(UIntMax(Val) << ExtraBits) >> ExtraBits;
   }
-  if (getType().getIntegerBitWidth() == 64)
-    return *reinterpret_cast<s64*>(Val);
+  if (getType().getIntegerBitWidth() == 64) {
+    return SIntMax(UIntMax(*reinterpret_cast<s64 *>(Val)) << ExtraBits) >>
+           ExtraBits;
+  }
 #if HAVE_INT128_T
   if (getType().getIntegerBitWidth() == 128)
-    return *reinterpret_cast<s128*>(Val);
+    return SIntMax(UIntMax(*reinterpret_cast<s128 *>(Val)) << ExtraBits) >>
+           ExtraBits;
 #else
   if (getType().getIntegerBitWidth() == 128)
     UNREACHABLE("libclang_rt.ubsan was built without __int128 support");
diff --git a/compiler-rt/lib/ubsan/ubsan_value.h b/compiler-rt/lib/ubsan/ubsan_value.h
@@ -103,6 +103,13 @@ class TypeDescriptor {
     /// representation is that of bitcasting the floating-point value to an
     /// integer type.
     TK_Float = 0x0001,
+    /// An _BitInt(N) type. Lowest bit is 1 for a signed value, 0 for an
+    /// unsigned value. Remaining bits are log_2(bit_width). The value
+    /// representation is the integer itself if it fits into a ValueHandle, and
+    /// a pointer to the integer otherwise. TypeName contains the true width
+    /// of the type for the signed _BitInt(N) type stored after zero bit after
+    /// TypeName as 32-bit unsigned integer.
+    TK_BitInt = 0x0002,
     /// Any other type. The value representation is unspecified.
     TK_Unknown = 0xffff
   };
@@ -113,10 +120,15 @@ class TypeDescriptor {
     return static_cast<Kind>(TypeKind);
   }
 
-  bool isIntegerTy() const { return getKind() == TK_Integer; }
+  bool isIntegerTy() const {
+    return getKind() == TK_Integer || getKind() == TK_BitInt;
+  }
+  bool isBitIntTy() const { return getKind() == TK_BitInt; }
+
   bool isSignedIntegerTy() const {
     return isIntegerTy() && (TypeInfo & 1);
   }
+  bool isSignedBitIntTy() const { return isBitIntTy() && (TypeInfo & 1); }
   bool isUnsignedIntegerTy() const {
     return isIntegerTy() && !(TypeInfo & 1);
   }
@@ -125,6 +137,25 @@ class TypeDescriptor {
     return 1 << (TypeInfo >> 1);
   }
 
+  const char *getBitIntBitCountPointer() const {
+    DCHECK(isBitIntTy());
+    DCHECK(isSignedBitIntTy());
+    // Scan Name for zero and return the next address
+    const char *p = getTypeName();
+    while (*p != '\0')
+      ++p;
+    // Return the next address
+    return p + 1;
+  }
+
+  unsigned getIntegerBitCount() const {
+    DCHECK(isIntegerTy());
+    if (isSignedBitIntTy())
+      return *reinterpret_cast<const u32 *>(getBitIntBitCountPointer());
+    else
+      return getIntegerBitWidth();
+  }
+
   bool isFloatTy() const { return getKind() == TK_Float; }
   unsigned getFloatBitWidth() const {
     CHECK(isFloatTy());
diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c
@@ -0,0 +1,42 @@
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -O0 -fsanitize=alignment,array-bounds,bool,float-cast-overflow,implicit-integer-sign-change,implicit-signed-integer-truncation,implicit-unsigned-integer-truncation,integer-divide-by-zero,nonnull-attribute,null,nullability-arg,nullability-assign,nullability-return,pointer-overflow,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,unsigned-integer-overflow,unsigned-shift-base,vla-bound %s -o %t1 && %run %t1 2>&1 | FileCheck %s
+
+#include <stdint.h>
+#include <stdio.h>
+
+// In this test there is an expectation of assignment of _BitInt not producing any output.
+uint32_t nullability_arg(_BitInt(37) *_Nonnull x)
+    __attribute__((no_sanitize("address")))
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(37) y = *(_BitInt(37) *)&x;
+  return (y > 0) ? y : 0;
+}
+
+// In this test there is an expectation of ubsan not triggeting on returning random address which is inside address space of the process.
+_BitInt(37) nonnull_attribute(__attribute__((nonnull)) _BitInt(37) * x)
+    __attribute__((no_sanitize("address")))
+    __attribute__((no_sanitize("memory"))) {
+  return *(_BitInt(37) *)&x;
+}
+
+// In this test there is an expectation of assignment of uint32_t from "invalid" _BitInt is not producing any output.
+uint32_t nullability_assign(_BitInt(7) * x)
+    __attribute__((no_sanitize("address")))
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(7) *_Nonnull y = x;
+  int32_t r = *(_BitInt(7) *)&y;
+  return (r > 0) ? r : 0;
+}
+
+// In those examples the file is expected to compile & run with no diagnostics
+// CHECK-NOT: runtime error:
+
+int main(int argc, char **argv) {
+  // clang-format off
+  uint64_t result =
+      1ULL +
+      nullability_arg((_BitInt(37) *)argc) +
+      ((uint64_t)nonnull_attribute((_BitInt(37) *)argc) & 0xFFFFFFFF) +
+      nullability_assign((_BitInt(7) *)argc);
+  // clang-format on
+  printf("%u\n", (uint32_t)(result & 0xFFFFFFFF));
+}
diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c