Commit ab1df14
[CIR][CIRGen][Builtin] Support __sync_add_and_fetch (#1077)
The notable change is the introduction of the helper function `emitBinaryAtomicPost`, modeled on [OG's `EmitBinaryAtomicPost`](https://github.com/llvm/clangir/blob/dbf320e5c3db0410566ae561067c595308870bad/clang/lib/CodeGen/CGBuiltin.cpp#L340C15-L340C35). Compared to `EmitBinaryAtomicPost`, `emitBinaryAtomicPost` is more concise, since OG's `EmitBinaryAtomicPost` duplicates quite a bit of code from [`MakeBinaryAtomicValue`](https://github.com/llvm/clangir/blob/dbf320e5c3db0410566ae561067c595308870bad/clang/lib/CodeGen/CGBuiltin.cpp#L340). The invert flag is not implemented, since `__sync_add_and_fetch` does not need it; it will be added (a trivial change) when we implement a builtin that does. Test cases are taken from [OG](https://github.com/llvm/clangir/blob/dbf320e5c3db0410566ae561067c595308870bad/clang/test/CodeGen/Atomics.c#L134).
1 parent d5b2a8e commit ab1df14
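
For context, here is a minimal, runnable illustration (not part of the commit) of the user-visible contrast the helper handles: the existing fetch-first builtins return the old value, while the `<binop>_and_fetch` family returns the new one, so the post helper must re-apply the binop to the value returned by `AtomicFetch`.

// Illustrative only (not from this commit): fetch-first vs. op-and-fetch.
int counter = 0;

int main() {
  int old_val = __sync_fetch_and_add(&counter, 5); // returns 0, counter == 5
  int new_val = __sync_add_and_fetch(&counter, 5); // returns 10, counter == 10
  return (old_val == 0 && new_val == 10) ? 0 : 1;
}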

File tree: 3 files changed, +124 -2 lines


clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 0 deletions
@@ -229,6 +229,7 @@ struct MissingFeatures {
   static bool emitConstrainedFPCall() { return false; }
   static bool emitEmptyRecordCheck() { return false; }
   static bool isPPC_FP128Ty() { return false; }
+  static bool emitBinaryAtomicPostHasInvert() { return false; }
 
   // Inline assembly
   static bool asmGoto() { return false; }

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

Lines changed: 32 additions & 2 deletions
@@ -244,6 +244,7 @@ static Address checkAtomicAlignment(CIRGenFunction &CGF, const CallExpr *E) {
 /// and the expression node.
 static mlir::Value makeBinaryAtomicValue(
     CIRGenFunction &cgf, cir::AtomicFetchKind kind, const CallExpr *expr,
+    mlir::Value *neededValP = nullptr, mlir::Type *neededValT = nullptr,
     cir::MemOrder ordering = cir::MemOrder::SequentiallyConsistent) {
 
   QualType typ = expr->getType();
@@ -263,7 +264,15 @@ static mlir::Value makeBinaryAtomicValue(
   mlir::Value val = cgf.emitScalarExpr(expr->getArg(1));
   mlir::Type valueType = val.getType();
   val = emitToInt(cgf, val, typ, intType);
-
+  // These output arguments are needed for post atomic fetch operations
+  // that calculate the result of the operation as the return value of
+  // <binop>_and_fetch builtins. The `AtomicFetch` operation only updates the
+  // memory location and returns the old value.
+  if (neededValP) {
+    assert(neededValT);
+    *neededValP = val;
+    *neededValT = valueType;
+  }
   auto rmwi = builder.create<cir::AtomicFetch>(
       cgf.getLoc(expr->getSourceRange()), destAddr.emitRawPointer(), val, kind,
       ordering, false, /* is volatile */
@@ -276,6 +285,26 @@ static RValue emitBinaryAtomic(CIRGenFunction &CGF, cir::AtomicFetchKind kind,
   return RValue::get(makeBinaryAtomicValue(CGF, kind, E));
 }
 
+static RValue emitBinaryAtomicPost(CIRGenFunction &cgf,
+                                   cir::AtomicFetchKind atomicOpkind,
+                                   const CallExpr *e,
+                                   cir::BinOpKind binopKind) {
+  mlir::Value val;
+  mlir::Type valueType;
+  clang::QualType typ = e->getType();
+  mlir::Value result =
+      makeBinaryAtomicValue(cgf, atomicOpkind, e, &val, &valueType);
+  clang::CIRGen::CIRGenBuilderTy &builder = cgf.getBuilder();
+  result = builder.create<cir::BinOp>(result.getLoc(), binopKind, result, val);
+  result = emitFromInt(cgf, result, typ, valueType);
+  // FIXME: Some callers of this function expect the result to be inverted,
+  // which would require an invert flag to be passed in and the inversion
+  // done here, as traditional clang code gen does. When we implement those
+  // caller builtins we should implement the inversion here.
+  assert(!MissingFeatures::emitBinaryAtomicPostHasInvert());
+  return RValue::get(result);
+}
+
 static mlir::Value MakeAtomicCmpXchgValue(CIRGenFunction &cgf,
                                           const CallExpr *expr,
                                           bool returnBool) {
@@ -1626,7 +1655,8 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__sync_add_and_fetch_4:
   case Builtin::BI__sync_add_and_fetch_8:
   case Builtin::BI__sync_add_and_fetch_16:
-    llvm_unreachable("BI__sync_add_and_fetch like NYI");
+    return emitBinaryAtomicPost(*this, cir::AtomicFetchKind::Add, E,
+                                cir::BinOpKind::Add);
 
   case Builtin::BI__sync_sub_and_fetch_1:
   case Builtin::BI__sync_sub_and_fetch_2:
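
A small runnable sketch (mine, not from the commit) of the strategy `emitBinaryAtomicPost` encodes: the atomic RMW primitive returns only the old value, so the new value that `<binop>_and_fetch` must return is recovered by re-applying the binop outside the atomic operation, just as the emitted `cir.binop` does.

#include <atomic>
#include <cassert>

int main() {
  std::atomic<unsigned> mem{0xF0};
  unsigned val = 0x0F;
  // fetch_add is fetch-first: it updates memory and returns the old value.
  unsigned old_val = mem.fetch_add(val);
  // Re-applying the binop yields exactly the value the RMW installed,
  // which is what cir.binop(add, ...) computes after cir.atomic.fetch.
  unsigned new_val = old_val + val;
  assert(new_val == mem.load()); // holds here; no other writers exist
  return 0;
}

This recomputation is correct even under contention, because the RMW installs precisely old value plus operand, so the re-applied binop reproduces the stored value without a second atomic access.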

clang/test/CIR/CodeGen/atomic.cpp

Lines changed: 91 additions & 0 deletions
@@ -12,6 +12,15 @@ typedef struct _a {
 
 void m() { at y; }
 
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long long sll;
+unsigned long long ull;
+
 // CHECK: ![[A:.*]] = !cir.struct<struct "_a" {!s32i}>
 
 int basic_binop_fetch(int *i) {
@@ -649,3 +658,85 @@ void cmp_val_ushort(unsigned short* p, short x, short u) {
 void cmp_val_ulong(unsigned long* p, long x, long u) {
   long r = __sync_val_compare_and_swap(p, x, u);
 }
+
+// CHECK-LABEL: @test_op_and_fetch
+// LLVM-LABEL: @test_op_and_fetch
+extern "C" void test_op_and_fetch (void)
+{
+  // CHECK: [[VAL0:%.*]] = cir.cast(integral, {{%.*}} : !u8i), !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(add, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM: [[VAL0:%.*]] = load i8, ptr @uc, align 1
+  // LLVM: [[RES0:%.*]] = atomicrmw add ptr @sc, i8 [[VAL0]] seq_cst, align 1
+  // LLVM: [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
+  // LLVM: store i8 [[RET0]], ptr @sc, align 1
+  sc = __sync_add_and_fetch (&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(add, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM: [[VAL1:%.*]] = load i8, ptr @uc, align 1
+  // LLVM: [[RES1:%.*]] = atomicrmw add ptr @uc, i8 [[VAL1]] seq_cst, align 1
+  // LLVM: [[RET1:%.*]] = add i8 [[RES1]], [[VAL1]]
+  // LLVM: store i8 [[RET1]], ptr @uc, align 1
+  uc = __sync_add_and_fetch (&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast(integral, {{%.*}} : !u8i), !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(add, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM: [[VAL2:%.*]] = load i8, ptr @uc, align 1
+  // LLVM: [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM: [[RES2:%.*]] = atomicrmw add ptr @ss, i16 [[CONV2]] seq_cst, align 2
+  // LLVM: [[RET2:%.*]] = add i16 [[RES2]], [[CONV2]]
+  // LLVM: store i16 [[RET2]], ptr @ss, align 2
+  ss = __sync_add_and_fetch (&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast(integral, {{%.*}} : !u8i), !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(add, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM: [[VAL3:%.*]] = load i8, ptr @uc, align 1
+  // LLVM: [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM: [[RES3:%.*]] = atomicrmw add ptr @us, i16 [[CONV3]] seq_cst, align 2
+  // LLVM: [[RET3:%.*]] = add i16 [[RES3]], [[CONV3]]
+  // LLVM: store i16 [[RET3]], ptr @us
+  us = __sync_add_and_fetch (&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast(integral, {{%.*}} : !u8i), !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(add, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM: [[VAL4:%.*]] = load i8, ptr @uc, align 1
+  // LLVM: [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM: [[RES4:%.*]] = atomicrmw add ptr @si, i32 [[CONV4]] seq_cst, align 4
+  // LLVM: [[RET4:%.*]] = add i32 [[RES4]], [[CONV4]]
+  // LLVM: store i32 [[RET4]], ptr @si, align 4
+  si = __sync_add_and_fetch (&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast(integral, {{%.*}} : !u8i), !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(add, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM: [[VAL5:%.*]] = load i8, ptr @uc, align 1
+  // LLVM: [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM: [[RES5:%.*]] = atomicrmw add ptr @ui, i32 [[CONV5]] seq_cst, align 4
+  // LLVM: [[RET5:%.*]] = add i32 [[RES5]], [[CONV5]]
+  // LLVM: store i32 [[RET5]], ptr @ui, align 4
+  ui = __sync_add_and_fetch (&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast(integral, {{%.*}} : !u8i), !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(add, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM: [[VAL6:%.*]] = load i8, ptr @uc, align 1
+  // LLVM: [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM: [[RES6:%.*]] = atomicrmw add ptr @sll, i64 [[CONV6]] seq_cst, align 8
+  // LLVM: [[RET6:%.*]] = add i64 [[RES6]], [[CONV6]]
+  // LLVM: store i64 [[RET6]], ptr @sll, align 8
+  sll = __sync_add_and_fetch (&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast(integral, {{%.*}} : !u8i), !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(add, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM: [[VAL7:%.*]] = load i8, ptr @uc, align 1
+  // LLVM: [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM: [[RES7:%.*]] = atomicrmw add ptr @ull, i64 [[CONV7]] seq_cst, align 8
+  // LLVM: [[RET7:%.*]] = add i64 [[RES7]], [[CONV7]]
+  // LLVM: store i64 [[RET7]], ptr @ull, align 8
+  ull = __sync_add_and_fetch (&ull, uc);
+}
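
The hunk above omits the test file's RUN lines. ClangIR codegen tests are typically driven along the following lines; the exact triple and flags here are my assumption, not something shown in this commit:

// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CHECK
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM

Under such a setup, the CHECK lines validate the emitted CIR, while the LLVM lines validate the IR produced after lowering through the CIR pipeline.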
