llvm-doe-org
diff --git a/‎clang/lib/CodeGen/CGOpenMPRuntime.cpp
Lines changed: 11 additions & 3 deletions b/‎clang/lib/CodeGen/CGOpenMPRuntime.cpp
Lines changed: 11 additions & 3 deletions
diff --git a/‎compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp
Lines changed: 10 additions & 0 deletions b/‎compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp
Lines changed: 10 additions & 0 deletions
diff --git a/‎compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
Lines changed: 15 additions & 0 deletions b/‎compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
Lines changed: 15 additions & 0 deletions
diff --git a/‎compiler-rt/lib/tsan/rtl/tsan_rtl.h
Lines changed: 3 additions & 0 deletions b/‎compiler-rt/lib/tsan/rtl/tsan_rtl.h
Lines changed: 3 additions & 0 deletions
diff --git a/‎compiler-rt/lib/tsan/rtl/tsan_shadow.h
Lines changed: 7 additions & 2 deletions b/‎compiler-rt/lib/tsan/rtl/tsan_shadow.h
Lines changed: 7 additions & 2 deletions
diff --git a/‎openmp/tools/archer/ompt-tsan.cpp
Lines changed: 20 additions & 8 deletions b/‎openmp/tools/archer/ompt-tsan.cpp
Lines changed: 20 additions & 8 deletions
diff --git a/‎openmp/tools/archer/tests/races/parallel-for-array-reduction-no-barrier.c
Lines changed: 42 additions & 0 deletions b/‎openmp/tools/archer/tests/races/parallel-for-array-reduction-no-barrier.c
Lines changed: 42 additions & 0 deletions
diff --git a/‎openmp/tools/archer/tests/races/parallel-for-array-reduction-nowait.c
Lines changed: 42 additions & 0 deletions b/‎openmp/tools/archer/tests/races/parallel-for-array-reduction-nowait.c
Lines changed: 42 additions & 0 deletions
diff --git a/‎openmp/tools/archer/tests/races/parallel-for-reduction-no-barrier.c
Lines changed: 42 additions & 0 deletions b/‎openmp/tools/archer/tests/races/parallel-for-reduction-no-barrier.c
Lines changed: 42 additions & 0 deletions
@@ -5019,13 +5019,16 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction(
   Args.push_back(&RHSArg);
   const auto &CGFI =
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  CodeGenFunction CGF(CGM);
   std::string Name = getReductionFuncName(ReducerName);
   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                     llvm::GlobalValue::InternalLinkage, Name,
                                     &CGM.getModule());
+  if (CGF.SanOpts.has(SanitizerKind::Thread)) {
+    return Fn;
+  }
   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
   Fn->setDoesNotRecurse();
-  CodeGenFunction CGF(CGM);
   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
 
   // Dst = (void*[n])(LHSArg);
@@ -5217,6 +5220,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
   llvm::Function *ReductionFn = emitReductionFunction(
       CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
       Privates, LHSExprs, RHSExprs, ReductionOps);
+  llvm::Value *ReductionFnP = ReductionFn;
+  if (CGF.SanOpts.has(SanitizerKind::Thread)) {
+    ReductionFnP = llvm::ConstantPointerNull::get(
+        llvm::PointerType::get(ReductionFn->getFunctionType(), 0));
+  }
 
   // 3. Create static kmp_critical_name lock = { 0 };
   std::string Name = getName({"reduction"});
@@ -5235,8 +5243,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
       ReductionArrayTySize,                  // size_type sizeof(RedList)
       RL,                                    // void *RedList
-      ReductionFn, // void (*) (void *, void *) <reduce_func>
-      Lock         // kmp_critical_name *&<lock>
+      ReductionFnP, // void (*) (void *, void *) <reduce_func>
+      Lock          // kmp_critical_name *&<lock>
   };
   llvm::Value *Res = CGF.EmitRuntimeCall(
       OMPBuilder.getOrCreateRuntimeFunction(
 
@@ -266,6 +266,16 @@ void INTERFACE_ATTRIBUTE AnnotateBenignRace(
   BenignRaceImpl(f, l, mem, 1, desc);
 }
 
+void INTERFACE_ATTRIBUTE AnnotateAllAtomicBegin(char *f, int l) {  // annotation entry point; f and l are not referenced directly in this body
+  SCOPED_ANNOTATION(AnnotateAllAtomicBegin);  // presumably provides the thr and pc used below -- TODO confirm against the macro
+  ThreadAtomicBegin(thr, pc);  // forwards to the runtime, which sets the thread's FastState atomic bit
+}
+
+void INTERFACE_ATTRIBUTE AnnotateAllAtomicEnd(char *f, int l) {  // annotation entry point; f and l are not referenced directly in this body
+  SCOPED_ANNOTATION(AnnotateAllAtomicEnd);  // presumably provides the thr used below -- TODO confirm against the macro
+  ThreadAtomicEnd(thr);  // forwards to the runtime, which clears the FastState atomic bit once the counter reaches zero
+}
+
 void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsBegin(char *f, int l) {
   SCOPED_ANNOTATION(AnnotateIgnoreReadsBegin);
   ThreadIgnoreBegin(thr, pc);
 
@@ -1053,6 +1053,21 @@ void ThreadIgnoreEnd(ThreadState *thr) {
   }
 }
 
+// Opens a region in which the FastState atomic bit of `thr` is set.
+// Regions may nest: all_atomic counts the open regions, and ThreadAtomicEnd
+// clears the bit only when that count drops back to zero.
+// `pc` is accepted for symmetry with ThreadIgnoreBegin and is currently unused.
+void ThreadAtomicBegin(ThreadState *thr, uptr pc) {
+  thr->all_atomic++;
+  // Require a positive depth rather than exactly one, so that a nested Begin
+  // (which ThreadAtomicEnd already handles) does not abort the process.
+  CHECK_GT(thr->all_atomic, 0);
+  thr->fast_state.SetAtomicBit();
+}
+
+// Closes one region opened by ThreadAtomicBegin. The FastState atomic bit is
+// cleared only when the last open region ends.
+void ThreadAtomicEnd(ThreadState *thr) {
+  // There must be an open region to close.
+  CHECK_GT(thr->all_atomic, 0);
+  const int remaining = --thr->all_atomic;
+  if (remaining != 0)
+    return;
+  thr->fast_state.ClearAtomicBit();
+}
+
 #if !SANITIZER_GO
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
 uptr __tsan_testonly_shadow_stack_current_size() {
 
@@ -182,6 +182,7 @@ struct ThreadState {
   // for better performance.
   int ignore_reads_and_writes;
   int suppress_reports;
+  int all_atomic;
   // Go does not support ignores.
 #if !SANITIZER_GO
   IgnoreSet mop_ignore_set;
@@ -550,6 +551,8 @@ void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
 void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
                                          uptr size);
 
+void ThreadAtomicBegin(ThreadState *thr, uptr pc);
+void ThreadAtomicEnd(ThreadState *thr);
 void ThreadIgnoreBegin(ThreadState *thr, uptr pc);
 void ThreadIgnoreEnd(ThreadState *thr);
 void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc);
 
@@ -9,6 +9,7 @@
 #ifndef TSAN_SHADOW_H
 #define TSAN_SHADOW_H
 
+#include "sanitizer_common/sanitizer_common.h"
 #include "tsan_defs.h"
 
 namespace __tsan {
@@ -21,8 +22,8 @@ class FastState {
     part_.unused0_ = 0;
     part_.sid_ = static_cast<u8>(kFreeSid);
     part_.epoch_ = static_cast<u16>(kEpochLast);
-    part_.unused1_ = 0;
     part_.ignore_accesses_ = false;
+    part_.all_atomic_ = false;
   }
 
   void SetSid(Sid sid) { part_.sid_ = static_cast<u8>(sid); }
@@ -37,14 +38,18 @@ class FastState {
   void ClearIgnoreBit() { part_.ignore_accesses_ = 0; }
   bool GetIgnoreBit() const { return part_.ignore_accesses_; }
 
+  void SetAtomicBit() { part_.all_atomic_ = 1; }
+  void ClearAtomicBit() { part_.all_atomic_ = 0; }
+  bool GetAtomicBit() const { return part_.all_atomic_; }
+
  private:
   friend class Shadow;
   struct Parts {
     u32 unused0_ : 8;
     u32 sid_ : 8;
     u32 epoch_ : kEpochBits;
-    u32 unused1_ : 1;
     u32 ignore_accesses_ : 1;
+    u32 all_atomic_ : 1;
   };
   union {
     Parts part_;
 
@@ -169,7 +169,6 @@ void __attribute__((weak)) __tsan_flush_memory() {}
 // Thread Sanitizer is a tool that finds races in code.
 // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
 // tsan detects these exact functions by name.
-extern "C" {
 static void (*AnnotateHappensAfter)(const char *, int, const volatile void *);
 static void (*AnnotateHappensBefore)(const char *, int, const volatile void *);
 static void (*AnnotateIgnoreWritesBegin)(const char *, int);
@@ -183,7 +182,8 @@ static void *(*__tsan_get_current_fiber)();
 static void *(*__tsan_create_fiber)(unsigned flags);
 static void (*__tsan_destroy_fiber)(void *fiber);
 static void (*__tsan_switch_to_fiber)(void *fiber, unsigned flags);
-}
+static void (*AnnotateReductionBegin)(const char *, int);
+static void (*AnnotateReductionEnd)(const char *, int);
 
 // This marker is used to define a happens-before arc. The race detector will
 // infer an arc from the begin to the end when they share the same pointer
@@ -199,6 +199,10 @@ static void (*__tsan_switch_to_fiber)(void *fiber, unsigned flags);
 // Resume checking for racy writes.
 #define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
 
+// Maps to AnnotateAllAtomicBegin/End if TSan provides them, else to AnnotateIgnoreWritesBegin/End
+#define TsanReductionBegin() AnnotateReductionBegin(__FILE__, __LINE__)
+#define TsanReductionEnd() AnnotateReductionEnd(__FILE__, __LINE__)
+
 // We don't really delete the clock for now
 #define TsanDeleteClock(cv)
 
@@ -841,7 +845,7 @@ static void ompt_tsan_sync_region(ompt_sync_region_t kind,
         // 2. execution of another task.
         // For the latter case we will re-enable tracking in task_switch.
         Data->InBarrier = true;
-        TsanIgnoreWritesBegin();
+        TsanReductionBegin();
       }
 
       break;
@@ -874,7 +878,7 @@ static void ompt_tsan_sync_region(ompt_sync_region_t kind,
       if (hasReductionCallback < ompt_set_always) {
         // We want to track writes after the barrier again.
         Data->InBarrier = false;
-        TsanIgnoreWritesEnd();
+        TsanReductionEnd();
       }
 
       char BarrierIndex = Data->BarrierIndex;
@@ -929,7 +933,7 @@ static void ompt_tsan_reduction(ompt_sync_region_t kind,
   case ompt_scope_begin:
     switch (kind) {
     case ompt_sync_region_reduction:
-      TsanIgnoreWritesBegin();
+      TsanReductionBegin();
       break;
     default:
       break;
@@ -938,7 +942,7 @@ static void ompt_tsan_reduction(ompt_sync_region_t kind,
   case ompt_scope_end:
     switch (kind) {
     case ompt_sync_region_reduction:
-      TsanIgnoreWritesEnd();
+      TsanReductionEnd();
       break;
     default:
       break;
@@ -1122,7 +1126,7 @@ static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
       FromTask->InBarrier) {
     // We want to ignore writes in the runtime code during barriers,
     // but not when executing tasks with user code!
-    TsanIgnoreWritesEnd();
+    TsanReductionEnd();
   }
 
   // task completed execution
@@ -1164,7 +1168,7 @@ static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
   // Legacy handling for missing reduction callback
   if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) {
     // We re-enter runtime code which currently performs a barrier.
-    TsanIgnoreWritesBegin();
+    TsanReductionBegin();
   }
 
   // task suspended
@@ -1349,6 +1353,14 @@ static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
   findTsanFunction(__tsan_destroy_fiber, (void (*)(void *)));
   findTsanFunction(__tsan_get_current_fiber, (void *(*)()));
   findTsanFunction(__tsan_switch_to_fiber, (void (*)(void *, unsigned int)));
+  findTsanFunctionName(AnnotateReductionBegin, AnnotateAllAtomicBegin, (void (*)(const char *, int)));
+  findTsanFunctionName(AnnotateReductionEnd, AnnotateAllAtomicEnd, (void (*)(const char *, int)));
+  if (!AnnotateReductionBegin) {
+    AnnotateReductionBegin = AnnotateIgnoreWritesBegin;
+    AnnotateReductionEnd = AnnotateIgnoreWritesEnd;
+    if (archer_flags->verbose)
+      std::cout << "Archer uses fallback solution for reductions: might miss some race" << std::endl;
+  }
 
   SET_CALLBACK(thread_begin);
   SET_CALLBACK(thread_end);
 
@@ -0,0 +1,42 @@
+/*
+ * parallel-for-array-reduction-no-barrier.c -- Archer testcase
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/archer/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Number of threads is empirical: We need enough (>4) threads so that
+// the reduction is really performed hierarchically in the barrier!
+
+// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run-race | FileCheck %s
+// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run-race | FileCheck %s
+
+// REQUIRES: tsan
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+  int var[10]={0,1,2,3,4,5,6,7,8,9};
+  
+#pragma omp parallel
+  {
+#pragma omp masked
+    var[5] = 23;  // plain write with no barrier before the reduction loop; the CHECK lines below expect a race report
+#pragma omp for reduction(+ : var)
+    for (int i = 0; i < 1000; i++)
+      { var[i%10]++; }  // each of the 10 elements is incremented 100 times in total
+  }
+  fprintf(stderr, "DONE\n");  // marker matched by the "CHECK: DONE" line
+  int error = (var[5] != 123);  // 23 from the masked write + 100 increments of element 5
+  return error;
+}
+
+// CHECK: ThreadSanitizer: data race
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported
@@ -0,0 +1,42 @@
+/*
+ * parallel-for-array-reduction-nowait.c -- Archer testcase
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/archer/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Number of threads is empirical: We need enough (>4) threads so that
+// the reduction is really performed hierarchically in the barrier!
+
+// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run-race | FileCheck %s
+// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run-race | FileCheck %s
+
+// REQUIRES: tsan
+#include <omp.h>
+#include <stdio.h>
+
+// Reduction over an array with nowait, followed by an update of var[5] from
+// the masked construct. The CHECK lines below expect a data race report.
+int main(int argc, char *argv[]) {
+  int var[10]={0,1,2,3,4,5,6,7,8,9};
+
+#pragma omp parallel
+  {
+    // nowait drops the barrier at the end of the loop, so nothing orders the
+    // masked update below against the reduction loop.
+#pragma omp for reduction(+ : var) nowait
+    for (int i = 0; i < 1000; i++)
+      { var[i%10]++; }
+#pragma omp masked
+    var[5] += 23;
+  }
+  fprintf(stderr, "DONE\n");
+  // Race-free expectation: initial 5 + 100 loop increments + 23 = 128.
+  int error = (var[5] != 128);
+  return error;
+}
+
+// CHECK: ThreadSanitizer: data race
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported
@@ -0,0 +1,42 @@
+/*
+ * parallel-for-reduction-no-barrier.c -- Archer testcase
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/archer/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Number of threads is empirical: We need enough (>4) threads so that
+// the reduction is really performed hierarchically in the barrier!
+
+// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run-race | FileCheck %s
+// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run-race | FileCheck %s
+
+// REQUIRES: tsan
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+  int var = 0;
+  
+#pragma omp parallel
+  {
+#pragma omp masked
+    var = 23;  // plain write with no barrier before the reduction loop; the CHECK lines below expect a race report
+#pragma omp for reduction(+ : var)
+    for (int i = 0; i < 100; i++)
+      { var++; }  // 100 increments in total across the team
+  }
+  fprintf(stderr, "DONE\n");  // marker matched by the "CHECK: DONE" line
+  int error = (var != 123);  // 23 from the masked write + 100 increments
+  return error;
+}
+
+// CHECK: ThreadSanitizer: data race
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported