Skip to content

Commit 49e6e3b

Browse files
committed
[TSan] Add instrumentation of AVX2 and AVX512 instructions
Currently, ThreadSanitizer only instruments memory accesses up to a width of 128 bit and explicitly skips instrumentation of wider memory accesses. This means that TSan is blind for AVX2 and AVX512 memory instructions. This patch adds instrumentation and runtime support for 256bit and 512bit memory loads/stores. Additionally, vector gather/scatter instructions are considered for instrumentation. These instructions allow to gather individual data elements from memory into a single vector register and scatter the elements from a vector register into individual memory locations. Since the vector of addresses is passed as a 256bit / 512bit vector, the new interface functions are compiled separately with the specific compiler flags. This avoids that AVX instructions are introduced into other parts of the runtime. Since the new interface is only called on architectures that actually support AVX instructions, this separation maintains the portability of the runtime. Some of the tests use #pragma omp simd as a portable way to generate vector instructions across architectures. The construct is independent of the OpenMP runtime. Therefore the tests used base-language threading. Some of the tests directly call into the new runtime functions, since we found no way to actually generate scatter/gather instructions with masks different from 0xFF. Under review as llvm#74636
1 parent 4a11bb4 commit 49e6e3b

27 files changed

+1123
-20
lines changed

compiler-rt/cmake/config-ix.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ check_cxx_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR
8989
check_cxx_compiler_flag(-fno-coverage-mapping COMPILER_RT_HAS_FNO_COVERAGE_MAPPING_FLAG)
9090
check_cxx_compiler_flag("-Werror -mcrc32" COMPILER_RT_HAS_MCRC32_FLAG)
9191
check_cxx_compiler_flag("-Werror -msse4.2" COMPILER_RT_HAS_MSSE4_2_FLAG)
92+
check_cxx_compiler_flag("-Werror -mavx2" COMPILER_RT_HAS_MAVX2_FLAG)
93+
check_cxx_compiler_flag("-Werror -mavx512f" COMPILER_RT_HAS_MAVX512F_FLAG)
9294
check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG)
9395
check_cxx_compiler_flag("-Werror -mcrc" COMPILER_RT_HAS_MCRC_FLAG)
9496
check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG)

compiler-rt/lib/tsan/rtl/CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,17 @@ else()
237237
else()
238238
set(TSAN_ASM_SOURCES)
239239
endif()
240+
add_compiler_rt_object_libraries(RTTSanAVX2
241+
ARCHS ${arch}
242+
SOURCES tsan_interface_avx2.cpp
243+
ADDITIONAL_HEADERS tsan_interface_avx2.h
244+
#CFLAGS ${TSAN_RTL_CFLAGS} $<IF:"$COMPILER_RT_HAS_MAVX2_FLAG","-mavx2","">)
245+
CFLAGS ${TSAN_RTL_CFLAGS} $<IF:$<BOOL:${COMPILER_RT_HAS_MAVX2_FLAG}>,-mavx2,"">)
246+
add_compiler_rt_object_libraries(RTTSanAVX512
247+
ARCHS ${arch}
248+
SOURCES tsan_interface_avx512.cpp
249+
ADDITIONAL_HEADERS tsan_interface_avx512.h
250+
CFLAGS ${TSAN_RTL_CFLAGS} $<IF:$<BOOL:${COMPILER_RT_HAS_MAVX512F_FLAG}>,-mavx512f,"">)
240251
add_compiler_rt_runtime(clang_rt.tsan
241252
STATIC
242253
ARCHS ${arch}
@@ -247,6 +258,8 @@ else()
247258
$<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
248259
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
249260
$<TARGET_OBJECTS:RTUbsan.${arch}>
261+
$<TARGET_OBJECTS:RTTSanAVX2.${arch}>
262+
$<TARGET_OBJECTS:RTTSanAVX512.${arch}>
250263
ADDITIONAL_HEADERS ${TSAN_HEADERS}
251264
CFLAGS ${TSAN_RTL_CFLAGS}
252265
PARENT_TARGET tsan)
@@ -270,6 +283,8 @@ else()
270283
$<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
271284
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
272285
$<TARGET_OBJECTS:RTUbsan.${arch}>
286+
$<TARGET_OBJECTS:RTTSanAVX2.${arch}>
287+
$<TARGET_OBJECTS:RTTSanAVX512.${arch}>
273288
ADDITIONAL_HEADERS ${TSAN_HEADERS}
274289
CFLAGS ${TSAN_RTL_DYNAMIC_CFLAGS}
275290
DEFS SANITIZER_SHARED

compiler-rt/lib/tsan/rtl/tsan_interface.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "tsan_interface.h"
1414
#include "tsan_interface_ann.h"
1515
#include "tsan_rtl.h"
16+
1617
#include "sanitizer_common/sanitizer_internal_defs.h"
1718
#include "sanitizer_common/sanitizer_ptrauth.h"
1819

@@ -42,18 +43,42 @@ void __tsan_write16_pc(void *addr, void *pc) {
4243

4344
// __tsan_unaligned_read/write calls are emitted by compiler.
4445

45-
void __tsan_unaligned_read16(const void *addr) {
46+
template <unsigned int N>
47+
void __tsan_unaligned_readx(const void *addr) {
4648
uptr pc = CALLERPC;
4749
ThreadState *thr = cur_thread();
48-
UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
49-
UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
50+
for (unsigned int i = 0; i < N / 8; i++)
51+
UnalignedMemoryAccess(thr, pc, (uptr)addr + (i * 8), 8, kAccessRead);
5052
}
5153

52-
void __tsan_unaligned_write16(void *addr) {
54+
template <unsigned int N>
55+
void __tsan_unaligned_writex(void *addr) {
5356
uptr pc = CALLERPC;
5457
ThreadState *thr = cur_thread();
55-
UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
56-
UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
58+
for (unsigned int i = 0; i < N / 8; i++)
59+
UnalignedMemoryAccess(thr, pc, (uptr)addr + (i * 8), 8, kAccessWrite);
60+
}
61+
62+
void __tsan_unaligned_read16(const void *addr) {
63+
__tsan_unaligned_readx<16>(addr);
64+
}
65+
66+
void __tsan_unaligned_write16(void *addr) { __tsan_unaligned_writex<16>(addr); }
67+
68+
extern "C" void __tsan_unaligned_read32(const void *addr) {
69+
__tsan_unaligned_readx<32>(addr);
70+
}
71+
72+
extern "C" void __tsan_unaligned_write32(void *addr) {
73+
__tsan_unaligned_writex<32>(addr);
74+
}
75+
76+
extern "C" void __tsan_unaligned_read64(const void *addr) {
77+
__tsan_unaligned_readx<64>(addr);
78+
}
79+
80+
extern "C" void __tsan_unaligned_write64(void *addr) {
81+
__tsan_unaligned_writex<64>(addr);
5782
}
5883

5984
extern "C" {

compiler-rt/lib/tsan/rtl/tsan_interface.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,15 @@ SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read2(const void *addr);
5353
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read4(const void *addr);
5454
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read8(const void *addr);
5555
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read16(const void *addr);
56+
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read32(const void *addr);
57+
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read64(const void *addr);
5658

5759
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write2(void *addr);
5860
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write4(void *addr);
5961
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write8(void *addr);
6062
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write16(void *addr);
63+
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write32(void *addr);
64+
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write64(void *addr);
6165

6266
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1_pc(void *addr, void *pc);
6367
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read2_pc(void *addr, void *pc);

compiler-rt/lib/tsan/rtl/tsan_interface.inc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,18 @@ void __tsan_read16(void *addr) {
3838
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
3939
}
4040

41+
extern "C" void __tsan_read32(void *addr) {
42+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
43+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessRead);
44+
}
45+
46+
extern "C" void __tsan_read64(void *addr) {
47+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
48+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessRead);
49+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 32, kAccessRead);
50+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 48, kAccessRead);
51+
}
52+
4153
void __tsan_write1(void *addr) {
4254
MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
4355
}
@@ -58,6 +70,21 @@ void __tsan_write16(void *addr) {
5870
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
5971
}
6072

73+
extern "C" void __tsan_write32(void *addr) {
74+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
75+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessWrite);
76+
}
77+
78+
extern "C" void __tsan_write64(void *addr) {
79+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
80+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessWrite);
81+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 32, kAccessWrite);
82+
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 48, kAccessWrite);
83+
}
84+
85+
// Our vector instructions
86+
// TODO
87+
6188
void __tsan_read1_pc(void *addr, void *pc) {
6289
MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
6390
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#include "tsan_interface_avx2.h"
2+
3+
#include <immintrin.h>
4+
#include <inttypes.h>
5+
#include <stdint.h>
6+
#include <unistd.h>
7+
8+
#include "sanitizer_common/sanitizer_internal_defs.h"
9+
#include "sanitizer_common/sanitizer_ptrauth.h"
10+
#include "tsan_interface_ann.h"
11+
#include "tsan_rtl.h"
12+
13+
#define CALLERPC ((uptr)__builtin_return_address(0))
14+
15+
using namespace __tsan;
16+
17+
#ifdef __AVX__
18+
extern "C" void __tsan_scatter_vector4(__m256i vaddr, int size, uint8_t mask) {
19+
void *addr[4] = {};
20+
_mm256_store_si256((__m256i *)addr, vaddr);
21+
uptr pc = CALLERPC;
22+
ThreadState *thr = cur_thread();
23+
for (int i = 0; i < 4; i++)
24+
if ((mask >> i) & 1)
25+
UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessWrite);
26+
}
27+
28+
extern "C" void __tsan_gather_vector4(__m256i vaddr, int size, uint8_t mask) {
29+
void *addr[4] = {};
30+
_mm256_store_si256((__m256i *)addr, vaddr);
31+
uptr pc = CALLERPC;
32+
ThreadState *thr = cur_thread();
33+
for (int i = 0; i < 4; i++)
34+
if ((mask >> i) & 1)
35+
UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessRead);
36+
}
37+
#endif /*__AVX__*/
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//===-- tsan_interface_avx2.h ----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file is a part of ThreadSanitizer (TSan), a race detector.
10+
//
11+
// The functions declared in this header will be inserted by the instrumentation
12+
// module.
13+
// This header can be included by the instrumented program or by TSan tests.
14+
//===----------------------------------------------------------------------===//
15+
#ifndef TSAN_INTERFACE_AVX2_H
16+
#define TSAN_INTERFACE_AVX2_H
17+
18+
#include <immintrin.h>
19+
#include <sanitizer_common/sanitizer_internal_defs.h>
20+
#include <stdint.h>
21+
using __sanitizer::tid_t;
22+
using __sanitizer::uptr;
23+
24+
// This header should NOT include any other headers.
25+
// All functions in this header are extern "C" and start with __tsan_.
26+
27+
#ifdef __cplusplus
28+
extern "C" {
29+
#endif
30+
31+
#if !SANITIZER_GO
32+
# ifdef __AVX__
33+
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_scatter_vector4(__m256i vaddr,
34+
int width,
35+
uint8_t mask);
36+
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_gather_vector4(__m256i vaddr,
37+
int width,
38+
uint8_t mask);
39+
# endif /*__AVX__*/
40+
#endif // SANITIZER_GO
41+
42+
#ifdef __cplusplus
43+
} // extern "C"
44+
#endif
45+
46+
#endif /*TSAN_INTERFACE_AVX2_H*/
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#include "tsan_interface_avx512.h"
2+
3+
#include <immintrin.h>
4+
#include <inttypes.h>
5+
#include <stdint.h>
6+
#include <unistd.h>
7+
8+
#include "sanitizer_common/sanitizer_internal_defs.h"
9+
#include "sanitizer_common/sanitizer_ptrauth.h"
10+
#include "tsan_interface_ann.h"
11+
#include "tsan_rtl.h"
12+
13+
#define CALLERPC ((uptr)__builtin_return_address(0))
14+
15+
using namespace __tsan;
16+
17+
#ifdef __AVX512F__
18+
extern "C" void __tsan_scatter_vector8(__m512i vaddr, int size, uint8_t mask) {
19+
void *addr[8] = {};
20+
__m256i v256_1 = _mm512_extracti64x4_epi64(vaddr, 0);
21+
__m256i v256_2 = _mm512_extracti64x4_epi64(vaddr, 4);
22+
_mm256_store_si256((__m256i *)addr, v256_1);
23+
_mm256_store_si256((__m256i *)&(addr[4]), v256_2);
24+
uptr pc = CALLERPC;
25+
ThreadState *thr = cur_thread();
26+
for (int i = 0; i < 8; i++)
27+
if ((mask >> i) & 1)
28+
UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessWrite);
29+
}
30+
31+
extern "C" void __tsan_gather_vector8(__m512i vaddr, int size, uint8_t mask) {
32+
void *addr[8] = {};
33+
__m256i v256_1 = _mm512_extracti64x4_epi64(vaddr, 0);
34+
__m256i v256_2 = _mm512_extracti64x4_epi64(vaddr, 4);
35+
_mm256_store_si256((__m256i *)addr, v256_1);
36+
_mm256_store_si256((__m256i *)(&addr[4]), v256_2);
37+
uptr pc = CALLERPC;
38+
ThreadState *thr = cur_thread();
39+
for (int i = 0; i < 8; i++)
40+
if ((mask >> i) & 1)
41+
UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessRead);
42+
}
43+
#endif /*__AVX512F__*/
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//===-- tsan_interface_avx512.h ----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file is a part of ThreadSanitizer (TSan), a race detector.
10+
//
11+
// The functions declared in this header will be inserted by the instrumentation
12+
// module.
13+
// This header can be included by the instrumented program or by TSan tests.
14+
//===----------------------------------------------------------------------===//
15+
#ifndef TSAN_INTERFACE_AVX512_H
16+
#define TSAN_INTERFACE_AVX512_H
17+
18+
#include <immintrin.h>
19+
#include <sanitizer_common/sanitizer_internal_defs.h>
20+
#include <stdint.h>
21+
using __sanitizer::tid_t;
22+
using __sanitizer::uptr;
23+
24+
// This header should NOT include any other headers.
25+
// All functions in this header are extern "C" and start with __tsan_.
26+
27+
#ifdef __cplusplus
28+
extern "C" {
29+
#endif
30+
31+
#if !SANITIZER_GO
32+
# ifdef __AVX512F__
33+
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_scatter_vector8(__m512i vaddr,
34+
int width,
35+
uint8_t mask);
36+
SANITIZER_INTERFACE_ATTRIBUTE void __tsan_gather_vector8(__m512i vaddr,
37+
int width,
38+
uint8_t mask);
39+
# endif /*__AVX512F__*/
40+
#endif // SANITIZER_GO
41+
42+
#ifdef __cplusplus
43+
} // extern "C"
44+
#endif
45+
46+
#endif /*TSAN_INTERFACE_AVX512_H*/
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
2+
// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
3+
// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
4+
// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
5+
#include "test.h"
6+
7+
#ifndef SIMDLEN
8+
# define SIMDLEN 8
9+
#endif /*SIMDLEN*/
10+
#ifndef TYPE
11+
# define TYPE double
12+
#endif /*TYPE*/
13+
#define LEN 256
14+
#define CHUNK_SIZE 64
15+
16+
TYPE A[2 * LEN];
17+
TYPE c;
18+
19+
void *Thread(intptr_t offset) {
20+
for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
21+
#pragma omp simd simdlen(SIMDLEN)
22+
for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
23+
A[j] += c;
24+
}
25+
barrier_wait(&barrier);
26+
return NULL;
27+
}
28+
29+
void *Thread1(void *x) { return Thread(0); }
30+
31+
void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
32+
33+
int main() {
34+
barrier_init(&barrier, 2);
35+
pthread_t t[2];
36+
pthread_create(&t[0], NULL, Thread1, NULL);
37+
pthread_create(&t[1], NULL, Thread2, NULL);
38+
pthread_join(t[0], NULL);
39+
pthread_join(t[1], NULL);
40+
fprintf(stderr, "DONE\n");
41+
return 0;
42+
}
43+
44+
// CHECK-NOT: WARNING: ThreadSanitizer: data race
45+
// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread

0 commit comments

Comments
 (0)