
[AMDGPU] Utilities to asan instrument memory instructions. #98863


Merged · 1 commit · Jul 24, 2024
332 changes: 332 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
@@ -0,0 +1,332 @@
//===AMDGPUAsanInstrumentation.cpp - ASAN related helper functions===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-------------------------------------------------------------===//

#include "AMDGPUAsanInstrumentation.h"
Contributor: Missing file header

#define DEBUG_TYPE "amdgpu-asan-instrumentation"

using namespace llvm;

namespace llvm {
namespace AMDGPU {

static uint64_t getRedzoneSizeForScale(int AsanScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
return std::max(32U, 1U << AsanScale);
}

static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
return getRedzoneSizeForScale(AsanScale);
}

uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
constexpr uint64_t kMaxRZ = 1 << 18;
const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);

uint64_t RZ = 0;
if (SizeInBytes <= MinRZ / 2) {
// Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
// at least 32 bytes, optimize when SizeInBytes is less than or equal to
// half of MinRZ.
RZ = MinRZ - SizeInBytes;
} else {
// Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);

// Round up to multiple of MinRZ.
if (SizeInBytes % MinRZ)
RZ += MinRZ - (SizeInBytes % MinRZ);
}

assert((RZ + SizeInBytes) % MinRZ == 0);

return RZ;
}
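For intuition, here is a standalone mirror of the sizing logic above (illustrative only; values assume AsanScale = 3, so MinRZ = 32):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Plain C++ sketch of getRedzoneSizeForGlobal, assuming AsanScale = 3.
static uint64_t redzoneFor(uint64_t SizeInBytes) {
  const uint64_t MinRZ = 32, MaxRZ = 1 << 18;
  uint64_t RZ;
  if (SizeInBytes <= MinRZ / 2) {
    RZ = MinRZ - SizeInBytes; // small object, e.g. a 4-byte int gets 28
  } else {
    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, MaxRZ);
    if (SizeInBytes % MinRZ) // pad so size + redzone is MinRZ-aligned
      RZ += MinRZ - (SizeInBytes % MinRZ);
  }
  return RZ;
}

int main() {
  // Prints: 4 -> 28, 100 -> 60, 4096 -> 1024.
  for (uint64_t S : {4, 100, 4096})
    std::printf("%llu -> %llu\n", (unsigned long long)S,
                (unsigned long long)redzoneFor(S));
}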

// Map an access size in bits to the index used to pick the __asan_report_*
// callback: 8 -> 0, 16 -> 1, 32 -> 2, 64 -> 3, 128 -> 4.
static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
size_t Res = llvm::countr_zero(TypeSize / 8);
return Res;
}

static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
Value *Cond, bool Recover) {
Value *ReportCond = Cond;
if (!Recover) {
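// amdgcn_ballot gathers the per-lane condition into a wavefront-wide mask,
// so the following non-null test gives a single uniform branch that is
// taken whenever any lane in the wave needs to report.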
auto *Ballot =
IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
ReportCond = IRB.CreateIsNotNull(Ballot);
}

auto *Trm = SplitBlockAndInsertIfThen(
ReportCond, &*IRB.GetInsertPoint(), false,
MDBuilder(M.getContext()).createUnlikelyBranchWeights());
Trm->getParent()->setName("asan.report");

if (Recover)
return Trm;

Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
IRB.SetInsertPoint(Trm);
return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {}, {});
}

static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
Value *AddrLong, Value *ShadowValue,
uint32_t TypeStoreSize, int AsanScale) {
uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
// Addr & (Granularity - 1)
Value *LastAccessedByte =
IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
// (Addr & (Granularity - 1)) + size - 1
if (TypeStoreSize / 8 > 1)
LastAccessedByte = IRB.CreateAdd(
LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
Review comment on lines +88 to +89 —

Contributor: Use llvm.ptrmask?

Contributor (author): createSlowPathCmp creates a cmp operation that checks whether the last accessed byte is >= the shadow value. llvm.ptrmask masks bits of a pointer; I'm not sure how it could be used here.

// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
LastAccessedByte =
IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
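For intuition, here is a scalar model of the check this function builds, assuming the standard ASan shadow encoding (a non-zero shadow byte k means only the first k bytes of that 2^AsanScale granule are addressable); the helper below is illustrative, not part of the patch:

#include <cstdint>

// Scalar sketch of the slow path, assuming AsanScale = 3 (8-byte granules).
// AccessSizeBytes corresponds to TypeStoreSize / 8 in the IR version.
bool lastByteIsPoisoned(uint64_t Addr, uint32_t AccessSizeBytes,
                        int8_t ShadowValue) {
  uint64_t LastAccessedByte = (Addr & 7) + AccessSizeBytes - 1;
  return (int8_t)LastAccessedByte >= ShadowValue;
}

// Example: a 4-byte load at granule offset 2 with shadow byte 4 (only bytes
// 0..3 addressable) touches byte 5, and 5 >= 4 flags the access.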

static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
Type *IntptrTy, Instruction *InsertBefore,
Value *Addr, bool IsWrite,
size_t AccessSizeIndex,
Value *SizeArgument, bool Recover) {
IRB.SetInsertPoint(InsertBefore);
CallInst *Call = nullptr;
SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
SmallString<64> TypeStr{IsWrite ? "store" : "load"};
SmallString<64> EndingStr{Recover ? "_noabort" : ""};

SmallString<128> AsanErrorCallbackSizedString;
raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString);
AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n"
<< EndingStr;

SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
AttributeList AL2;
FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
AsanErrorCallbackSizedOS.str(),
FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
SmallVector<Type *, 2> Args1{1, IntptrTy};
AttributeList AL1;

SmallString<128> AsanErrorCallbackString;
raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
<< (1ULL << AccessSizeIndex) << EndingStr;

FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
AsanErrorCallbackOS.str(),
FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
if (SizeArgument) {
Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
} else {
Call = IRB.CreateCall(AsanErrorCallback, Addr);
}

Call->setCannotMerge();
Contributor: Why is this needed?

Contributor (author): These utilities are taken from AddressSanitizer.cpp and haven't been changed much. I assume setCannotMerge keeps optimizations from merging/folding the distinct report calls.

return Call;
}

static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
Value *Shadow, int AsanScale, uint32_t AsanOffset) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, AsanScale);
if (AsanOffset == 0)
return Shadow;
// (Shadow >> scale) + offset
Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset);
return IRB.CreateAdd(Shadow, ShadowBase);
}
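In plain integer terms the mapping is a shift plus a bias; a minimal model (values illustrative):

#include <cstdint>

// Every 2^AsanScale bytes of application memory share one shadow byte.
uint64_t memToShadowModel(uint64_t Addr, int AsanScale, uint64_t AsanOffset) {
  return (Addr >> AsanScale) + AsanOffset;
}
// With AsanScale = 3, addresses 0x1000..0x1007 all map to shadow byte
// (0x1000 >> 3) + AsanOffset.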

void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
MaybeAlign Alignment, uint32_t TypeStoreSize,
bool IsWrite, Value *SizeArgument, bool UseCalls,
bool Recover, int AsanScale, int AsanOffset) {
Type *AddrTy = Addr->getType();
Type *IntptrTy = M.getDataLayout().getIntPtrType(
M.getContext(), AddrTy->getPointerAddressSpace());
IRB.SetInsertPoint(InsertBefore);
size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
Type *ShadowTy = IntegerType::get(M.getContext(),
std::max(8U, TypeStoreSize >> AsanScale));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
Value *ShadowPtr =
memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
const uint64_t ShadowAlign =
std::max<uint64_t>(Alignment.valueOrOne().value() >> AsanScale, 1);
Review comment on lines +166 to +167 —

Contributor: Keep this all as Align?

Value *ShadowValue = IRB.CreateAlignedLoad(
ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue,
TypeStoreSize, AsanScale);
Cmp = IRB.CreateAnd(Cmp, Cmp2);
Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
Instruction *Crash =
generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite,
AccessSizeIndex, SizeArgument, Recover);
Crash->setDebugLoc(OrigIns->getDebugLoc());
return;
}
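Taken together, the structure emitted for each access looks roughly like this (pseudocode inferred from the calls above for the non-recover case, not literal output):

// shadow  = load i8 from (addr >> AsanScale) + AsanOffset
// faulted = (shadow != 0) && (lastAccessedByte(addr) >= shadow)  // per lane
// if (amdgcn.ballot(faulted) != 0)      // uniform: some lane faulted
//   asan.report:
//     if (faulted)                      // per-lane re-check
//       __asan_report_{load,store}N(addr)
//       amdgcn.unreachable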

void getInterestingMemoryOperands(
Contributor: I'm pretty sure there's a utility to grab all pointer operands from recognized memory instructions

Module &M, Instruction *I,
SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
const DataLayout &DL = M.getDataLayout();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), std::nullopt);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(),
std::nullopt);
} else if (auto CI = dyn_cast<CallInst>(I)) {
Contributor: This is missing MemTransferInst handling.

Contributor (author): MemTransferInsts would already have been replaced with their ASan versions by the ASan pass, so they shouldn't be required here.

switch (CI->getIntrinsicID()) {
case Intrinsic::masked_load:
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
bool IsWrite = CI->getType()->isVoidTy();
// Masked store has an initial operand for the value.
unsigned OpOffset = IsWrite ? 1 : 0;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = Align(1);
// Otherwise no alignment guarantees. We probably got Undef.
if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
Alignment = Op->getMaybeAlignValue();
Value *Mask = CI->getOperand(2 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
break;
}
case Intrinsic::masked_expandload:
case Intrinsic::masked_compressstore: {
bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
unsigned OpOffset = IsWrite ? 1 : 0;
auto BasePtr = CI->getOperand(OpOffset);
MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
IRBuilder<> IB(I);
Value *Mask = CI->getOperand(1 + OpOffset);
Type *IntptrTy = M.getDataLayout().getIntPtrType(
M.getContext(), BasePtr->getType()->getPointerAddressSpace());
// Use the popcount of Mask as the effective vector length.
Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
Value *EVL = IB.CreateAddReduce(ExtMask);
Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
EVL);
break;
}
case Intrinsic::vp_load:
case Intrinsic::vp_store:
case Intrinsic::experimental_vp_strided_load:
case Intrinsic::experimental_vp_strided_store: {
auto *VPI = cast<VPIntrinsic>(CI);
unsigned IID = CI->getIntrinsicID();
bool IsWrite = CI->getType()->isVoidTy();
unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
Value *Stride = nullptr;
if (IID == Intrinsic::experimental_vp_strided_store ||
IID == Intrinsic::experimental_vp_strided_load) {
Stride = VPI->getOperand(PtrOpNo + 1);
// Use the pointer alignment as the element alignment if the stride is a
// multiple of the pointer alignment. Otherwise, the element alignment
// should be Align(1).
unsigned PointerAlign = Alignment.valueOrOne().value();
if (!isa<ConstantInt>(Stride) ||
cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
Alignment = Align(1);
}
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
VPI->getMaskParam(), VPI->getVectorLengthParam(),
Stride);
break;
}
case Intrinsic::vp_gather:
case Intrinsic::vp_scatter: {
auto *VPI = cast<VPIntrinsic>(CI);
unsigned IID = CI->getIntrinsicID();
bool IsWrite = IID == Intrinsic::vp_scatter;
unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = VPI->getPointerAlignment();
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
VPI->getMaskParam(),
VPI->getVectorLengthParam());
break;
}
Contributor: Missing all target intrinsics. There's a TLI hook to query pointer arguments.

Contributor (author): I've added the intrinsics I could find in the backend that do memory operations. I haven't found a TLI hook that fits here.

case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
case Intrinsic::amdgcn_s_buffer_load:
case Intrinsic::amdgcn_global_load_tr_b64:
case Intrinsic::amdgcn_global_load_tr_b128: {
unsigned PtrOpNo = 0;
bool IsWrite = false;
Type *Ty = CI->getType();
Value *Ptr = CI->getArgOperand(PtrOpNo);
MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
break;
}
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
unsigned PtrOpNo = 1;
bool IsWrite = true;
Value *Ptr = CI->getArgOperand(PtrOpNo);
Type *Ty = Ptr->getType();
MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
break;
}
default:
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (Type *Ty = CI->getParamByRefType(ArgNo)) {
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
} else if (Type *Ty = CI->getParamByValType(ArgNo)) {
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
}
}
}
}
}
} // end namespace AMDGPU
} // end namespace llvm
60 changes: 60 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
@@ -0,0 +1,60 @@
//===AMDGPUAsanInstrumentation.h - ASAN helper functions -*- C++- *===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===--------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H

#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "AMDGPUMemoryUtils.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

namespace llvm {
namespace AMDGPU {

/// Given SizeInBytes of the value to be instrumented,
/// returns the redzone size corresponding to it.
uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes);

/// Instrument the memory operand Addr.
/// Generates report blocks that catch addressing errors.
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
MaybeAlign Alignment, uint32_t TypeStoreSize,
bool IsWrite, Value *SizeArgument, bool UseCalls,
bool Recover, int Scale, int Offset);

/// Get all the memory operands from the instruction
/// that need to be instrumented.
void getInterestingMemoryOperands(
Module &M, Instruction *I,
SmallVectorImpl<InterestingMemoryOperand> &Interesting);
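Not part of this patch, but for context, a sketch of how a caller might drive the two utilities together; the loop, the pass placement, and the Scale/Offset values below are assumptions:

#include "llvm/IR/InstIterator.h"

// Hypothetical driver, illustration only.
static void instrumentFunction(Module &M, Function &F) {
  SmallVector<InterestingMemoryOperand, 16> Interesting;
  for (Instruction &I : instructions(F))
    AMDGPU::getInterestingMemoryOperands(M, &I, Interesting);
  for (InterestingMemoryOperand &Op : Interesting) {
    Instruction *Insn = Op.getInsn();
    IRBuilder<> IRB(Insn);
    AMDGPU::instrumentAddress(M, IRB, Insn, Insn, Op.getPtr(), Op.Alignment,
                              Op.TypeStoreSize.getFixedValue(), Op.IsWrite,
                              /*SizeArgument=*/nullptr, /*UseCalls=*/false,
                              /*Recover=*/true, /*Scale=*/3, /*Offset=*/0);
  }
}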

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_component_library(LLVMAMDGPUUtils
AMDGPUAsanInstrumentation.cpp
AMDGPUAsmUtils.cpp
AMDGPUBaseInfo.cpp
AMDGPUDelayedMCExpr.cpp