Skip to content

[DirectX] Add support for typedBufferLoad and Store for RWBuffer<double2> and RWBuffer<double> #139996

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
return true;
case Intrinsic::dx_resource_load_typedbuffer:
// We need to handle doubles and vector of doubles.
return F.getReturnType()
->getStructElementType(0)
->getScalarType()
->isDoubleTy();
case Intrinsic::dx_resource_store_typedbuffer:
// We need to handle doubles and vector of doubles.
return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
}
return false;
}
Expand Down Expand Up @@ -532,6 +541,110 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
return Builder.CreateFMul(X, PiOver180);
}

/// Expand a typed buffer load of `double` or `<2 x double>` into a load of
/// i32 elements that are then recombined with dx.asdouble.
///
/// A replacement dx.resource.load.typedbuffer call is emitted in front of
/// \p Orig, reusing its first two operands, and returning `{ <2 x i32>, i1 }`
/// for a scalar double or `{ <4 x i32>, i1 }` for a double2. Each consecutive
/// pair of i32 elements is fed to dx.asdouble (lower-indexed element first) to
/// rebuild one double. Every user of \p Orig must be an extractvalue with a
/// single index: index 0 is redirected to the rebuilt value and index 1 to the
/// check bit of the replacement load. The users and \p Orig are then erased.
///
/// \param Orig the dx.resource.load.typedbuffer call to expand.
/// \returns true in all cases; \p Orig no longer exists on return.
static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);

  Type *BufferTy = Orig->getType()->getStructElementType(0);
  assert(BufferTy->getScalarType()->isDoubleTy() &&
         "Only expand double or double2");

  // Number of i32 elements to load: two per double.
  unsigned ExtractNum = 2;
  // VT is only read by the assert below, so it is marked [[maybe_unused]] to
  // keep builds with assertions disabled free of unused-variable warnings.
  if ([[maybe_unused]] auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
    assert(VT->getNumElements() == 2 &&
           "TypedBufferLoad double vector has wrong size");
    ExtractNum = 4;
  }
  const bool IsVector = ExtractNum == 4;

  Type *Ty = FixedVectorType::get(Builder.getInt32Ty(), ExtractNum);
  Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
  CallInst *Load =
      Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
                              {Orig->getOperand(0), Orig->getOperand(1)});

  // Extract the buffer load's result.
  Value *Extract = Builder.CreateExtractValue(Load, {0});

  // All element extracts are emitted before any dx.asdouble call.
  SmallVector<Value *, 4> ExtractElements;
  for (unsigned I = 0; I < ExtractNum; ++I)
    ExtractElements.push_back(
        Builder.CreateExtractElement(Extract, Builder.getInt32(I)));

  // Combine into double(s). In the scalar case the poison placeholder is
  // simply replaced by the single rebuilt double.
  Value *Result = PoisonValue::get(BufferTy);
  for (unsigned I = 0; I < ExtractNum; I += 2) {
    Value *Dbl =
        Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
                                {ExtractElements[I], ExtractElements[I + 1]});
    Result = IsVector ? Builder.CreateInsertElement(Result, Dbl,
                                                    Builder.getInt32(I / 2))
                      : Dbl;
  }

  // The check bit is extracted lazily, and at most once, the first time a
  // user asks for it.
  Value *CheckBit = nullptr;
  for (User *U : make_early_inc_range(Orig->users())) {
    auto *EVI = cast<ExtractValueInst>(U);
    ArrayRef<unsigned> Indices = EVI->getIndices();
    assert(Indices.size() == 1 &&
           "Expected a single index into the typedbufferload result");

    if (Indices[0] == 0) {
      // Use of the value(s).
      EVI->replaceAllUsesWith(Result);
    } else {
      // Use of the check bit.
      assert(Indices[0] == 1 && "Unexpected type for typedbufferload");
      if (!CheckBit)
        CheckBit = Builder.CreateExtractValue(Load, {1});
      EVI->replaceAllUsesWith(CheckBit);
    }
    EVI->eraseFromParent();
  }
  Orig->eraseFromParent();
  return true;
}

/// Expand a typed buffer store of `double` or `<2 x double>` into a store of
/// i32 elements produced by dx.splitdouble.
///
/// The stored value (operand 2 of \p Orig) is split into its low and high
/// halves with dx.splitdouble. For a scalar double the halves are packed into
/// a `<2 x i32>` as {low, high}. For a double2 the two `<2 x i32>` halves are
/// interleaved into a `<4 x i32>` as {low0, high0, low1, high1}. A replacement
/// dx.resource.store.typedbuffer call is emitted with the same first two
/// operands and the packed i32 vector, and \p Orig is erased.
///
/// \param Orig the dx.resource.store.typedbuffer call to expand.
/// \returns true in all cases; \p Orig no longer exists on return.
static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);

  Type *BufferTy = Orig->getFunctionType()->getParamType(2);
  assert(BufferTy->getScalarType()->isDoubleTy() &&
         "Only expand double or double2");

  bool IsVector = false;
  // VT is only read by the assert below, so it is marked [[maybe_unused]] to
  // keep builds with assertions disabled free of unused-variable warnings.
  if ([[maybe_unused]] auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
    assert(VT->getNumElements() == 2 &&
           "TypedBufferStore double vector has wrong size");
    IsVector = true;
  }

  // Each half of the split is an i32 for a scalar double and a <2 x i32> for
  // a double2.
  Type *Int32Ty = Builder.getInt32Ty();
  Type *SplitElementTy = Int32Ty;
  if (IsVector)
    SplitElementTy = FixedVectorType::get(Int32Ty, 2);

  // Split our double(s).
  auto *SplitTy = StructType::get(SplitElementTy, SplitElementTy);
  Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
                                         Orig->getOperand(2));

  // Create our vector.
  Value *LowBits = Builder.CreateExtractValue(Split, 0);
  Value *HighBits = Builder.CreateExtractValue(Split, 1);
  Value *Val = nullptr;
  if (IsVector) {
    // Mask indices 0-1 select from LowBits and 2-3 from HighBits, so this
    // interleaves the halves per double: {low0, high0, low1, high1}.
    Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
  } else {
    Val = PoisonValue::get(FixedVectorType::get(Int32Ty, 2));
    Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
    Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
  }

  Builder.CreateIntrinsic(Builder.getVoidTy(),
                          Intrinsic::dx_resource_store_typedbuffer,
                          {Orig->getOperand(0), Orig->getOperand(1), Val});
  Orig->eraseFromParent();
  return true;
}

static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
if (ClampIntrinsic == Intrinsic::dx_uclamp)
return Intrinsic::umax;
Expand Down Expand Up @@ -660,6 +773,14 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_radians:
Result = expandRadiansIntrinsic(Orig);
break;
case Intrinsic::dx_resource_load_typedbuffer:
if (expandTypedBufferLoadIntrinsic(Orig))
return true;
break;
case Intrinsic::dx_resource_store_typedbuffer:
if (expandTypedBufferStoreIntrinsic(Orig))
return true;
break;
case Intrinsic::usub_sat:
Result = expandUsubSat(Orig);
break;
Expand Down
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/DirectX/BufferLoad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -197,4 +197,36 @@ define void @loadv4i16() {
ret void
}

; Typed buffer load through a handle whose element type is double, where the
; load call itself already returns { <2 x i32>, i1 }. Only field 0 of the
; result is extracted, and the extracted value has no further uses.
define void @loadf64() {
; show dxil op lower can handle typedbuffer load where target is double but load type is <2 x i32>
; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
%buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
i32 0, i32 1, i32 1, i32 0, i1 false)

; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 266 }) #0
%load = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)

; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
%val = extractvalue { <2 x i32>, i1 } %load, 0
ret void
}

; Typed buffer load through a handle whose element type is <2 x double>, where
; the load call itself already returns { <4 x i32>, i1 }. Only field 0 of the
; result is extracted, and the extracted value has no further uses.
define void @loadv2f64() {
; show dxil op lower can handle typedbuffer load where target is double2 but load type is <4 x i32>
; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
%buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
i32 0, i32 1, i32 1, i32 0, i1 false)

; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 522 }) #0
%load = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)

; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
%val = extractvalue { <4 x i32>, i1 } %load, 0
ret void
}

; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
91 changes: 91 additions & 0 deletions llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s

target triple = "dxil-pc-shadermodel6.6-compute"

; Scalar case: the input loads {double, i1} from a double typed buffer and
; extracts only the value (field 0). The expected output replaces the load
; with one returning { <2 x i32>, i1 } and rebuilds the double from the two
; i32 elements with dx.asdouble.
define void @loadf64() {
; check the handle from binding is unchanged
; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
%buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
i32 0, i32 1, i32 1, i32 0, i1 false)

; check we load an <2 x i32> instead of a double
; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.typedbuffer
; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
%load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)

; check we extract the two i32 and construct a double
; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0
; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1
; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
; CHECK-NOT: extractvalue { double, i1 }
%data0 = extractvalue {double, i1} %load0, 0
ret void
}

; Vector case: the input loads { <2 x double>, i1 } from a double2 typed
; buffer and extracts only the value (field 0). The expected output replaces
; the load with one returning { <4 x i32>, i1 }, extracts all four i32
; elements first, and then builds the <2 x double> one lane at a time from
; element pairs (0,1) and (2,3) with dx.asdouble.
define void @loadv2f64() {
; check the handle from binding is unchanged
; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
%buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
i32 0, i32 1, i32 1, i32 0, i1 false)

; check we load an <4 x i32> instead of a double2
; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 }
; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(
; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0)
%load0 = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)

; check we extract the 4 i32 and construct a <2 x double>
; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0
; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0
; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1
; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2
; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3
; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i32 0
; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
; CHECK: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i32 1
; CHECK-NOT: extractvalue { <2 x double>, i1 }
%data0 = extractvalue { <2 x double>, i1 } %load0, 0
ret void
}

; show we properly handle extracting the check bit
; Same scalar load as @loadf64 in this file, but the input extracts both the
; value (field 0) and the i1 check bit (field 1). The expected output takes
; the check bit from field 1 of the replacement { <2 x i32>, i1 } load and
; leaves no extractvalue on the original { double, i1 } type.
define void @loadf64WithCheckBit() {
; check the handle from binding is unchanged
; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
%buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
i32 0, i32 1, i32 1, i32 0, i1 false)

; check we load an <2 x i32> instead of a double
; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.typedbuffer
; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
%load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)

; check we extract the two i32 and construct a double
; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0
; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1
; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
%data0 = extractvalue {double, i1} %load0, 0
; CHECK: extractvalue { <2 x i32>, i1 } [[L0]], 1
; CHECK-NOT: extractvalue { double, i1 }
%cb = extractvalue {double, i1} %load0, 1
ret void
}
41 changes: 41 additions & 0 deletions llvm/test/CodeGen/DirectX/BufferStore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,44 @@ define void @store_scalarized_floats(float %data0, float %data1, float %data2, f

ret void
}

; Typed buffer store through a handle whose element type is double, where the
; stored value is already a <2 x i32>. The expected bufferStore receives the
; two extracted elements followed by the first element repeated in the two
; remaining value slots.
define void @storef64(<2 x i32> %0) {
; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]

%buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
i32 0, i32 0, i32 1, i32 0, i1 false)

; The temporary casts should all have been cleaned up
; CHECK-NOT: %dx.resource.casthandle

; Match the store operands through the D0/D1 captures rather than hard-coded
; value numbers, so the test does not depend on how unnamed values are numbered.
; CHECK: [[D0:%.*]] = extractelement <2 x i32> %0, i32 0
; CHECK: [[D1:%.*]] = extractelement <2 x i32> %0, i32 1
; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D0]], i32 [[D0]], i8 15)
call void @llvm.dx.resource.store.typedbuffer(
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0, <2 x i32> %0)
ret void
}

; Typed buffer store through a handle whose element type is <2 x double>,
; where the stored value is already a <4 x i32>. The expected bufferStore
; receives the four extracted elements in order.
define void @storev2f64(<4 x i32> %0) {
; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]

%buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
i32 0, i32 0, i32 1, i32 0, i1 false)

; The temporary casts should all have been cleaned up
; CHECK-NOT: %dx.resource.casthandle

; CHECK: [[D0:%.*]] = extractelement <4 x i32> %0, i32 0
; CHECK: [[D1:%.*]] = extractelement <4 x i32> %0, i32 1
; CHECK: [[D2:%.*]] = extractelement <4 x i32> %0, i32 2
; CHECK: [[D3:%.*]] = extractelement <4 x i32> %0, i32 3
; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D2]], i32 [[D3]], i8 15)
call void @llvm.dx.resource.store.typedbuffer(
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
<4 x i32> %0)
ret void
}
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s

target triple = "dxil-pc-shadermodel6.6-compute"

; Scalar case: the input stores a double argument to a double typed buffer.
; The expected output splits the double with dx.splitdouble, packs the two
; i32 halves into a <2 x i32> as {lo, hi}, and stores that vector instead.
define void @storef64(double %0) {
; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0)
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
%buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
i32 0, i32 0, i32 1, i32 0, i1 false)

; check we split the double and store the lo and hi bits
; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0)
; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0
; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0
; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
call void @llvm.dx.resource.store.typedbuffer(
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0,
double %0)
ret void
}


; Vector case: the input stores a <2 x double> argument to a double2 typed
; buffer. The expected output splits it with dx.splitdouble into two
; <2 x i32> halves and interleaves them with a shufflevector (mask 0, 2, 1, 3)
; into the <4 x i32> that is stored instead.
define void @storev2f64(<2 x double> %0) {
; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
%buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[SD:%.*]] = call { <2 x i32>, <2 x i32> }
; CHECK-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0)
; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32(
; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]])
call void @llvm.dx.resource.store.typedbuffer(
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
<2 x double> %0)
ret void
}
Loading