Skip to content

Commit 46a6b48

Browse files
committed
Test buffered-evaluation of CUDA kernels
1 parent a7d7e79 commit 46a6b48

File tree

2 files changed

+27
-15
lines changed

2 files changed

+27
-15
lines changed

ext/DynamicExpressionsCUDAExt.jl

-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ Base.similar(x::FakeCuArray, dims::Integer...) = FakeCuArray(similar(x.a, dims..
1515
Base.getindex(x::FakeCuArray, i::Int...) = getindex(x.a, i...)
1616
Base.setindex!(x::FakeCuArray, v, i::Int...) = setindex!(x.a, v, i...)
1717
Base.size(x::FakeCuArray) = size(x.a)
18-
Base.Array(x::FakeCuArray) = Array(x.a)
1918

2019
const MaybeCuArray{T,N} = Union{CuArray{T,N},FakeCuArray{T,N}}
2120

test/test_cuda.jl

+27-14
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,34 @@ let
1616
)
1717
x1, x2, x3 = (i -> Node(Float64; feature=i)).(1:3)
1818

19-
for T in (Float32, Float64, ComplexF64), num_trees in (1, 2, 3), seed in 0:10
19+
for T in (Float32, Float64, ComplexF64), ntrees in (1, 2, 3), seed in 0:10
2020
Random.seed!(seed)
21-
num_rows = rand(10:30)
22-
nodes_per = rand(10:25, num_trees)
23-
trees = ntuple(
24-
i -> gen_random_tree_fixed_size(nodes_per[i], operators, 3, T), num_trees
25-
)
26-
X = randn(T, 3, num_rows)
27-
y, completed = eval_tree_array(trees, X, operators)
28-
gpu_y, gpu_completed = eval_tree_array(trees, FakeCuArray(X), operators)
29-
gpu_y = Array.(gpu_y)
30-
31-
for i in eachindex(completed, gpu_completed)
32-
if completed[i]
33-
@test y[i] ≈ gpu_y[i]
21+
22+
nrow = rand(10:30)
23+
nnodes = rand(10:25, ntrees)
24+
25+
buffer = rand(Bool) ? ones(Int32, 8, sum(nnodes)) : nothing
26+
gpu_buffer = rand(Bool) ? FakeCuArray(ones(Int32, 8, sum(nnodes))) : nothing
27+
gpu_workspace = rand(Bool) ? FakeCuArray(ones(T, nrow + 1, sum(nnodes))) : nothing
28+
29+
trees = ntuple(i -> gen_random_tree_fixed_size(nnodes[i], operators, 3, T), ntrees)
30+
X = randn(T, 3, nrow)
31+
if ntrees > 1
32+
y, completed = eval_tree_array(trees, X, operators)
33+
gpu_y, gpu_completed = eval_tree_array(
34+
trees, FakeCuArray(X), operators; buffer, gpu_workspace, gpu_buffer
35+
)
36+
37+
for i in eachindex(completed, gpu_completed)
38+
if completed[i]
39+
@test y[i] ≈ gpu_y[i]
40+
end
41+
end
42+
else
43+
y, completed = eval_tree_array(only(trees), X, operators)
44+
gpu_y, gpu_completed = eval_tree_array(only(trees), FakeCuArray(X), operators)
45+
if completed
46+
@test y ≈ gpu_y
3447
end
3548
end
3649
end

0 commit comments

Comments
 (0)