Skip to content

Commit 676ad86

Browse files
committed
Add test for no buffer recreation
1 parent 8b4cbf3 commit 676ad86

File tree

3 files changed

+130
-49
lines changed

3 files changed

+130
-49
lines changed

ext/DynamicExpressionsCUDAExt.jl

+14-5
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,15 @@ function eval_tree_array(
3535
buffer=nothing,
3636
gpu_workspace=nothing,
3737
gpu_buffer=nothing,
38+
roots=nothing,
39+
num_nodes=nothing,
40+
num_launches=nothing,
41+
update_buffers::Val{_update_buffers}=Val(true),
3842
kws...,
39-
) where {T<:Number,N<:AbstractExpressionNode{T}}
40-
(; val, execution_order, roots, buffer, num_nodes) = as_array(Int32, trees; buffer)
41-
num_launches = maximum(execution_order)
43+
) where {T<:Number,N<:AbstractExpressionNode{T},_update_buffers}
44+
if _update_buffers
45+
(; val, roots, buffer, num_nodes, num_launches) = as_array(Int32, trees; buffer)
46+
end
4247
num_elem = size(gcX, 2)
4348

4449
## The following array is our "workspace" for
@@ -51,10 +56,14 @@ function eval_tree_array(
5156
gpu_workspace
5257
end
5358
gval = @view gworkspace[end, :]
54-
copyto!(gval, val)
59+
if _update_buffers
60+
copyto!(gval, val)
61+
end
5562

5663
## Index arrays (much faster to have `@view` here)
57-
gbuffer = if gpu_buffer === nothing
64+
gbuffer = if !_update_buffers
65+
gpu_buffer
66+
elseif gpu_buffer === nothing
5867
to_device(buffer, gcX)
5968
else
6069
copyto!(gpu_buffer, buffer)

src/AsArray.jl

+11-1
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@ using ..EquationModule: AbstractExpressionNode, tree_mapreduce, count_nodes
44

55
function as_array(
66
::Type{I},
7-
trees::Union{Tuple{N,Vararg{N}},AbstractVector{N}};
7+
trees::Union{N,Tuple{N,Vararg{N}},AbstractVector{N}};
88
buffer::Union{AbstractArray,Nothing}=nothing,
99
) where {T,N<:AbstractExpressionNode{T},I}
10+
if trees isa N
11+
return as_array(I, (trees,); buffer=buffer)
12+
end
1013
each_num_nodes = (t -> count_nodes(t; break_sharing=Val(true))).(trees)
1114
num_nodes = sum(each_num_nodes)
1215

@@ -33,6 +36,7 @@ function as_array(
3336
constant = @view buffer[8, :]
3437

3538
cursor = Ref(zero(I))
39+
num_launches = zero(I)
3640
for (root, tree) in zip(roots, trees)
3741
@assert root == cursor[] + 1
3842
tree_mapreduce(
@@ -70,6 +74,11 @@ function as_array(
7074
end
7175
execution_order[parent.id] = parent_execution_order
7276

77+
# Global number of launches equal to maximum execution order
78+
if parent_execution_order > num_launches
79+
num_launches = parent_execution_order
80+
end
81+
7382
(id=parent.id, order=parent_execution_order)
7483
end,
7584
tree;
@@ -84,6 +93,7 @@ function as_array(
8493
feature,
8594
op,
8695
execution_order,
96+
num_launches,
8797
idx_self,
8898
idx_l,
8999
idx_r,

test/test_cuda.jl

+105-43
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using DynamicExpressions
2+
using DynamicExpressions.AsArrayModule: as_array
23
using CUDA
34
using Random
45

@@ -10,55 +11,116 @@ include("tree_gen_utils.jl")
1011
safe_sin(x) = isfinite(x) ? sin(x) : convert(eltype(x), NaN)
1112
safe_cos(x) = isfinite(x) ? cos(x) : convert(eltype(x), NaN)
1213

13-
let
14-
operators = OperatorEnum(;
15-
binary_operators=[+, -, *, /], unary_operators=[safe_sin, safe_cos]
16-
)
17-
x1, x2, x3 = (i -> Node(Float64; feature=i)).(1:3)
18-
19-
for T in (Float32, Float64, ComplexF64), ntrees in (1, 2, 3), seed in 0:10
20-
Random.seed!(seed)
21-
22-
nrow = rand(10:30)
23-
nnodes = rand(10:25, ntrees)
24-
use_tuple = rand(Bool)
25-
26-
buffer = rand(Bool) ? ones(Int32, 8, sum(nnodes)) : nothing
27-
gpu_buffer = rand(Bool) ? FakeCuArray(ones(Int32, 8, sum(nnodes))) : nothing
28-
gpu_workspace = rand(Bool) ? FakeCuArray(ones(T, nrow + 1, sum(nnodes))) : nothing
29-
30-
trees = ntuple(i -> gen_random_tree_fixed_size(nnodes[i], operators, 3, T), ntrees)
31-
trees = use_tuple ? trees : collect(trees)
32-
X = randn(T, 3, nrow)
33-
if ntrees > 1
34-
y, completed = @inferred eval_tree_array(trees, X, operators)
35-
gpu_y, gpu_completed = @inferred eval_tree_array(
36-
trees, FakeCuArray(X), operators; buffer, gpu_workspace, gpu_buffer
14+
@testset "Random evals" begin
15+
let
16+
operators = OperatorEnum(;
17+
binary_operators=[+, -, *, /], unary_operators=[safe_sin, safe_cos]
18+
)
19+
x1, x2, x3 = (i -> Node(Float64; feature=i)).(1:3)
20+
21+
for T in (Float32, Float64, ComplexF64), ntrees in (1, 2, 3), seed in 0:10
22+
Random.seed!(seed)
23+
24+
nrow = rand(10:30)
25+
nnodes = rand(10:25, ntrees)
26+
use_tuple = rand(Bool)
27+
28+
buffer = rand(Bool) ? ones(Int32, 8, sum(nnodes)) : nothing
29+
gpu_buffer = rand(Bool) ? FakeCuArray(ones(Int32, 8, sum(nnodes))) : nothing
30+
gpu_workspace =
31+
rand(Bool) ? FakeCuArray(ones(T, nrow + 1, sum(nnodes))) : nothing
32+
33+
trees = ntuple(
34+
i -> gen_random_tree_fixed_size(nnodes[i], operators, 3, T), ntrees
3735
)
36+
trees = use_tuple ? trees : collect(trees)
37+
X = randn(T, 3, nrow)
38+
if ntrees > 1
39+
y, completed = @inferred eval_tree_array(trees, X, operators)
40+
gpu_y, gpu_completed = @inferred eval_tree_array(
41+
trees, FakeCuArray(X), operators; buffer, gpu_workspace, gpu_buffer
42+
)
3843

39-
# Should give same result either way
40-
for i in eachindex(completed, gpu_completed)
41-
if completed[i]
42-
@test y[i] ≈ gpu_y[i]
44+
# Should give same result either way
45+
for i in eachindex(completed, gpu_completed)
46+
if completed[i]
47+
@test y[i] ≈ gpu_y[i]
48+
end
4349
end
44-
end
4550

46-
# Should return same type as input
47-
if use_tuple
48-
@test y isa Tuple
49-
@test gpu_y isa Tuple
51+
# Should return same type as input
52+
if use_tuple
53+
@test y isa Tuple
54+
@test gpu_y isa Tuple
55+
else
56+
@test y isa Vector
57+
@test gpu_y isa Vector
58+
end
5059
else
51-
@test y isa Vector
52-
@test gpu_y isa Vector
53-
end
54-
else
55-
y, completed = @inferred eval_tree_array(only(trees), X, operators)
56-
gpu_y, gpu_completed = @inferred eval_tree_array(
57-
only(trees), FakeCuArray(X), operators
58-
)
59-
if completed
60-
@test y ≈ gpu_y
60+
y, completed = @inferred eval_tree_array(only(trees), X, operators)
61+
gpu_y, gpu_completed = @inferred eval_tree_array(
62+
only(trees), FakeCuArray(X), operators
63+
)
64+
if completed
65+
@test y ≈ gpu_y
66+
end
6167
end
6268
end
6369
end
6470
end
71+
72+
@testset "Evaluation on pre-computed buffers" begin
73+
let
74+
operators = OperatorEnum(;
75+
binary_operators=[+, -, *, /], unary_operators=[sin, cos]
76+
)
77+
x1, x2, x3 = (i -> Node(Float64; feature=i)).(1:3)
78+
Random.seed!(0)
79+
tree = sin(x1 * 3.1 - x3 * 0.9 + 0.2) * x2 - x3 * x3 * 1.5
80+
X = randn(Float64, 3, 100)
81+
82+
y1, _ = eval_tree_array(tree, X, operators)
83+
y2, _ = eval_tree_array(tree, FakeCuArray(X), operators)
84+
85+
@test y1 ≈ y2
86+
87+
(; val, roots, buffer, num_nodes, num_launches) = as_array(Int32, tree)
88+
gpu_buffer = FakeCuArray(buffer)
89+
gpu_workspace = FakeCuArray(zeros(Float64, 101, 50))
90+
copyto!((@view gpu_workspace[end, :]), val)
91+
92+
# Now, with all buffers:
93+
y3, _ = eval_tree_array(
94+
tree,
95+
FakeCuArray(X),
96+
operators;
97+
gpu_workspace,
98+
gpu_buffer,
99+
roots,
100+
num_nodes,
101+
num_launches,
102+
update_buffers=Val(false),
103+
)
104+
@test y1 ≈ y3
105+
106+
# Should be able to shift some of the values in this buffer:
107+
i = findfirst(gpu_workspace[end, :] .== 0.9)
108+
gpu_workspace[end, i] = 0.8
109+
110+
# And get the updated results:
111+
tree_prime = sin(x1 * 3.1 - x3 * 0.8 + 0.2) * x2 - x3 * x3 * 1.5
112+
y1_prime, _ = eval_tree_array(tree_prime, X, operators)
113+
y3_prime, _ = eval_tree_array(
114+
x1, # Doesn't matter what we put here
115+
FakeCuArray(X),
116+
operators;
117+
gpu_workspace,
118+
gpu_buffer,
119+
roots,
120+
num_nodes,
121+
num_launches,
122+
update_buffers=Val(false),
123+
)
124+
@test y1_prime ≈ y3_prime
125+
end
126+
end

0 commit comments

Comments
 (0)