Skip to content

Commit 74094e3

Browse files
committed
Start working on CPU-based unittests
1 parent 5befede commit 74094e3

File tree

3 files changed

+78
-15
lines changed

3 files changed

+78
-15
lines changed

ext/DynamicExpressionsCUDAExt.jl

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,36 @@
11
module DynamicExpressionsCUDAExt
22

3-
using CUDA
3+
using CUDA: @cuda, CuArray, blockDim, blockIdx, threadIdx
44
using DynamicExpressions: OperatorEnum, AbstractExpressionNode
55
using DynamicExpressions.EvaluateEquationModule: get_nbin, get_nuna
66
using DynamicExpressions.AsArrayModule: as_array
77

88
import DynamicExpressions.EvaluateEquationModule: eval_tree_array
99

10+
# array type for exclusively testing purposes
11+
struct FakeCuArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
12+
a::A
13+
end
14+
Base.similar(x::FakeCuArray, dims::Integer...) = FakeCuArray(similar(x.a, dims...))
15+
Base.getindex(x::FakeCuArray, i::Int...) = getindex(x.a, i...)
16+
Base.setindex!(x::FakeCuArray, v, i::Int...) = setindex!(x.a, v, i...)
17+
Base.size(x::FakeCuArray) = size(x.a)
18+
19+
const MaybeCuArray{T,N} = Union{CuArray{T,2},FakeCuArray{T,N}}
20+
21+
to_device(a, ::CuArray) = CuArray(a)
22+
to_device(a, ::FakeCuArray) = FakeCuArray(a)
23+
1024
function eval_tree_array(
11-
tree::AbstractExpressionNode{T}, gcX::CuArray{T,2}, operators::OperatorEnum; kws...
25+
tree::AbstractExpressionNode{T}, gcX::MaybeCuArray{T,2}, operators::OperatorEnum; kws...
1226
) where {T<:Number}
1327
(outs, is_good) = eval_tree_array((tree,), gcX, operators; kws...)
1428
return (only(outs), only(is_good))
1529
end
1630

1731
function eval_tree_array(
18-
trees::NTuple{M,N},
19-
gcX::CuArray{T,2},
32+
trees::Tuple{N,Vararg{N,M}},
33+
gcX::MaybeCuArray{T,2},
2034
operators::OperatorEnum;
2135
buffer=nothing,
2236
gpu_workspace=nothing,
@@ -29,15 +43,19 @@ function eval_tree_array(
2943

3044
## Floating point arrays:
3145
gworkspace = if gpu_workspace === nothing
32-
CuArray{T}(undef, num_elem, num_nodes + 1)
46+
similar(gcX, num_elem, num_nodes + 1)
3347
else
3448
gpu_workspace
3549
end
3650
gval = @view gworkspace[:, end]
3751
copyto!(gval, val)
3852

3953
## Index arrays (much faster to have `@view` here)
40-
gbuffer = gpu_buffer === nothing ? CuArray(buffer) : copyto!(gpu_buffer, buffer)
54+
gbuffer = if gpu_buffer === nothing
55+
to_device(buffer, gcX)
56+
else
57+
copyto!(gpu_buffer, buffer)
58+
end
4159
gdegree = @view gbuffer[1, :]
4260
gfeature = @view gbuffer[2, :]
4361
gop = @view gbuffer[3, :]
@@ -61,10 +79,10 @@ function eval_tree_array(
6179
)
6280
#! format: on
6381

64-
out = ntuple(i -> @view(gworkspace[:, roots[i]]), Val(M))
82+
out = ntuple(i -> @view(gworkspace[:, roots[i]]), Val(M + 1))
6583
is_good = ntuple(
6684
i -> true, # Up to user to find NaNs
67-
Val(M),
85+
Val(M + 1),
6886
)
6987

7088
return (out, is_good)
@@ -87,12 +105,24 @@ function _launch_gpu_kernel!(
87105
gpu_kernel! = create_gpu_kernel(operators, Val(nuna), Val(nbin))
88106
for launch in one(I):I(num_launches)
89107
#! format: off
90-
@cuda threads=num_threads blocks=num_blocks gpu_kernel!(
91-
buffer,
92-
launch, num_elem, num_nodes, execution_order,
93-
cX, idx_self, idx_l, idx_r,
94-
degree, constant, val, feature, op
95-
)
108+
if buffer isa CuArray
109+
@cuda threads=num_threads blocks=num_blocks gpu_kernel!(
110+
buffer,
111+
launch, num_elem, num_nodes, execution_order,
112+
cX, idx_self, idx_l, idx_r,
113+
degree, constant, val, feature, op
114+
)
115+
else
116+
Threads.@threads for i in 1:(num_threads * num_blocks)
117+
gpu_kernel!(
118+
buffer,
119+
launch, num_elem, num_nodes, execution_order,
120+
cX, idx_self, idx_l, idx_r,
121+
degree, constant, val, feature, op,
122+
i
123+
)
124+
end
125+
end
96126
#! format: on
97127
end
98128
return nothing

test/test_cuda.jl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
using DynamicExpressions
2+
using CUDA
3+
using Random
4+
5+
ext = Base.get_extension(DynamicExpressions, :DynamicExpressionsCUDAExt)
6+
const FakeCuArray = ext.FakeCuArray
7+
8+
include("tree_gen_utils.jl")
9+
10+
let
11+
operators = OperatorEnum(; binary_operators=[+, -, *, /], unary_operators=[cos, sin]);
12+
x1, x2, x3 = (i -> Node(Float64; feature=i)).(1:3)
13+
14+
for T in (Float32, Float64, ComplexF64), num_trees in (1, 2, 3), seed in 0:10
15+
Random.seed!(seed)
16+
num_rows = rand(10:30)
17+
nodes_per = rand(10:25, num_trees)
18+
trees = ntuple(i -> gen_random_tree_fixed_size(nodes_per[i], operators, 3, T), num_trees)
19+
@show trees
20+
X = randn(T, 3, num_rows)
21+
y, completed = eval_tree_array(trees, X, operators)
22+
gpu_y, gpu_completed = eval_tree_array(trees, FakeCuArray(X), operators)
23+
gpu_y = Array.(gpu_y)
24+
25+
for i in eachindex(completed, gpu_completed)
26+
@test ((completed[i] && gpu_completed[i]) && (y[i] ≈ gpu_y[i])) || (!completed[i] && !gpu_completed[i])
27+
end
28+
end
29+
end

test/unittest.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ end
99
end
1010

1111
# Trigger extensions:
12-
using Zygote, SymbolicUtils, LoopVectorization, Bumper, Optim
12+
using Zygote, SymbolicUtils, LoopVectorization, Bumper, Optim, CUDA
1313

1414
@safetestset "Test deprecations" begin
1515
include("test_deprecations.jl")
@@ -110,3 +110,7 @@ end
110110
@safetestset "Test random sampling" begin
111111
include("test_random.jl")
112112
end
113+
114+
@safetestset "Test CUDA" begin
115+
include("test_cuda.jl")
116+
end

0 commit comments

Comments
 (0)