@@ -15,6 +15,7 @@ Base.similar(x::FakeCuArray, dims::Integer...) = FakeCuArray(similar(x.a, dims..
15
15
# Indexed read on a `FakeCuArray`: the integer indices are passed straight
# through to the wrapped array held in the `a` field.
function Base.getindex(x::FakeCuArray, i::Int...)
    return x.a[i...]
end
16
16
# Indexed write on a `FakeCuArray`: stores `v` at the given integer indices of
# the wrapped array held in the `a` field.
function Base.setindex!(x::FakeCuArray, v, i::Int...)
    # Call `setindex!` explicitly (instead of `x.a[i...] = v`) so the value
    # returned is whatever `setindex!` on the wrapped array returns.
    return setindex!(x.a, v, i...)
end
17
17
# The dimensions of a `FakeCuArray` are those of the array stored in its `a` field.
function Base.size(x::FakeCuArray)
    return size(x.a)
end
18
# Convert a `FakeCuArray` to an `Array` by applying `Array` to the array stored
# in its `a` field.
function Base.Array(x::FakeCuArray)
    return Array(x.a)
end
18
19
19
20
# Union of `CuArray` and `FakeCuArray` with element type `T` and `N` dimensions.
# Both members use the alias's `N`, so `MaybeCuArray{T,N}` describes an
# `N`-dimensional array whichever member matches. With the `CuArray` member
# hard-coded to 2 dimensions, `MaybeCuArray{T,1}` would have accepted a
# `CuArray{T,2}` and rejected a `CuArray{T,1}`. `MaybeCuArray{T,2}` is
# unaffected by this.
# NOTE(review): callers are not visible here -- confirm that no method relies on
# the bare `MaybeCuArray` excluding non-matrix `CuArray`s.
const MaybeCuArray{T,N} = Union{CuArray{T,N},FakeCuArray{T,N}}
20
21
@@ -41,13 +42,16 @@ function eval_tree_array(
41
42
num_launches = maximum (execution_order)
42
43
num_elem = size (gcX, 2 )
43
44
44
- # # Floating point arrays:
45
+ # # The following array is our "workspace" for
46
+ # # the GPU kernel, with size equal to the number of rows
47
+ # # in the input data by the number of nodes in the tree.
48
+ # # It has one extra row to store the constant values.
45
49
gworkspace = if gpu_workspace === nothing
46
- similar (gcX, num_elem, num_nodes + 1 )
50
+ similar (gcX, num_elem + 1 , num_nodes )
47
51
else
48
52
gpu_workspace
49
53
end
50
- gval = @view gworkspace[:, end ]
54
+ gval = @view gworkspace[end , : ]
51
55
copyto! (gval, val)
52
56
53
57
# # Index arrays (much faster to have `@view` here)
@@ -79,7 +83,7 @@ function eval_tree_array(
79
83
)
80
84
# ! format: on
81
85
82
- out = ntuple (i -> @view (gworkspace[: , roots[i]]), Val (M + 1 ))
86
+ out = ntuple (i -> @view (gworkspace[begin : end - 1 , roots[i]]), Val (M + 1 ))
83
87
is_good = ntuple (
84
88
i -> true , # Up to user to find NaNs
85
89
Val (M + 1 ),
0 commit comments