Skip to content

FPGA: Fix II error and update namespace for device_ptr #2525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
#include "tuple.hpp"
#include "unrolled_loop.hpp"

using namespace sycl::ext::intel::experimental;
using namespace sycl::ext::oneapi::experimental;

constexpr int BL0 = 0;

// Read matrix_count matrices of type TT from DDR by bursts of num_elem_per_bank
// elements, and write the matrices to the "MatrixPipe" pipe num_elem_per_bank by
Expand Down Expand Up @@ -65,7 +69,12 @@ template <typename TT, // Datatype of the elements of the matrix
typename MatrixPipe // Input matrix
>
void MatrixReadPipeToDDR(
#if defined (IS_BSP)
TT* matrix_ptr, // Output matrix pointer
#else
annotated_ptr<TT, decltype(properties{buffer_location<BL0>,
dwidth<512>})> matrix_ptr,
#endif
int matrix_count, // Number of matrix to write to DDR
int repetitions // Number of time to read the same matrix to the pipe
) {
Expand Down Expand Up @@ -147,7 +156,7 @@ void VectorReadPipeToDDR(
// lives on the device.
// Knowing this, the compiler won't generate hardware to
// potentially get data from the host.
sycl::device_ptr<TT> vector_ptr_located(vector_ptr);
sycl::ext::intel::device_ptr<TT> vector_ptr_located(vector_ptr);
#else
// Device pointers are not supported when targeting an FPGA
// family/part
Expand All @@ -166,4 +175,4 @@ void VectorReadPipeToDDR(
} // end of repetition
}

#endif /* __MEMORY_TRANSFERS_HPP__ */
#endif /* __MEMORY_TRANSFERS_HPP__ */
39 changes: 34 additions & 5 deletions DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/svd/src/svd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "memory_transfers.hpp"
#include "usv_from_eigens.hpp"

using namespace sycl::ext::intel::experimental;
using namespace sycl::ext::oneapi::experimental;

// Forward declare the kernel and pipe names
// (This prevents unwanted name mangling in the optimization report.)
Expand Down Expand Up @@ -118,6 +120,15 @@ double SingularValueDecomposition(
std::terminate();
}

#if not defined (IS_BSP)
constexpr int BL0 = 0;
using PtrAnn = annotated_ptr<T, decltype(properties{buffer_location<BL0>,
dwidth<512>})>;
PtrAnn u_matrix_device_ptr(u_matrix_device);
PtrAnn s_matrix_device_ptr(s_matrix_device);
PtrAnn v_matrix_device_ptr(v_matrix_device);
#endif

// Check that the malloc succeeded.
if (nullptr == input_matrix_device) {
std::cerr << "Error when allocating the input matrix." << std::endl;
Expand Down Expand Up @@ -151,7 +162,7 @@ double SingularValueDecomposition(
[=]() [[intel::kernel_args_restrict]] {
MatrixReadFromDDRTo2PipesByBlocks<
T, cols, rows, kNumElementsPerDDRBurst, InputMatrixPipe, InputMatrixPipe2>(
input_matrix_device, matrix_count, repetitions);
input_matrix_device, matrix_count, repetitions);
});
});

Expand Down Expand Up @@ -207,21 +218,39 @@ double SingularValueDecomposition(
sycl::event u_matrix_event = q.single_task<IDUMatrixFromLocalMemToDDR>(
[=]() [[intel::kernel_args_restrict]] {
MatrixReadPipeToDDR<T, rows, rows, kNumElementsPerDDRBurst,
UMatrixPipe>(u_matrix_device, matrix_count, repetitions);
UMatrixPipe>(
#if defined (IS_BSP)
u_matrix_device,
#else
u_matrix_device_ptr,
#endif
matrix_count, repetitions);
});

// collecting s matrix from pipe into DDR
sycl::event s_matrix_event = q.single_task<IDSMatrixFromLocalMemToDDR>(
[=]() [[intel::kernel_args_restrict]] {
MatrixReadPipeToDDR<T, rows, cols, kNumElementsPerDDRBurst,
SMatrixPipe>(s_matrix_device, matrix_count, repetitions);
SMatrixPipe>(
#if defined (IS_BSP)
s_matrix_device,
#else
s_matrix_device_ptr,
#endif
matrix_count, repetitions);
});

// collecting V matrix from pipe into DDR
sycl::event v_matrix_event = q.single_task<IDVMatrixFromLocalMemToDDR>(
[=]() [[intel::kernel_args_restrict]] {
MatrixReadPipeToDDR<T, cols, cols, kNumElementsPerDDRBurst,
VMatrixPipe>(v_matrix_device, matrix_count, repetitions);
VMatrixPipe>(
#if defined (IS_BSP)
v_matrix_device,
#else
v_matrix_device_ptr,
#endif
matrix_count, repetitions);
});

// Wait for output memory access kernels to finish
Expand Down Expand Up @@ -260,4 +289,4 @@ double SingularValueDecomposition(
return diff;
}

#endif // __SVD_HPP__
#endif // __SVD_HPP__