oneapi-src · yuguen · Oct 21, 2024 · Oct 18, 2024 · Oct 18, 2024 · Oct 18, 2024
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/svd/src/memory_transfers.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/svd/src/memory_transfers.hpp
@@ -5,6 +5,10 @@
 #include "tuple.hpp"
 #include "unrolled_loop.hpp"
 
+using namespace sycl::ext::intel::experimental;
+using namespace sycl::ext::oneapi::experimental;
+
+constexpr int BL0 = 0;
 
 // Read matrix_count matrices of type TT from DDR by bursts of num_elem_per_bank
 // elements, and write the matrices to the "MatrixPipe" pipe num_elem_per_bank by
@@ -65,7 +69,12 @@ template <typename TT,            // Datatype of the elements of the matrix
           typename MatrixPipe     // Input matrix
           >
 void MatrixReadPipeToDDR(
+#if defined (IS_BSP)
     TT* matrix_ptr,    // Output matrix pointer
+#else
+    annotated_ptr<TT, decltype(properties{buffer_location<BL0>,
+	    				  dwidth<512>})> matrix_ptr,
+#endif
     int matrix_count,  // Number of matrix to write to DDR
     int repetitions    // Number of time to read the same matrix to the pipe
 ) {
@@ -147,7 +156,7 @@ void VectorReadPipeToDDR(
   // lives on the device.
   // Knowing this, the compiler won't generate hardware to
   // potentially get data from the host.
-  sycl::device_ptr<TT> vector_ptr_located(vector_ptr);
+  sycl::ext::intel::device_ptr<TT> vector_ptr_located(vector_ptr);
 #else
   // Device pointers are not supported when targeting an FPGA
   // family/part
@@ -166,4 +175,4 @@ void VectorReadPipeToDDR(
   }      // end of repetition
 }
 
-#endif /* __MEMORY_TRANSFERS_HPP__ */
+#endif /* __MEMORY_TRANSFERS_HPP__ */
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/svd/src/svd.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/svd/src/svd.hpp
@@ -17,6 +17,8 @@
 #include "memory_transfers.hpp"
 #include "usv_from_eigens.hpp"
 
+using namespace sycl::ext::intel::experimental;
+using namespace sycl::ext::oneapi::experimental;
 
 // Forward declare the kernel and pipe names
 // (This prevents unwanted name mangling in the optimization report.)
@@ -118,6 +120,15 @@ double SingularValueDecomposition(
     std::terminate();
   }
 
+#if not defined (IS_BSP)
+  constexpr int BL0 = 0;
+  using PtrAnn = annotated_ptr<T, decltype(properties{buffer_location<BL0>,
+		  				      dwidth<512>})>;
+  PtrAnn u_matrix_device_ptr(u_matrix_device);
+  PtrAnn s_matrix_device_ptr(s_matrix_device);
+  PtrAnn v_matrix_device_ptr(v_matrix_device);
+#endif
+
   // Check that the malloc succeeded.
   if (nullptr == input_matrix_device) {
     std::cerr << "Error when allocating the input matrix." << std::endl;
@@ -151,7 +162,7 @@ double SingularValueDecomposition(
         [=]() [[intel::kernel_args_restrict]] {
           MatrixReadFromDDRTo2PipesByBlocks<
               T, cols, rows, kNumElementsPerDDRBurst, InputMatrixPipe, InputMatrixPipe2>(
-              input_matrix_device, matrix_count, repetitions);
+	      input_matrix_device, matrix_count, repetitions);
         });
   });
 
@@ -207,21 +218,39 @@ double SingularValueDecomposition(
   sycl::event u_matrix_event = q.single_task<IDUMatrixFromLocalMemToDDR>(
       [=]() [[intel::kernel_args_restrict]] {
         MatrixReadPipeToDDR<T, rows, rows, kNumElementsPerDDRBurst,
-                            UMatrixPipe>(u_matrix_device, matrix_count, repetitions);
+                            UMatrixPipe>(
+#if defined (IS_BSP)
+		u_matrix_device,
+#else
+		u_matrix_device_ptr,
+#endif	
+		matrix_count, repetitions);
       });
 
   // collecting s matrix from pipe into DDR
   sycl::event s_matrix_event = q.single_task<IDSMatrixFromLocalMemToDDR>(
       [=]() [[intel::kernel_args_restrict]] {
         MatrixReadPipeToDDR<T, rows, cols, kNumElementsPerDDRBurst,
-                            SMatrixPipe>(s_matrix_device, matrix_count, repetitions);
+                            SMatrixPipe>(
+#if defined (IS_BSP)
+		s_matrix_device,
+#else
+		s_matrix_device_ptr,
+#endif
+	       	matrix_count, repetitions);
       });
 
   // collecting V matrix from pipe into DDR
   sycl::event v_matrix_event = q.single_task<IDVMatrixFromLocalMemToDDR>(
       [=]() [[intel::kernel_args_restrict]] {
         MatrixReadPipeToDDR<T, cols, cols, kNumElementsPerDDRBurst,
-                            VMatrixPipe>(v_matrix_device, matrix_count, repetitions);
+                            VMatrixPipe>(
+#if defined (IS_BSP)
+		v_matrix_device,
+#else
+		v_matrix_device_ptr,
+#endif
+	       	matrix_count, repetitions);
       });
 
   // Wait for output memory access kernels to finish
@@ -260,4 +289,4 @@ double SingularValueDecomposition(
   return diff;
 }
 
-#endif  // __SVD_HPP__
+#endif  // __SVD_HPP__