Skip to content

Commit 3c8f7aa

Browse files
authored
FPGA: add simulation support to the private_copies samples (#1296)
1 parent 06519da commit 3c8f7aa

File tree

3 files changed

+50
-6
lines changed

3 files changed

+50
-6
lines changed

DirectProgramming/C++SYCL_FPGA/Tutorials/Features/private_copies/README.md

+21-2
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ A typical design flow may be to:
130130
```
131131
make report
132132
```
133+
* Compile for simulation (fast compile time, targets simulated FPGA device, reduced data size):
134+
```
135+
make fpga_sim
136+
```
133137
* Compile for FPGA hardware (longer compile time, targets FPGA device):
134138
```
135139
make fpga
@@ -167,6 +171,10 @@ A typical design flow may be to:
167171
```
168172
nmake report
169173
```
174+
* Compile for simulation (fast compile time, targets simulated FPGA device, reduced data size):
175+
```
176+
nmake fpga_sim
177+
```
170178
* Compile for FPGA hardware (longer compile time, targets FPGA device):
171179
```
172180
nmake fpga
@@ -188,7 +196,18 @@ On the main report page, scroll down to the section titled "Estimated Resource U
188196
./private_copies.fpga_emu (Linux)
189197
private_copies.fpga_emu.exe (Windows)
190198
```
191-
2. Run the sample on the FPGA device:
199+
2. Run the sample on the FPGA simulator device:
200+
* On Linux
201+
```bash
202+
CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./private_copies.fpga_sim
203+
```
204+
* On Windows
205+
```bash
206+
set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
207+
private_copies.fpga_sim.exe
208+
set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
209+
```
210+
3. Run the sample on the FPGA device:
192211
```
193212
./private_copies.fpga (Linux)
194213
private_copies.fpga.exe (Windows)
@@ -218,7 +237,7 @@ When run on the Intel® PAC with Intel Arria10® 10 GX FPGA hardware board
218237
219238
Setting the `private_copies` attribute to 0 (or equivalently omitting the attribute entirely) produced good throughput, and the reports show us that the compiler selected 3 private copies. This does produce the optimal throughput, but in this case it probably makes sense to save some area in exchange for a very small throughput loss by specifying 2 private copies instead.
220239
221-
When run on the FPGA emulator, the `private_copies` attribute has no effect on kernel time. You may actually notice that the emulator achieved higher throughput than the FPGA in this example. This is because this trivial example uses only a tiny fraction of the spatial compute resources available on the FPGA.
240+
When run on the FPGA emulator or simulator, the `private_copies` attribute has no effect on kernel time. You may actually notice that the emulator achieved higher throughput than the FPGA in this example. This is because this trivial example uses only a tiny fraction of the spatial compute resources available on the FPGA.
222241
223242
## License
224243

DirectProgramming/C++SYCL_FPGA/Tutorials/Features/private_copies/src/CMakeLists.txt

+17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
set(SOURCE_FILE private_copies.cpp)
22
set(TARGET_NAME private_copies)
33
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
4+
set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
45
set(FPGA_TARGET ${TARGET_NAME}.fpga)
56

67
# FPGA board selection
@@ -24,6 +25,8 @@ endif()
2425
# For this reason, FPGA backend flags must be passed as link flags in CMake.
2526
set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
2627
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
28+
set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
29+
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
2730
set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
2831
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
2932
# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation
@@ -56,6 +59,20 @@ set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${HARDWARE_C
5659
set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early")
5760
# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus®
5861

62+
###############################################################################
63+
### FPGA Simulator
64+
###############################################################################
65+
# To compile in a single command:
66+
# icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -Xstarget=<FPGA_DEVICE> private_copies.cpp -o private_copies.fpga_sim
67+
# CMake executes:
68+
# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o private_copies.cpp.o -c private_copies.cpp
69+
# [link] icpx -fsycl -fintelfpga -Xssimulation -Xstarget=<FPGA_DEVICE> private_copies.cpp.o -o private_copies.fpga_sim
70+
add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
71+
target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include)
72+
add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
73+
set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
74+
set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
75+
5976
###############################################################################
6077
### FPGA Hardware
6178
###############################################################################

DirectProgramming/C++SYCL_FPGA/Tutorials/Features/private_copies/src/private_copies.cpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,14 @@
1515

1616
using namespace sycl;
1717

18-
constexpr size_t kSize = 8192;
18+
#if defined(FPGA_SIMULATOR)
19+
// Use a smaller array size and fewer iterations under simulation to keep the runtime reasonable
20+
constexpr size_t kSize = 512; //2^9
21+
constexpr size_t kMaxIter = 100;
22+
#else
23+
constexpr size_t kSize = 8192; //2^13
1924
constexpr size_t kMaxIter = 50000;
25+
#endif
2026
constexpr size_t kTotalOps = 2 * kMaxIter * kSize;
2127
constexpr size_t kMaxValue = 128;
2228

@@ -33,10 +39,12 @@ template <int num_copies> class Kernel;
3339
template <int num_copies, bool first_call = false>
3440
void SimpleMathWithShift(const IntArray &array, int shift, IntScalar &result) {
3541

36-
#if FPGA_HARDWARE
37-
auto selector = sycl::ext::intel::fpga_selector_v;
42+
#if FPGA_SIMULATOR
43+
auto selector = sycl::ext::intel::fpga_simulator_selector_v;
44+
#elif FPGA_HARDWARE
45+
auto selector = sycl::ext::intel::fpga_selector_v;
3846
#else // #if FPGA_EMULATOR
39-
auto selector = sycl::ext::intel::fpga_emulator_selector_v;
47+
auto selector = sycl::ext::intel::fpga_emulator_selector_v;
4048
#endif
4149

4250
double kernel_time = 0.0;

0 commit comments

Comments
 (0)