@@ -274,15 +274,18 @@ struct StreamingQRD {
274
274
// kFanoutReduction times to reduce fanout
275
275
bool j_eq_i[kBanksForFanout ], i_gt_0[kBanksForFanout ],
276
276
i_ge_0_j_ge_i[kBanksForFanout ], j_eq_i_plus_1[kBanksForFanout ],
277
- i_lt_0[kBanksForFanout ];
277
+ i_lt_0[kBanksForFanout ], j_ge_0[ kBanksForFanout ] ;
278
278
279
279
fpga_tools::UnrolledLoop<kBanksForFanout >([&](auto k) {
280
280
i_gt_0[k] = sycl::ext::intel::fpga_reg (i > 0 );
281
281
i_lt_0[k] = sycl::ext::intel::fpga_reg (i < 0 );
282
282
j_eq_i[k] = sycl::ext::intel::fpga_reg (j == i);
283
+ j_ge_0[k] = sycl::ext::intel::fpga_reg (j >= 0 );
283
284
i_ge_0_j_ge_i[k] = sycl::ext::intel::fpga_reg (i >= 0 && j >= i);
284
285
j_eq_i_plus_1[k] = sycl::ext::intel::fpga_reg (j == i + 1 );
285
- s_or_ir_j[k] = sycl::ext::intel::fpga_reg (s_or_ir[j]);
286
+ if (j >= 0 ) {
287
+ s_or_ir_j[k] = sycl::ext::intel::fpga_reg (s_or_ir[j]);
288
+ }
286
289
});
287
290
288
291
// Preload col and a_i with the correct data for the current iteration
@@ -298,14 +301,14 @@ struct StreamingQRD {
298
301
// If no i iteration has elapsed, we must read the column of
299
302
// matrix A directly from the a_load; col then contains a_j
300
303
301
- if (i_gt_0[fanout_bank_idx]) {
304
+ if (i_gt_0[fanout_bank_idx] && j_ge_0[fanout_bank_idx] ) {
302
305
col[k] = a_compute[j].template get <k>();
303
306
}
304
307
// Using an else statement makes the compiler throw an
305
308
// inexplicable warning when using non-complex types:
306
309
// "Compiler Warning: Memory instruction with unresolved
307
310
// pointer may lead to bad QoR."
308
- if (!i_gt_0[fanout_bank_idx]) {
311
+ if (!i_gt_0[fanout_bank_idx] && j_ge_0[fanout_bank_idx] ) {
309
312
col[k] = a_load[j].template get <k>();
310
313
}
311
314
@@ -347,7 +350,7 @@ struct StreamingQRD {
347
350
// are either going to be:
348
351
// -> overwritten for the matrix Q (q_result)
349
352
// -> unused for the a_compute
350
- if (i_ge_0_j_ge_i[fanout_bank_idx]) {
353
+ if (i_ge_0_j_ge_i[fanout_bank_idx] && j_ge_0[fanout_bank_idx] ) {
351
354
q_result[j].template get <k>() = col1[k];
352
355
a_compute[j].template get <k>() = col1[k];
353
356
}
@@ -484,4 +487,4 @@ struct StreamingQRD {
484
487
485
488
} // namespace fpga_linalg
486
489
487
- #endif /* __STREAMING_QRD_HPP__ */
490
+ #endif /* __STREAMING_QRD_HPP__ */
0 commit comments