Skip to content

Commit 6d1f764

Browse files
authored
matrix_mul_mkl: use single precision for compatibility with all devices (#1310)
1 parent 584f47f commit 6d1f764

File tree

1 file changed

+18
-17
lines changed

1 file changed

+18
-17
lines changed

Libraries/oneMKL/matrix_mul_mkl/matrix_mul_mkl.cpp

+18-17
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
//==============================================================
2-
// Copyright © 2020 Intel Corporation
2+
// Copyright © 2020-2023 Intel Corporation
33
//
44
// SPDX-License-Identifier: MIT
55
// =============================================================
@@ -15,8 +15,8 @@
1515
#include <sycl/sycl.hpp>
1616
#include "oneapi/mkl.hpp"
1717

18-
double rand_uniform();
19-
bool verify_result(int m, int n, int k, int ldc, double *C, double *C_reference);
18+
float rand_uniform();
19+
bool verify_result(int m, int n, int k, int ldc, const float *C, const float *C_reference);
2020

2121
int main()
2222
{
@@ -29,7 +29,7 @@ int main()
2929
// optional matrix transposition.
3030
//
3131
// For this simple matrix multiplication, no transposition is needed.
32-
//
32+
//
3333
// By choosing alpha = 1, beta = 0, GEMM will calculate C = A * B.
3434
//
3535
// In this example, matrices are stored in row-major layout.
@@ -38,7 +38,7 @@ int main()
3838
auto transB = oneapi::mkl::transpose::nontrans;
3939

4040
// Matrix data sizes.
41-
//
41+
//
4242
// A is m x k
4343
// B is k x n --> product C is m x n
4444
int m = 600;
@@ -52,21 +52,21 @@ int main()
5252
int ldc = n;
5353

5454
// Scaling factors.
55-
double alpha = 1.0;
56-
double beta = 0.0;
55+
float alpha = 1.0f;
56+
float beta = 0.0f;
5757

5858
// Create a queue on the default device.
59-
sycl::queue device_queue{sycl::default_selector{}};
59+
sycl::queue device_queue{sycl::default_selector_v};
6060

6161
std::cout << "Device: "
6262
<< device_queue.get_device().get_info<sycl::info::device::name>()
6363
<< std::endl;
6464

6565
// Allocate shared memory for matrices.
66-
auto A = sycl::malloc_shared<double>(m * k, device_queue);
67-
auto B = sycl::malloc_shared<double>(k * n, device_queue);
68-
auto C = sycl::malloc_shared<double>(m * n, device_queue);
69-
auto C_reference = (double *) calloc(m * n, sizeof(double));
66+
auto A = sycl::malloc_shared<float>(m * k, device_queue);
67+
auto B = sycl::malloc_shared<float>(k * n, device_queue);
68+
auto C = sycl::malloc_shared<float>(m * n, device_queue);
69+
auto C_reference = (float *) calloc(m * n, sizeof(float));
7070

7171
if (!A || !B || !C || !C_reference) {
7272
std::cerr << "Could not allocate memory for matrices." << std::endl;
@@ -98,7 +98,7 @@ int main()
9898
for (int h = 0; h < k; h++)
9999
for (int j = 0; j < n; j++)
100100
C_reference[i * ldc + j] += A[i * lda + h] * B[h * ldb + j];
101-
101+
102102
// Wait for oneMKL computation to complete.
103103
device_queue.wait_and_throw();
104104

@@ -120,14 +120,15 @@ int main()
120120
}
121121
}
122122

123-
double rand_uniform()
123+
float rand_uniform()
124124
{
125-
return double(rand()) / RAND_MAX;
125+
return float(rand()) / float(RAND_MAX);
126126
}
127127

128-
bool verify_result(int m, int n, int k, int ldc, double *C, double *C_reference)
128+
bool verify_result(int m, int n, int k, int ldc,
129+
const float *C, const float *C_reference)
129130
{
130-
double tolerance = 1e-6;
131+
float tolerance = 1e-3;
131132
bool ok = true;
132133

133134
// Compare host side results with the result buffer from device side: print

0 commit comments

Comments
 (0)