1
1
// ==============================================================
2
- // Copyright © 2020 Intel Corporation
2
+ // Copyright © 2020-2023 Intel Corporation
3
3
//
4
4
// SPDX-License-Identifier: MIT
5
5
// =============================================================
15
15
#include <sycl/sycl.hpp>
16
16
#include "oneapi/mkl.hpp"
17
17
18
- double rand_uniform ();
19
- bool verify_result (int m, int n, int k, int ldc, double *C, double *C_reference);
18
+ float rand_uniform ();
19
+ bool verify_result (int m, int n, int k, int ldc, const float *C, const float *C_reference);
20
20
21
21
int main ()
22
22
{
@@ -29,7 +29,7 @@ int main()
29
29
// optional matrix transposition.
30
30
//
31
31
// For this simple matrix multiplication, no transposition is needed.
32
- //
32
+ //
33
33
// By choosing alpha = 1, beta = 0, GEMM will calculate C = A * B.
34
34
//
35
35
// In this example, matrices are stored in row-major layout.
@@ -38,7 +38,7 @@ int main()
38
38
auto transB = oneapi::mkl::transpose::nontrans;
39
39
40
40
// Matrix data sizes.
41
- //
41
+ //
42
42
// A is m x k
43
43
// B is k x n --> product C is m x n
44
44
int m = 600 ;
@@ -52,21 +52,21 @@ int main()
52
52
int ldc = n;
53
53
54
54
// Scaling factors.
55
- double alpha = 1.0 ;
56
- double beta = 0.0 ;
55
+ float alpha = 1.0f;
56
+ float beta = 0.0f;
57
57
58
58
// Create a queue on the default device.
59
- sycl::queue device_queue{sycl::default_selector{} };
59
+ sycl::queue device_queue{sycl::default_selector_v };
60
60
61
61
std::cout << " Device: "
62
62
<< device_queue.get_device ().get_info <sycl::info::device::name>()
63
63
<< std::endl;
64
64
65
65
// Allocate shared memory for matrices.
66
- auto A = sycl::malloc_shared<double >(m * k, device_queue);
67
- auto B = sycl::malloc_shared<double >(k * n, device_queue);
68
- auto C = sycl::malloc_shared<double >(m * n, device_queue);
69
- auto C_reference = (double *) calloc (m * n, sizeof (double ));
66
+ auto A = sycl::malloc_shared<float >(m * k, device_queue);
67
+ auto B = sycl::malloc_shared<float >(k * n, device_queue);
68
+ auto C = sycl::malloc_shared<float >(m * n, device_queue);
69
+ auto C_reference = (float *) calloc (m * n, sizeof (float ));
70
70
71
71
if (!A || !B || !C || !C_reference) {
72
72
std::cerr << " Could not allocate memory for matrices." << std::endl;
@@ -98,7 +98,7 @@ int main()
98
98
for (int h = 0 ; h < k; h++)
99
99
for (int j = 0 ; j < n; j++)
100
100
C_reference[i * ldc + j] += A[i * lda + h] * B[h * ldb + j];
101
-
101
+
102
102
// Wait for oneMKL computation to complete.
103
103
device_queue.wait_and_throw ();
104
104
@@ -120,14 +120,15 @@ int main()
120
120
}
121
121
}
122
122
123
// Returns a pseudo-random single-precision value in the closed range [0, 1].
//
// The value is the next number from the C library generator rand(), scaled by
// RAND_MAX. Both operands are converted to float before the division so the
// arithmetic is done in single precision, matching the float matrices this
// sample fills. The sequence depends on whatever seed the caller has (or has
// not) set via srand().
float rand_uniform()
{
    return static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
}
127
127
128
- bool verify_result (int m, int n, int k, int ldc, double *C, double *C_reference)
128
+ bool verify_result (int m, int n, int k, int ldc,
129
+ const float *C, const float *C_reference)
129
130
{
130
- double tolerance = 1e-6 ;
131
+ float tolerance = 1e-3 ;
131
132
bool ok = true ;
132
133
133
134
// Compare host side results with the result buffer from device side: print
0 commit comments