
matrix_mul_mkl: use single precision for compatibility with all devices #1310

Merged · 1 commit · Jan 25, 2023
Libraries/oneMKL/matrix_mul_mkl/matrix_mul_mkl.cpp (35 changes: 18 additions & 17 deletions)

@@ -1,5 +1,5 @@
 //==============================================================
-// Copyright © 2020 Intel Corporation
+// Copyright © 2020-2023 Intel Corporation
 //
 // SPDX-License-Identifier: MIT
 // =============================================================
@@ -15,8 +15,8 @@
 #include <sycl/sycl.hpp>
 #include "oneapi/mkl.hpp"
 
-double rand_uniform();
-bool verify_result(int m, int n, int k, int ldc, double *C, double *C_reference);
+float rand_uniform();
+bool verify_result(int m, int n, int k, int ldc, const float *C, const float *C_reference);
 
 int main()
 {
@@ -29,7 +29,7 @@ int main()
     // optional matrix transposition.
     //
     // For this simple matrix multiplication, no transposition is needed.
-    // 
+    //
     // By choosing alpha = 1, beta = 0, GEMM will calculate C = A * B.
     //
     // In this example, matrices are stored in row-major layout.
@@ -38,7 +38,7 @@ int main()
     auto transB = oneapi::mkl::transpose::nontrans;
 
     // Matrix data sizes.
-    // 
+    //
     // A is m x k
     // B is k x n --> product C is m x n
     int m = 600;
@@ -52,21 +52,21 @@ int main()
     int ldc = n;
 
     // Scaling factors.
-    double alpha = 1.0;
-    double beta = 0.0;
+    float alpha = 1.0f;
+    float beta = 0.0f;
 
     // Create a queue on the default device.
-    sycl::queue device_queue{sycl::default_selector{}};
+    sycl::queue device_queue{sycl::default_selector_v};
 
     std::cout << "Device: "
               << device_queue.get_device().get_info<sycl::info::device::name>()
               << std::endl;
 
     // Allocate shared memory for matrices.
-    auto A = sycl::malloc_shared<double>(m * k, device_queue);
-    auto B = sycl::malloc_shared<double>(k * n, device_queue);
-    auto C = sycl::malloc_shared<double>(m * n, device_queue);
-    auto C_reference = (double *) calloc(m * n, sizeof(double));
+    auto A = sycl::malloc_shared<float>(m * k, device_queue);
+    auto B = sycl::malloc_shared<float>(k * n, device_queue);
+    auto C = sycl::malloc_shared<float>(m * n, device_queue);
+    auto C_reference = (float *) calloc(m * n, sizeof(float));
 
     if (!A || !B || !C || !C_reference) {
         std::cerr << "Could not allocate memory for matrices." << std::endl;
@@ -98,7 +98,7 @@ int main()
         for (int h = 0; h < k; h++)
             for (int j = 0; j < n; j++)
                 C_reference[i * ldc + j] += A[i * lda + h] * B[h * ldb + j];
-
+
     // Wait for oneMKL computation to complete.
     device_queue.wait_and_throw();
 
@@ -120,14 +120,15 @@
     }
 }
 
-double rand_uniform()
+float rand_uniform()
 {
-    return double(rand()) / RAND_MAX;
+    return float(rand()) / float(RAND_MAX);
 }
 
-bool verify_result(int m, int n, int k, int ldc, double *C, double *C_reference)
+bool verify_result(int m, int n, int k, int ldc,
+                   const float *C, const float *C_reference)
 {
-    double tolerance = 1e-6;
+    float tolerance = 1e-3;
     bool ok = true;
 
     // Compare host side results with the result buffer from device side: print
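The GEMM call itself sits in a part of the file that this diff view collapses. For context, here is a minimal sketch of what a single-precision, USM-based call to oneMKL's row-major BLAS interface looks like after this change; the wrapper run_sgemm and its exact argument layout are illustrative assumptions, not code taken from the sample.

#include <sycl/sycl.hpp>
#include "oneapi/mkl.hpp"

// Hypothetical wrapper (illustration only, not part of the sample): computes
// C = alpha * A * B + beta * C for row-major A (m x k), B (k x n), C (m x n)
// stored in USM, using oneMKL's single-precision GEMM.
void run_sgemm(sycl::queue &q, int m, int n, int k,
               float alpha, const float *A, int lda,
               const float *B, int ldb,
               float beta, float *C, int ldc)
{
    auto nontrans = oneapi::mkl::transpose::nontrans;

    // The USM API is asynchronous and returns an event to wait on.
    auto done = oneapi::mkl::blas::row_major::gemm(q, nontrans, nontrans,
                                                   m, n, k,
                                                   alpha, A, lda, B, ldb,
                                                   beta, C, ldc);
    done.wait_and_throw();
}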
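On the motivation in the title: many GPUs expose no double-precision (fp64) support, so a double-based sample cannot run on them, while float runs everywhere. Below is a minimal standalone sketch of how a SYCL program can check for fp64 at run time using the standard sycl::aspect::fp64 query; this check is not part of the sample, which simply switches the whole computation to float.

#include <iostream>
#include <sycl/sycl.hpp>

int main()
{
    // Select the default device, as the sample does.
    sycl::queue q{sycl::default_selector_v};
    sycl::device dev = q.get_device();

    std::cout << "Device: "
              << dev.get_info<sycl::info::device::name>() << std::endl;

    // sycl::aspect::fp64 reports whether the device can run kernels that use
    // double precision. Devices without it could not run the old version of
    // this sample.
    if (dev.has(sycl::aspect::fp64))
        std::cout << "fp64 supported: double precision would also work." << std::endl;
    else
        std::cout << "fp64 not supported: single precision (float) is required." << std::endl;

    return 0;
}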