Skip to content

[mlir][spirv] Add integration tests for vector.interleave and vector.shuffle #93858

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 3, 2024

Conversation

angelz913
Copy link
Contributor

@angelz913 angelz913 commented May 30, 2024

This PR tries to reland #93595 which was reverted in #93732 due to some issues. The original PR:

  • Add integration test for vector.shuffle and vector.interleave
  • Add VectorToSPIRV patterns to GPUToSPIRVPass

Description of the issue:

@llvmbot
Copy link
Member

llvmbot commented May 30, 2024

@llvm/pr-subscribers-mlir-spirv
@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Angel Zhang (angelz913)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/93858.diff

3 Files Affected:

  • (modified) mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp (+2)
  • (added) mlir/test/mlir-vulkan-runner/vector-interleave.mlir (+79)
  • (added) mlir/test/mlir-vulkan-runner/vector-shuffle.mlir (+79)
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
index 1d1db913e3df2..53e73ec0d81bf 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRV.h"
 #include "mlir/Conversion/MemRefToSPIRV/MemRefToSPIRV.h"
 #include "mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h"
+#include "mlir/Conversion/VectorToSPIRV/VectorToSPIRV.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
@@ -132,6 +133,7 @@ void GPUToSPIRVPass::runOnOperation() {
     mlir::arith::populateArithToSPIRVPatterns(typeConverter, patterns);
     populateMemRefToSPIRVPatterns(typeConverter, patterns);
     populateFuncToSPIRVPatterns(typeConverter, patterns);
+    populateVectorToSPIRVPatterns(typeConverter, patterns);
 
     if (failed(applyFullConversion(gpuModule, *target, std::move(patterns))))
       return signalPassFailure();
diff --git a/mlir/test/mlir-vulkan-runner/vector-interleave.mlir b/mlir/test/mlir-vulkan-runner/vector-interleave.mlir
new file mode 100644
index 0000000000000..0846d52a45b11
--- /dev/null
+++ b/mlir/test/mlir-vulkan-runner/vector-interleave.mlir
@@ -0,0 +1,79 @@
+// RUN: mlir-vulkan-runner %s \
+// RUN:  --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:  --entry-point-result=void | FileCheck %s
+
+// CHECK: [0, 2, 1, 3]
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<
+    #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>
+} {
+  gpu.module @kernels {
+    gpu.func @kernel_vector_interleave(%arg0 : memref<2xi32>, %arg1 : memref<2xi32>, %arg2 : memref<4xi32>)
+      kernel attributes { spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [1, 1, 1]>} {
+      %idx0 = arith.constant 0 : index
+      %idx1 = arith.constant 1 : index
+      %idx2 = arith.constant 2 : index
+      %idx3 = arith.constant 3 : index
+      %idx4 = arith.constant 4 : index
+
+      %lhs = arith.constant dense<[0, 0]> : vector<2xi32>
+      %rhs = arith.constant dense<[0, 0]> : vector<2xi32>
+
+      %val0 = memref.load %arg0[%idx0] : memref<2xi32>
+      %val1 = memref.load %arg0[%idx1] : memref<2xi32>
+      %val2 = memref.load %arg1[%idx0] : memref<2xi32>
+      %val3 = memref.load %arg1[%idx1] : memref<2xi32>
+
+      %lhs0 = vector.insertelement %val0, %lhs[%idx0 : index] : vector<2xi32>
+      %lhs1 = vector.insertelement %val1, %lhs0[%idx1 : index] : vector<2xi32>
+      %rhs0 = vector.insertelement %val2, %rhs[%idx0 : index] : vector<2xi32>
+      %rhs1 = vector.insertelement %val3, %rhs0[%idx1 : index] : vector<2xi32>
+
+      %interleave = vector.interleave %lhs1, %rhs1 : vector<2xi32> -> vector<4xi32>
+
+      %res0 = vector.extractelement %interleave[%idx0 : index] : vector<4xi32>
+      %res1 = vector.extractelement %interleave[%idx1 : index] : vector<4xi32>
+      %res2 = vector.extractelement %interleave[%idx2 : index] : vector<4xi32>
+      %res3 = vector.extractelement %interleave[%idx3 : index] : vector<4xi32>
+
+      memref.store %res0, %arg2[%idx0]: memref<4xi32>
+      memref.store %res1, %arg2[%idx1]: memref<4xi32>
+      memref.store %res2, %arg2[%idx2]: memref<4xi32>
+      memref.store %res3, %arg2[%idx3]: memref<4xi32>
+
+      gpu.return
+    }
+  }
+
+  func.func @main() {
+    // Allocate 3 buffers.
+    %buf0 = memref.alloc() : memref<2xi32>
+    %buf1 = memref.alloc() : memref<2xi32>
+    %buf2 = memref.alloc() : memref<4xi32>
+
+    %idx0 = arith.constant 0 : index
+    %idx1 = arith.constant 1 : index
+    %idx4 = arith.constant 4 : index
+
+    // Initialize input buffer.
+    %buf0_vals = arith.constant dense<[0, 1]> : vector<2xi32>
+    %buf1_vals = arith.constant dense<[2, 3]> : vector<2xi32>
+    vector.store %buf0_vals, %buf0[%idx0] : memref<2xi32>, vector<2xi32>
+    vector.store %buf1_vals, %buf1[%idx0] : memref<2xi32>, vector<2xi32>
+
+    // Initialize output buffer.
+    %value0 = arith.constant 0 : i32
+    %buf3 = memref.cast %buf2 : memref<4xi32> to memref<?xi32>
+    call @fillResource1DInt(%buf3, %value0) : (memref<?xi32>, i32) -> ()
+
+    gpu.launch_func @kernels::@kernel_vector_interleave
+        blocks in (%idx4, %idx1, %idx1) threads in (%idx1, %idx1, %idx1)
+        args(%buf0 : memref<2xi32>, %buf1 : memref<2xi32>, %buf2 : memref<4xi32>)
+    %buf4 = memref.cast %buf3 : memref<?xi32> to memref<*xi32>
+    call @printMemrefI32(%buf4) : (memref<*xi32>) -> ()
+    return
+  }
+  func.func private @fillResource1DInt(%0 : memref<?xi32>, %1 : i32)
+  func.func private @printMemrefI32(%ptr : memref<*xi32>)
+}
diff --git a/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir b/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir
new file mode 100644
index 0000000000000..7cf53b54590bc
--- /dev/null
+++ b/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir
@@ -0,0 +1,79 @@
+// RUN: mlir-vulkan-runner %s \
+// RUN:  --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:  --entry-point-result=void | FileCheck %s
+
+// CHECK: [2, 1, 3, 3]
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<
+    #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>
+} {
+  gpu.module @kernels {
+    gpu.func @kernel_vector_shuffle(%arg0 : memref<2xi32>, %arg1 : memref<2xi32>, %arg2 : memref<4xi32>)
+      kernel attributes { spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [1, 1, 1]>} {
+      %idx0 = arith.constant 0 : index
+      %idx1 = arith.constant 1 : index
+      %idx2 = arith.constant 2 : index
+      %idx3 = arith.constant 3 : index
+      %idx4 = arith.constant 4 : index
+
+      %lhs = arith.constant dense<[0, 0]> : vector<2xi32>
+      %rhs = arith.constant dense<[0, 0]> : vector<2xi32>
+
+      %val0 = memref.load %arg0[%idx0] : memref<2xi32>
+      %val1 = memref.load %arg0[%idx1] : memref<2xi32>
+      %val2 = memref.load %arg1[%idx0] : memref<2xi32>
+      %val3 = memref.load %arg1[%idx1] : memref<2xi32>
+
+      %lhs0 = vector.insertelement %val0, %lhs[%idx0 : index] : vector<2xi32>
+      %lhs1 = vector.insertelement %val1, %lhs0[%idx1 : index] : vector<2xi32>
+      %rhs0 = vector.insertelement %val2, %rhs[%idx0 : index] : vector<2xi32>
+      %rhs1 = vector.insertelement %val3, %rhs0[%idx1 : index] : vector<2xi32>
+
+      %shuffle = vector.shuffle %lhs1, %rhs1[2, 1, 3, 3] : vector<2xi32>, vector<2xi32>
+
+      %res0 = vector.extractelement %shuffle[%idx0 : index] : vector<4xi32>
+      %res1 = vector.extractelement %shuffle[%idx1 : index] : vector<4xi32>
+      %res2 = vector.extractelement %shuffle[%idx2 : index] : vector<4xi32>
+      %res3 = vector.extractelement %shuffle[%idx3 : index] : vector<4xi32>
+
+      memref.store %res0, %arg2[%idx0]: memref<4xi32>
+      memref.store %res1, %arg2[%idx1]: memref<4xi32>
+      memref.store %res2, %arg2[%idx2]: memref<4xi32>
+      memref.store %res3, %arg2[%idx3]: memref<4xi32>
+
+      gpu.return
+    }
+  }
+
+  func.func @main() {
+    // Allocate 3 buffers.
+    %buf0 = memref.alloc() : memref<2xi32>
+    %buf1 = memref.alloc() : memref<2xi32>
+    %buf2 = memref.alloc() : memref<4xi32>
+
+    %idx0 = arith.constant 0 : index
+    %idx1 = arith.constant 1 : index
+    %idx4 = arith.constant 4 : index
+
+    // Initialize input buffer.
+    %buf0_vals = arith.constant dense<[0, 1]> : vector<2xi32>
+    %buf1_vals = arith.constant dense<[2, 3]> : vector<2xi32>
+    vector.store %buf0_vals, %buf0[%idx0] : memref<2xi32>, vector<2xi32>
+    vector.store %buf1_vals, %buf1[%idx0] : memref<2xi32>, vector<2xi32>
+
+    // Initialize output buffer.
+    %value0 = arith.constant 0 : i32
+    %buf3 = memref.cast %buf2 : memref<4xi32> to memref<?xi32>
+    call @fillResource1DInt(%buf3, %value0) : (memref<?xi32>, i32) -> ()
+
+    gpu.launch_func @kernels::@kernel_vector_shuffle
+        blocks in (%idx4, %idx1, %idx1) threads in (%idx1, %idx1, %idx1)
+        args(%buf0 : memref<2xi32>, %buf1 : memref<2xi32>, %buf2 : memref<4xi32>)
+    %buf4 = memref.cast %buf3 : memref<?xi32> to memref<*xi32>
+    call @printMemrefI32(%buf4) : (memref<*xi32>) -> ()
+    return
+  }
+  func.func private @fillResource1DInt(%0 : memref<?xi32>, %1 : i32)
+  func.func private @printMemrefI32(%ptr : memref<*xi32>)
+}

@kuhar kuhar merged commit b301a98 into llvm:main Jun 3, 2024
7 checks passed
@angelz913 angelz913 deleted the vector-interleave-shuffle-e2e-fix branch June 6, 2024 19:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants