Skip to content

Commit d63d430

Browse files
committed
Update on "[ET-VK] Modify quantized linear naive shader to linearly dispatch work to improve performance."
This diff changes the naive quantized linear matmul op to use push constants instead of uniform buffers, and changes the dispatch pattern to linear, to improve performance. Differential Revision: [D72862490](https://our.internmc.facebook.com/intern/diff/D72862490/) [ghstack-poisoned]
2 parents 83fe778 + 724e644 commit d63d430

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

backends/vulkan/runtime/graph/ops/impl/QuantizedLinearInt8.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -100,19 +100,22 @@ void add_q_8w_linear_node(
100100

101101
std::vector<PushConstantDataInfo> pcs;
102102
if (graph.is_buffer_storage(out_W_packed)) {
103-
pcs = {graph.sizes_pc_of(out_W_packed),
104-
graph.strides_pc_of(out_W_packed),
105-
graph.sizes_pc_of(mat1_W_packed),
106-
graph.strides_pc_of(mat1),
107-
graph.strides_pc_of(q_mat2),
108-
graph.strides_pc_of(scales),
109-
graph.numel_pc_of(out_W_packed)};
103+
pcs = {
104+
graph.sizes_pc_of(out_W_packed),
105+
graph.strides_pc_of(out_W_packed),
106+
graph.sizes_pc_of(mat1_W_packed),
107+
graph.strides_pc_of(mat1),
108+
graph.strides_pc_of(q_mat2),
109+
graph.strides_pc_of(scales),
110+
graph.numel_pc_of(out_W_packed)};
110111
} else {
111-
pcs = {graph.logical_limits_pc_of(out_W_packed),
112-
graph.sizes_pc_of(mat1_W_packed)};
112+
pcs = {
113+
graph.logical_limits_pc_of(out_W_packed),
114+
graph.sizes_pc_of(mat1_W_packed)};
113115
}
114116

115-
const utils::uvec3 global_wg = {static_cast<uint32_t>(graph.numel_of(out_W_packed)), 1, 1};
117+
const utils::uvec3 global_wg = {
118+
static_cast<uint32_t>(graph.numel_of(out_W_packed)), 1, 1};
116119
const utils::uvec3 local_wg{64, 1, 1};
117120

118121
graph.execute_nodes().emplace_back(new DispatchNode(

0 commit comments

Comments
 (0)