Skip to content

Commit 35faf60

Browse files
committed
Update on "[ET-VK] Minor improvement to permute op."
This change reduces the complexity of boundary comparison in permute op to improve speed. Differential Revision: [D72866962](https://our.internmc.facebook.com/intern/diff/D72866962/) [ghstack-poisoned]
2 parents 45d27ee + d7c4d86 commit 35faf60

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

backends/vulkan/runtime/graph/ops/impl/QuantizedLinearInt8.cpp

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -100,19 +100,22 @@ void add_q_8w_linear_node(
100 100

101 101
std::vector<PushConstantDataInfo> pcs;
102 102
if (graph.is_buffer_storage(out_W_packed)) {
103-
pcs = {graph.sizes_pc_of(out_W_packed),
104-
graph.strides_pc_of(out_W_packed),
105-
graph.sizes_pc_of(mat1_W_packed),
106-
graph.strides_pc_of(mat1),
107-
graph.strides_pc_of(q_mat2),
108-
graph.strides_pc_of(scales),
109-
graph.numel_pc_of(out_W_packed)};
103+
pcs = {
104+
graph.sizes_pc_of(out_W_packed),
105+
graph.strides_pc_of(out_W_packed),
106+
graph.sizes_pc_of(mat1_W_packed),
107+
graph.strides_pc_of(mat1),
108+
graph.strides_pc_of(q_mat2),
109+
graph.strides_pc_of(scales),
110+
graph.numel_pc_of(out_W_packed)};
110 111
} else {
111-
pcs = {graph.logical_limits_pc_of(out_W_packed),
112-
graph.sizes_pc_of(mat1_W_packed)};
112+
pcs = {
113+
graph.logical_limits_pc_of(out_W_packed),
114+
graph.sizes_pc_of(mat1_W_packed)};
113 115
}
114 116

115-
const utils::uvec3 global_wg = {static_cast<uint32_t>(graph.numel_of(out_W_packed)), 1, 1};
117+
const utils::uvec3 global_wg = {
118+
static_cast<uint32_t>(graph.numel_of(out_W_packed)), 1, 1};
116 119
const utils::uvec3 local_wg{64, 1, 1};
117 120

118 121
graph.execute_nodes().emplace_back(new DispatchNode(

0 commit comments

Comments
 (0)