@@ -1420,6 +1420,34 @@ static void dequantize_row_q4_1(const void * restrict vx, float * restrict y, in
1420
1420
#endif
1421
1421
}
1422
1422
1423
+ static void ggml_vec_dot_q4_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ); // forward declaration: named as .vec_dot_q of the GGML_TYPE_Q4_1 entry in the quantize_fns table below; the definition is not in this hunk (presumably later in the file - confirm)
1424
+ static void ggml_vec_dot_q4_0_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ); // forward declaration: named as .vec_dot_q of the GGML_TYPE_Q4_0 entry in the quantize_fns table below
1425
+
1426
+ static const quantize_fns_t quantize_fns [GGML_TYPE_COUNT ] = { // file-local, read-only table of per-type quantization callbacks, indexed by GGML_TYPE_* value
1427
+ [GGML_TYPE_Q4_0 ] = {
1428
+ .dequantize_row_q = dequantize_row_q4_0 ,
1429
+ .quantize_row_q = quantize_row_q4_0 ,
1430
+ .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_0_reference , // explicit cast to quantize_row_q_t; NOTE(review): suggests the reference function's declared type differs from quantize_row_q_t - confirm the two are call-compatible
1431
+ .quantize_row_q_dot = quantize_row_q8_0 , // Q4_0 uses the q8_0 row quantizer for this slot, matching the q4_0 x q8_0 dot product in .vec_dot_q
1432
+ .vec_dot_q = ggml_vec_dot_q4_0_q8_0 ,
1433
+ },
1434
+ [GGML_TYPE_Q4_1 ] = {
1435
+ .dequantize_row_q = dequantize_row_q4_1 ,
1436
+ .quantize_row_q = quantize_row_q4_1 ,
1437
+ .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_1_reference , // same explicit cast as the Q4_0 entry
1438
+ .quantize_row_q_dot = quantize_row_q4_1 , // Q4_1 reuses its own row quantizer for this slot (same function as .quantize_row_q)
1439
+ .vec_dot_q = ggml_vec_dot_q4_1 ,
1440
+ },
1441
+ // TODO: GGML_TYPE_Q8_0
1442
+ }; // any index without a designated initializer above is implicitly zero-initialized, so its members are null (assuming every member is a function pointer - confirm against quantize_fns_t)
1443
+
1444
+ // For internal test use: returns a by-value copy of the quantize_fns entry at index i, after asserting that i is below GGML_TYPE_COUNT.
1445
+ quantize_fns_t ggml_internal_get_quantize_fn (size_t i ) { // declared without static, unlike the table it reads
1446
+ GGML_ASSERT (i < GGML_TYPE_COUNT ); // bounds check: quantize_fns has exactly GGML_TYPE_COUNT elements
1447
+ return quantize_fns [i ]; // entries not explicitly initialized in the table come back zero-filled
1448
+ }
1449
+
1450
+
1423
1451
//
1424
1452
// simd mappings
1425
1453
//
@@ -5910,12 +5938,12 @@ static void ggml_compute_forward_add_q_f32(
5910
5938
const int64_t ne03 = src0 -> ne [3 ];
5911
5939
5912
5940
//const int64_t ne10 = src1->ne[0];
5913
- const int64_t ne11 = src1 -> ne [1 ];
5941
+ // const int64_t ne11 = src1->ne[1];
5914
5942
const int64_t ne12 = src1 -> ne [2 ];
5915
5943
const int64_t ne13 = src1 -> ne [3 ];
5916
5944
5917
- const int64_t ne0 = dst -> ne [0 ];
5918
- const int64_t ne1 = dst -> ne [1 ];
5945
+ // const int64_t ne0 = dst->ne[0];
5946
+ // const int64_t ne1 = dst->ne[1];
5919
5947
const int64_t ne2 = dst -> ne [2 ];
5920
5948
const int64_t ne3 = dst -> ne [3 ];
5921
5949
@@ -7307,30 +7335,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
7307
7335
//}
7308
7336
}
7309
7337
7310
- static const quantize_fns_t quantize_fns [GGML_TYPE_COUNT ] = {
7311
- [GGML_TYPE_Q4_0 ] = {
7312
- .dequantize_row_q = dequantize_row_q4_0 ,
7313
- .quantize_row_q = quantize_row_q4_0 ,
7314
- .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_0_reference ,
7315
- .quantize_row_q_dot = quantize_row_q8_0 ,
7316
- .vec_dot_q = ggml_vec_dot_q4_0_q8_0 ,
7317
- },
7318
- [GGML_TYPE_Q4_1 ] = {
7319
- .dequantize_row_q = dequantize_row_q4_1 ,
7320
- .quantize_row_q = quantize_row_q4_1 ,
7321
- .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_1_reference ,
7322
- .quantize_row_q_dot = quantize_row_q4_1 ,
7323
- .vec_dot_q = ggml_vec_dot_q4_1 ,
7324
- },
7325
- // TODO: GGML_TYPE_Q8_0
7326
- };
7327
-
7328
- // For internal test use
7329
- quantize_fns_t ggml_internal_get_quantize_fn (size_t i ) {
7330
- GGML_ASSERT (i < GGML_TYPE_COUNT );
7331
- return quantize_fns [i ];
7332
- }
7333
-
7334
7338
static void ggml_compute_forward_mul_mat_q_f32 (
7335
7339
const struct ggml_compute_params * params ,
7336
7340
const struct ggml_tensor * src0 ,
0 commit comments