
Commit 57627f0

Rebase to master

1 parent c45868b

2 files changed: +33 −29

ggml.c

Lines changed: 31 additions & 27 deletions
@@ -1420,6 +1420,34 @@ static void dequantize_row_q4_1(const void * restrict vx, float * restrict y, in
 #endif
 }
 
+static void ggml_vec_dot_q4_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
+static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
+
+static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
+    [GGML_TYPE_Q4_0] = {
+        .dequantize_row_q = dequantize_row_q4_0,
+        .quantize_row_q = quantize_row_q4_0,
+        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
+        .quantize_row_q_dot = quantize_row_q8_0,
+        .vec_dot_q = ggml_vec_dot_q4_0_q8_0,
+    },
+    [GGML_TYPE_Q4_1] = {
+        .dequantize_row_q = dequantize_row_q4_1,
+        .quantize_row_q = quantize_row_q4_1,
+        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
+        .quantize_row_q_dot = quantize_row_q4_1,
+        .vec_dot_q = ggml_vec_dot_q4_1,
+    },
+    // TODO: GGML_TYPE_Q8_0
+};
+
+// For internal test use
+quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
+    GGML_ASSERT(i < GGML_TYPE_COUNT);
+    return quantize_fns[i];
+}
+
+
 //
 // simd mappings
 //
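
The relocated table maps each tensor type to its full set of quantization kernels, and `ggml_internal_get_quantize_fn` exposes it outside ggml.c for testing. Below is a minimal sketch of a round-trip accuracy check built on that accessor; the row-function signatures `(const float *, void *, int)` and `(const void *, float *, int)` and the Q4_0 block size of 32 are assumptions taken from the surrounding ggml code, not part of this diff.

// Hypothetical test; assumes it is compiled against ggml.h so that
// quantize_fns_t, GGML_TYPE_Q4_0 and ggml_internal_get_quantize_fn are visible.
#include <math.h>
#include <stdint.h>
#include <stdio.h>

static void test_q4_0_round_trip(void) {
    enum { N = 1024 };                   // assumed multiple of the Q4_0 block size (32)
    float src[N], dst[N];
    uint8_t quant[N * sizeof(float)];    // generously sized scratch; real code would size per block

    for (int i = 0; i < N; i++) {
        src[i] = cosf((float) i);        // deterministic test data
    }

    quantize_fns_t fns = ggml_internal_get_quantize_fn(GGML_TYPE_Q4_0);

    fns.quantize_row_q(src, quant, N);   // float -> Q4_0 blocks
    fns.dequantize_row_q(quant, dst, N); // Q4_0 blocks -> float

    double err = 0.0;
    for (int i = 0; i < N; i++) {
        err += fabs((double) dst[i] - (double) src[i]);
    }
    printf("q4_0 round trip: average absolute error %f\n", err / N);
}
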
@@ -5910,12 +5938,12 @@ static void ggml_compute_forward_add_q_f32(
     const int64_t ne03 = src0->ne[3];
 
     //const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
+    //const int64_t ne11 = src1->ne[1];
     const int64_t ne12 = src1->ne[2];
     const int64_t ne13 = src1->ne[3];
 
-    const int64_t ne0 = dst->ne[0];
-    const int64_t ne1 = dst->ne[1];
+    //const int64_t ne0 = dst->ne[0];
+    //const int64_t ne1 = dst->ne[1];
     const int64_t ne2 = dst->ne[2];
     const int64_t ne3 = dst->ne[3];
 
@@ -7307,30 +7335,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
     //}
 }
 
-static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
-    [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q = dequantize_row_q4_0,
-        .quantize_row_q = quantize_row_q4_0,
-        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
-        .quantize_row_q_dot = quantize_row_q8_0,
-        .vec_dot_q = ggml_vec_dot_q4_0_q8_0,
-    },
-    [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q = dequantize_row_q4_1,
-        .quantize_row_q = quantize_row_q4_1,
-        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
-        .quantize_row_q_dot = quantize_row_q4_1,
-        .vec_dot_q = ggml_vec_dot_q4_1,
-    },
-    // TODO: GGML_TYPE_Q8_0
-};
-
-// For internal test use
-quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
-    GGML_ASSERT(i < GGML_TYPE_COUNT);
-    return quantize_fns[i];
-}
-
 static void ggml_compute_forward_mul_mat_q_f32(
     const struct ggml_compute_params * params,
     const struct ggml_tensor * src0,
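
Note how the table pairs each weight format with the activation quantizer its dot kernel expects: for Q4_0, `quantize_row_q_dot` points at `quantize_row_q8_0` and `vec_dot_q` at `ggml_vec_dot_q4_0_q8_0` (weights in Q4_0, activations in Q8_0), while Q4_1 still dots two Q4_1 rows. A sketch of how a caller might combine the two entries, under the same signature assumptions as the previous sketch and with a deliberately oversized scratch buffer; `dot_quantized_row` is a hypothetical helper, not part of this diff.

#include <stdint.h>

// Hypothetical: dot a pre-quantized weight row wq (already stored in the
// row format of `type`) against a float activation row x of length n.
static float dot_quantized_row(enum ggml_type type, const void * wq, const float * x, int n) {
    quantize_fns_t fns = ggml_internal_get_quantize_fn(type);

    uint8_t xq[8192];                   // scratch for the quantized activations (assumed big enough)
    fns.quantize_row_q_dot(x, xq, n);   // e.g. quantize_row_q8_0 for GGML_TYPE_Q4_0

    float result;
    fns.vec_dot_q(n, &result, wq, xq);  // e.g. ggml_vec_dot_q4_0_q8_0
    return result;
}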

llama.cpp

Lines changed: 2 additions & 2 deletions
@@ -1896,8 +1896,8 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
         ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
         ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
-        if (tensor->ne[0] != loraA->ne[1]) {
-            fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
+        if (tensor->ne[0] != loraA->ne[1] || tensor->ne[1] != loraB->ne[1]) {
+            fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
             " are you sure that this adapter is for this model?\n", __func__, tensor->ne[0], loraA->ne[1]);
             return 1;
         }
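
The added `loraB` clause completes the shape check: in ggml, `ggml_mul_mat(a, b)` requires `a->ne[0] == b->ne[0]` and produces a tensor of shape `[a->ne[1], b->ne[1]]`, so a LoRA delta built as `ggml_mul_mat(loraA, loraB)` can only be added onto `tensor` when both checked equalities hold; previously only the first dimension was verified. A sketch of the same constraint as a standalone predicate (a hypothetical helper, assuming ggml.h is included):

#include <stdbool.h>

// BA = ggml_mul_mat(loraA, loraB) has BA->ne[0] == loraA->ne[1] and
// BA->ne[1] == loraB->ne[1]; adding BA to tensor needs both to match.
static bool lora_shapes_ok(const struct ggml_tensor * tensor,
                           const struct ggml_tensor * loraA,
                           const struct ggml_tensor * loraB) {
    return tensor->ne[0] == loraA->ne[1]    // delta rows match
        && tensor->ne[1] == loraB->ne[1];   // delta columns match
}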
