Skip to content

Commit b9a5d91

Browse files
committed
Add additional comments
1 parent d0af2a1 commit b9a5d91

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

ggml-quants.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4876,6 +4876,7 @@ void ggml_vec_dot_q4_0_b16_q8_0_b16(int n, float * restrict s, size_t bs, const
48764876
__m128bh xd = m128bh(_mm_cvtepu16_epi32(_mm_set_epi64x(0, x_delta)));
48774877
__m128bh yd = m128bh(_mm_cvtepu16_epi32(_mm_set_epi64x(0, y_delta)));
48784878

4879+
// Computes product of delta values from four corresponding blocks
48794880
__m256 d = _mm256_castps128_ps256(_mm_dpbf16_ps(zerovec, xd, yd));
48804881
d = _mm256_permute2f128_ps(d ,d, 0);
48814882

@@ -6407,6 +6408,7 @@ void ggml_vec_dot_q8_0_b16_q8_0_b16(int n, float * restrict s, size_t bs, const
64076408
__m128bh xd = m128bh(_mm_cvtepu16_epi32(_mm_set_epi64x(0, x_delta)));
64086409
__m128bh yd = m128bh(_mm_cvtepu16_epi32(_mm_set_epi64x(0, y_delta)));
64096410

6411+
// Computes product of delta values from four corresponding blocks
64106412
__m256 d = _mm256_castps128_ps256(_mm_dpbf16_ps(zerovec, xd, yd));
64116413
d = _mm256_permute2f128_ps(d ,d, 0);
64126414

sgemm.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,7 @@ class tinyBLAS_Q0_B16_AVX {
982982
}
983983

984984
#if defined(__AVX512BF16__)
985+
// Templated functions for gemm of dimensions 4xN
985986
template <int RN>
986987
NOINLINE void gemm4xN(int64_t m0, int64_t m, int64_t n0, int64_t n) {
987988
int64_t ytiles = (m - m0) / 4;
@@ -1006,6 +1007,7 @@ class tinyBLAS_Q0_B16_AVX {
10061007
__m256i avec3 = load(A + lda * (ii + 3) + l);
10071008
for (int64_t j = 0; j < RN; ++j) {
10081009
__m128bh db = m128bh(_mm_set1_epi16(B[ldb * (jj + j) + l].d));
1010+
// Computation of product of delta values for four blocks
10091011
__m256 dvec = _mm256_castps128_ps256(_mm_dpbf16_ps(zerovec, da, db));
10101012
dvec = _mm256_permute2f128_ps(dvec ,dvec, 0);
10111013
Cv[j][0] = madd(_mm256_shuffle_ps(dvec, dvec, 0),
@@ -1057,7 +1059,8 @@ class tinyBLAS_Q0_B16_AVX {
10571059
__m256i bvec3 = load(B + ldb * (jj + 3) + l);
10581060
for (int64_t i = 0; i < RM; ++i) {
10591061
__m128bh da = m128bh(_mm_set1_epi16((A[lda * (ii + i) + l].d)));
1060-
__m256 dvec = _mm256_castps128_ps256(_mm_dpbf16_ps(zerovec, da, db));
1062+
// Computation of product of delta values for four blocks
1063+
__m256 dvec = _mm256_castps128_ps256(_mm_dpbf16_ps(zerovec, da, db));
10611064
dvec = _mm256_permute2f128_ps(dvec ,dvec, 0);
10621065
Cv[0][i] = madd(_mm256_shuffle_ps(dvec, dvec, 0),
10631066
updot(_mm256_sign_epi8(load(A + lda * (ii + i) + l),

0 commit comments

Comments
 (0)