Commit 14a8a06

squash! llama : rename batch.logits to batch.output
Update examples/batched.swift/Sources/main.swift, examples/llama.android/llama/src/main/cpp/llama-android.cpp, and examples/llama.swiftui/llama.cpp.swift/LibLlama.swift to use the new batch.output field instead of batch.logits.
1 parent bbbcaae commit 14a8a06

3 files changed: +8, -8 lines

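For context: the field being renamed is the per-token flag on llama_batch that tells llama_decode which positions should produce logits. The sketch below shows the calling pattern that all three examples share. It is a minimal sketch, assuming the llama.cpp C API with this rename applied (headers without it still call the field batch.logits); ctx and prompt_tokens are placeholder names, not part of this commit.

#include <cstdio>
#include <vector>

#include "common.h" // common_batch_add
#include "llama.h"

// Decode a prompt, requesting output only for its last token.
// Sketch only: assumes the batch.logits -> batch.output rename is applied.
static bool decode_prompt(llama_context * ctx, const std::vector<llama_token> & prompt_tokens) {
    llama_batch batch = llama_batch_init((int32_t) prompt_tokens.size(), 0, 1);

    for (size_t i = 0; i < prompt_tokens.size(); ++i) {
        // final argument false: no output for intermediate prompt tokens
        common_batch_add(batch, prompt_tokens[i], (llama_pos) i, { 0 }, false);
    }

    // llama_decode will output logits only for the last token of the prompt
    batch.output[batch.n_tokens - 1] = true;

    const bool ok = llama_decode(ctx, batch) == 0;
    if (ok) {
        // logits are valid only at positions whose output flag was set
        const float * logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);
        (void) logits;
    } else {
        fprintf(stderr, "llama_decode() failed\n");
    }

    llama_batch_free(batch);
    return ok;
}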
examples/batched.swift/Sources/main.swift

Lines changed: 3 additions & 3 deletions
@@ -99,11 +99,11 @@ for (i, token) in tokens.enumerated() {
     if let seq_id = batch.seq_id[i] {
         seq_id[0] = 0
     }
-    batch.logits[i] = 0
+    batch.output[i] = 0
 }
 
 // llama_decode will output logits only for the last token of the prompt
-batch.logits[Int(batch.n_tokens) - 1] = 1
+batch.output[Int(batch.n_tokens) - 1] = 1
 
 if llama_decode(context, batch) != 0 {
     print("llama_decode() failed")
@@ -166,7 +166,7 @@ while n_cur <= n_len {
         if let seq_id = batch.seq_id[Int(batch.n_tokens)] {
             seq_id[0] = Int32(i)
         }
-        batch.logits[Int(batch.n_tokens)] = 1
+        batch.output[Int(batch.n_tokens)] = 1
 
         i_batch[i] = batch.n_tokens

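The Swift examples write the flag directly, while llama-android.cpp below mostly goes through common_batch_add, whose final boolean argument lands in the same per-token array. As a reference point, here is a simplified sketch of what that helper does after the rename; it is condensed from common.cpp and approximate, not the verbatim implementation.

#include <vector>

#include "llama.h"

// Approximate sketch of common_batch_add after the rename (condensed
// from common.cpp; bounds checking omitted).
void common_batch_add(llama_batch & batch, llama_token id, llama_pos pos,
                      const std::vector<llama_seq_id> & seq_ids, bool output) {
    batch.token   [batch.n_tokens] = id;
    batch.pos     [batch.n_tokens] = pos;
    batch.n_seq_id[batch.n_tokens] = (int32_t) seq_ids.size();
    for (size_t i = 0; i < seq_ids.size(); ++i) {
        batch.seq_id[batch.n_tokens][i] = seq_ids[i];
    }
    batch.output  [batch.n_tokens] = output; // previously batch.logits
    batch.n_tokens++;
}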
examples/llama.android/llama/src/main/cpp/llama-android.cpp

Lines changed: 3 additions & 3 deletions
@@ -193,7 +193,7 @@ Java_android_llama_cpp_LLamaAndroid_bench_1model(
         common_batch_add(*batch, 0, i, { 0 }, false);
     }
 
-    batch->logits[batch->n_tokens - 1] = true;
+    batch->output[batch->n_tokens - 1] = true;
     llama_kv_cache_clear(context);
 
     const auto t_pp_start = ggml_time_us();
@@ -297,7 +297,7 @@ Java_android_llama_cpp_LLamaAndroid_new_1batch(JNIEnv *, jobject, jint n_tokens,
     for (int i = 0; i < n_tokens; ++i) {
         batch->seq_id[i] = (llama_seq_id *) malloc(sizeof(llama_seq_id) * n_seq_max);
     }
-    batch->logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens);
+    batch->output = (int8_t *) malloc(sizeof(int8_t) * n_tokens);
 
     return reinterpret_cast<jlong>(batch);
 }
@@ -377,7 +377,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1init(
     }
 
     // llama_decode will output logits only for the last token of the prompt
-    batch->logits[batch->n_tokens - 1] = true;
+    batch->output[batch->n_tokens - 1] = true;
 
     if (llama_decode(context, *batch) != 0) {
         LOGe("llama_decode() failed");

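In new_1batch above, the per-token arrays are allocated by hand, so output is one int8_t flag per token and must be freed under its new name as well. Below is a hypothetical cleanup matching only the allocations visible in that hunk; the real free path in llama-android.cpp may differ.

#include <cstdlib>

#include "llama.h"

// Hypothetical cleanup for the manually allocated arrays shown in the
// hunk above; illustrative only, not code from this commit.
static void free_batch(llama_batch * batch, int n_tokens) {
    for (int i = 0; i < n_tokens; ++i) {
        free(batch->seq_id[i]);
    }
    free(batch->seq_id);
    free(batch->output); // previously batch->logits
    delete batch;
}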
examples/llama.swiftui/llama.cpp.swift/LibLlama.swift

Lines changed: 2 additions & 2 deletions
@@ -137,7 +137,7 @@ actor LlamaContext {
             let i = Int(i1)
             llama_batch_add(&batch, tokens_list[i], Int32(i), [0], false)
         }
-        batch.logits[Int(batch.n_tokens) - 1] = 1 // true
+        batch.output[Int(batch.n_tokens) - 1] = 1 // true
 
         if llama_decode(context, batch) != 0 {
             print("llama_decode() failed")
@@ -206,7 +206,7 @@ actor LlamaContext {
         for i in 0..<n_tokens {
             llama_batch_add(&batch, 0, Int32(i), [0], false)
         }
-        batch.logits[Int(batch.n_tokens) - 1] = 1 // true
+        batch.output[Int(batch.n_tokens) - 1] = 1 // true
 
         llama_kv_cache_clear(context)
