Skip to content

Commit a8bdd65

Browse files
m18coppola (Michael Coppola) and Michael Coppola authored
server : add parameter -tb N, --threads-batch N (#3584)
Co-authored-by: Michael Coppola <[email protected]>
1 parent 70c29da commit a8bdd65

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

examples/server/server.cpp

+19 -9
Original file line number | Diff line number | Diff line change
@@ -714,15 +714,16 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
714714
printf("usage: %s [options]\n", argv0);
715715
printf("\n");
716716
printf("options:\n");
717-
printf(" -h, --help show this help message and exit\n");
718-
printf(" -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
719-
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
720-
printf(" -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
721-
printf(" --rope-freq-base N RoPE base frequency (default: loaded from model)\n");
722-
printf(" --rope-freq-scale N RoPE frequency scaling factor (default: loaded from model)\n");
723-
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
724-
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
725-
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
717+
printf(" -h, --help show this help message and exit\n");
718+
printf(" -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
719+
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
720+
printf(" -tb N, --threads-batch N number of threads to use during batch and prompt processing (default: same as --threads)\n");
721+
printf(" -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
722+
printf(" --rope-freq-base N RoPE base frequency (default: loaded from model)\n");
723+
printf(" --rope-freq-scale N RoPE frequency scaling factor (default: loaded from model)\n");
724+
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
725+
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
726+
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
726727
if (llama_mlock_supported())
727728
{
728729
printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n");
@@ -867,6 +868,15 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
867868
}
868869
params.n_threads = std::stoi(argv[i]);
869870
}
871+
else if (arg == "--threads-batch" || arg == "-tb")
872+
{
873+
if (++i >= argc)
874+
{
875+
invalid_param = true;
876+
break;
877+
}
878+
params.n_threads_batch = std::stoi(argv[i]);
879+
}
870880
else if (arg == "-b" || arg == "--batch-size")
871881
{
872882
if (++i >= argc)

0 commit comments

Comments
 (0)