Skip to content

Commit 3f82215

Browse files
committed
common : by default, move the penalties at the end of the sampling chain
ggml-ci
1 parent 3b47c3f commit 3f82215

File tree

2 files changed

+12
-9
lines changed

2 files changed

+12
-9
lines changed

common/common.h

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,7 @@ struct common_params_sampling {
146146
COMMON_SAMPLER_TYPE_MIN_P,
147147
COMMON_SAMPLER_TYPE_XTC,
148148
COMMON_SAMPLER_TYPE_TEMPERATURE,
149+
COMMON_SAMPLER_TYPE_PENALTIES,
149150
};
150151

151152
std::string grammar; // optional BNF-like grammar to constrain sampling
@@ -193,11 +194,13 @@ struct common_params {
193194
float defrag_thold = 0.1f; // KV cache defragmentation threshold
194195

195196
// offload params
196-
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
197-
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
198-
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
199-
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
200-
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
197+
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
198+
199+
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
200+
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
201+
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
202+
203+
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
201204

202205
struct cpu_params cpuparams;
203206
struct cpu_params cpuparams_batch;

common/sampling.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -164,17 +164,17 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
164164
if (params.mirostat == 0) {
165165
for (const auto & cnstr : params.samplers) {
166166
switch (cnstr) {
167-
case COMMON_SAMPLER_TYPE_DRY:
167+
case COMMON_SAMPLER_TYPE_DRY:
168168
{
169-
std::vector<const char*> c_breakers;
169+
std::vector<const char *> c_breakers;
170170
c_breakers.reserve(params.dry_sequence_breakers.size());
171-
for (const auto& str : params.dry_sequence_breakers) {
171+
for (const auto & str : params.dry_sequence_breakers) {
172172
c_breakers.push_back(str.c_str());
173173
}
174174

175175
llama_sampler_chain_add(result->chain, llama_sampler_init_dry (model, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
176176
}
177-
break;
177+
break;
178178
case COMMON_SAMPLER_TYPE_TOP_K:
179179
llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
180180
break;

0 commit comments

Comments (0)