@@ -60,20 +60,13 @@ def completion_with_backoff(
60
60
61
61
formatted_messages = [{"role" : message .role , "content" : message .content } for message in messages ]
62
62
63
- # Update request parameters for compatability with o1 model series
64
- # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
65
- stream = True
66
- model_kwargs ["stream_options" ] = {"include_usage" : True }
67
- if model_name == "o1" :
68
- temperature = 1
69
- stream = False
70
- model_kwargs .pop ("stream_options" , None )
71
- elif model_name .startswith ("o1" ):
72
- temperature = 1
73
- model_kwargs .pop ("response_format" , None )
74
- elif model_name .startswith ("o3-" ):
63
+ # Tune request arguments for reasoning models
64
+ if model_name .startswith ("o1" ) or model_name .startswith ("o3" ):
75
65
temperature = 1
66
+ model_kwargs ["reasoning_effort" ] = "medium"
76
67
68
+ stream = True
69
+ model_kwargs ["stream_options" ] = {"include_usage" : True }
77
70
if os .getenv ("KHOJ_LLM_SEED" ):
78
71
model_kwargs ["seed" ] = int (os .getenv ("KHOJ_LLM_SEED" ))
79
72
@@ -172,20 +165,13 @@ def llm_thread(
172
165
173
166
formatted_messages = [{"role" : message .role , "content" : message .content } for message in messages ]
174
167
175
- # Update request parameters for compatability with o1 model series
176
- # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
177
- stream = True
178
- model_kwargs ["stream_options" ] = {"include_usage" : True }
179
- if model_name == "o1" :
180
- temperature = 1
181
- stream = False
182
- model_kwargs .pop ("stream_options" , None )
183
- elif model_name .startswith ("o1-" ):
168
+ # Tune request arguments for reasoning models
169
+ if model_name .startswith ("o1" ):
184
170
temperature = 1
185
- model_kwargs .pop ("response_format" , None )
186
- elif model_name .startswith ("o3-" ):
171
+ elif model_name .startswith ("o3" ):
187
172
temperature = 1
188
- # Get the first system message and add the string `Formatting re-enabled` to it. See https://platform.openai.com/docs/guides/reasoning-best-practices
173
+ # Get the first system message and add the string `Formatting re-enabled` to it.
174
+ # See https://platform.openai.com/docs/guides/reasoning-best-practices
189
175
if len (formatted_messages ) > 0 :
190
176
system_messages = [
191
177
(i , message ) for i , message in enumerate (formatted_messages ) if message ["role" ] == "system"
@@ -195,7 +181,6 @@ def llm_thread(
195
181
formatted_messages [first_system_message_index ][
196
182
"content"
197
183
] = f"{ first_system_message } Formatting re-enabled"
198
-
199
184
elif model_name .startswith ("deepseek-reasoner" ):
200
185
# Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
201
186
# The first message should always be a user message (except system message).
@@ -210,6 +195,8 @@ def llm_thread(
210
195
211
196
formatted_messages = updated_messages
212
197
198
+ stream = True
199
+ model_kwargs ["stream_options" ] = {"include_usage" : True }
213
200
if os .getenv ("KHOJ_LLM_SEED" ):
214
201
model_kwargs ["seed" ] = int (os .getenv ("KHOJ_LLM_SEED" ))
215
202
0 commit comments