Simplify reasoning-model-specific arguments passed to the OpenAI API

debanjum · debanjum · commit d74c3a1db4db · 2025-03-19T21:12:02.000+05:30
Previously, OpenAI reasoning models didn't support stream_options and
response_format, so those arguments were special-cased per model.
Remove that special-casing.

Add reasoning_effort arg for calls to OpenAI reasoning models via API.
Right now it defaults to medium, but it can be changed to low or high.
diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py
@@ -60,20 +60,13 @@ def completion_with_backoff(
 
     formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
 
-    # Update request parameters for compatability with o1 model series
-    # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
-    stream = True
-    model_kwargs["stream_options"] = {"include_usage": True}
-    if model_name == "o1":
-        temperature = 1
-        stream = False
-        model_kwargs.pop("stream_options", None)
-    elif model_name.startswith("o1"):
-        temperature = 1
-        model_kwargs.pop("response_format", None)
-    elif model_name.startswith("o3-"):
+    # Tune reasoning models arguments
+    if model_name.startswith("o1") or model_name.startswith("o3"):
         temperature = 1
+        model_kwargs["reasoning_effort"] = "medium"
 
+    stream = True
+    model_kwargs["stream_options"] = {"include_usage": True}
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
 
@@ -172,20 +165,13 @@ def llm_thread(
 
         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
 
-        # Update request parameters for compatability with o1 model series
-        # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
-        stream = True
-        model_kwargs["stream_options"] = {"include_usage": True}
-        if model_name == "o1":
-            temperature = 1
-            stream = False
-            model_kwargs.pop("stream_options", None)
-        elif model_name.startswith("o1-"):
+        # Tune reasoning models arguments
+        if model_name.startswith("o1"):
             temperature = 1
-            model_kwargs.pop("response_format", None)
-        elif model_name.startswith("o3-"):
+        elif model_name.startswith("o3"):
             temperature = 1
-            # Get the first system message and add the string `Formatting re-enabled` to it. See https://platform.openai.com/docs/guides/reasoning-best-practices
+            # Get the first system message and add the string `Formatting re-enabled` to it.
+            # See https://platform.openai.com/docs/guides/reasoning-best-practices
             if len(formatted_messages) > 0:
                 system_messages = [
                     (i, message) for i, message in enumerate(formatted_messages) if message["role"] == "system"
@@ -195,7 +181,6 @@ def llm_thread(
                     formatted_messages[first_system_message_index][
                         "content"
                     ] = f"{first_system_message} Formatting re-enabled"
-
         elif model_name.startswith("deepseek-reasoner"):
             # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
             # The first message should always be a user message (except system message).
@@ -210,6 +195,8 @@ def llm_thread(
 
             formatted_messages = updated_messages
 
+        stream = True
+        model_kwargs["stream_options"] = {"include_usage": True}
         if os.getenv("KHOJ_LLM_SEED"):
             model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))