@@ -51,8 +51,10 @@
     ResponseOutputText,
     ResponseRefusalDeltaEvent,
     ResponseTextDeltaEvent,
+    ResponseUsage,
 )
 from openai.types.responses.response_input_param import FunctionCallOutput, ItemReference, Message
+from openai.types.responses.response_usage import OutputTokensDetails

 from .. import _debug
 from ..agent_output import AgentOutputSchema

@@ -405,7 +407,23 @@ async def stream_response(
         for function_call in state.function_calls.values():
             outputs.append(function_call)

-        final_response = response.model_copy(update={"output": outputs, "usage": usage})
+        final_response = response.model_copy()
+        final_response.output = outputs
+        final_response.usage = (
+            ResponseUsage(
+                input_tokens=usage.prompt_tokens,
+                output_tokens=usage.completion_tokens,
+                total_tokens=usage.total_tokens,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=usage.completion_tokens_details.reasoning_tokens
+                    if usage.completion_tokens_details
+                    and usage.completion_tokens_details.reasoning_tokens
+                    else 0
+                ),
+            )
+            if usage
+            else None
+        )

         yield ResponseCompletedEvent(
             response=final_response,
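
The block above replaces a single `model_copy(update=...)` call with an explicit translation from the Chat Completions usage object to the Responses-style `ResponseUsage`: `prompt_tokens` maps to `input_tokens`, `completion_tokens` to `output_tokens`, `reasoning_tokens` defaults to 0, and usage stays `None` when the API returned none. A minimal standalone sketch of that mapping, using simplified dataclasses as stand-ins for the SDK's pydantic models (field names follow the diff; the helper name `to_response_usage` is only illustrative):

from dataclasses import dataclass
from typing import Optional


@dataclass
class CompletionTokensDetails:  # stand-in for the Chat Completions detail type
    reasoning_tokens: Optional[int] = None


@dataclass
class CompletionUsage:  # stand-in for the Chat Completions usage object
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    completion_tokens_details: Optional[CompletionTokensDetails] = None


@dataclass
class OutputTokensDetails:  # stand-in for the Responses output-token details type
    reasoning_tokens: int


@dataclass
class ResponseUsage:  # stand-in for openai.types.responses.ResponseUsage
    input_tokens: int
    output_tokens: int
    total_tokens: int
    output_tokens_details: OutputTokensDetails


def to_response_usage(usage: Optional[CompletionUsage]) -> Optional[ResponseUsage]:
    """Mirror the mapping in the hunk above: prompt -> input, completion -> output."""
    if usage is None:
        return None
    details = usage.completion_tokens_details
    reasoning = details.reasoning_tokens if details and details.reasoning_tokens else 0
    return ResponseUsage(
        input_tokens=usage.prompt_tokens,
        output_tokens=usage.completion_tokens,
        total_tokens=usage.total_tokens,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=reasoning),
    )


print(to_response_usage(CompletionUsage(prompt_tokens=12, completion_tokens=30, total_tokens=42)))
# -> ResponseUsage(input_tokens=12, output_tokens=30, total_tokens=42,
#    output_tokens_details=OutputTokensDetails(reasoning_tokens=0))
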
@@ -503,6 +521,7 @@ async def _fetch_response(
             top_p=self._non_null_or_not_given(model_settings.top_p),
             frequency_penalty=self._non_null_or_not_given(model_settings.frequency_penalty),
             presence_penalty=self._non_null_or_not_given(model_settings.presence_penalty),
+            max_tokens=self._non_null_or_not_given(model_settings.max_tokens),
             tool_choice=tool_choice,
             response_format=response_format,
             parallel_tool_calls=parallel_tool_calls,
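
The added `max_tokens` argument follows the same pattern as the neighbouring sampling parameters: `model_settings.max_tokens` is forwarded to the Chat Completions request only when the caller actually set it. The `_non_null_or_not_given` helper is not shown in this diff; a small sketch of its presumed behaviour, using the `NOT_GIVEN` sentinel the `openai` package exposes for omitted parameters:

from openai import NOT_GIVEN


def _non_null_or_not_given(value):
    # Presumed behaviour: pass user-supplied values through, otherwise return the
    # omit sentinel so the parameter is left out of the request entirely.
    return value if value is not None else NOT_GIVEN


print(_non_null_or_not_given(128))   # 128 -> sent as max_tokens
print(_non_null_or_not_given(None))  # NOT_GIVEN -> max_tokens omitted from the request
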
@@ -808,6 +827,13 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
                         "content": cls.extract_text_content(content),
                     }
                     result.append(msg_developer)
+                elif role == "assistant":
+                    flush_assistant_message()
+                    msg_assistant: ChatCompletionAssistantMessageParam = {
+                        "role": "assistant",
+                        "content": cls.extract_text_content(content),
+                    }
+                    result.append(msg_assistant)
                 else:
                     raise UserError(f"Unexpected role in easy_input_message: {role}")

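
Previously, `easy_input_message` items with role `assistant` fell through to the `UserError` branch; the new `elif` turns them into plain assistant chat messages after flushing any pending tool-call message via `flush_assistant_message()`. A rough standalone sketch of the conversion, with a plain dict standing in for `ChatCompletionAssistantMessageParam` and a simplified, hypothetical text extractor in place of `cls.extract_text_content`:

def extract_text_content(content) -> str:
    # Simplified stand-in: pass plain strings through, otherwise join the text of
    # any text-bearing parts.
    if isinstance(content, str):
        return content
    return "".join(part.get("text", "") for part in content)


def easy_message_to_chat_param(role: str, content) -> dict:
    # Mirrors the new branch: assistant-role easy input messages become assistant
    # chat messages; unknown roles still raise.
    if role == "assistant":
        return {"role": "assistant", "content": extract_text_content(content)}
    raise ValueError(f"Unexpected role in easy_input_message: {role}")


print(easy_message_to_chat_param("assistant", [{"type": "output_text", "text": "Done."}]))
# -> {'role': 'assistant', 'content': 'Done.'}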