
Commit 92d6e3e

Previous response id (#509)
Allows passing in `previous_response_id` so callers can avoid re-sending the same input data again and again.

Test plan: examples; tests to be added in a follow-up PR.

Stack: #509 (this PR), #508
1 parent 86ad99d commit 92d6e3e

11 files changed: +161 -13 lines
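In the spirit of the "Examples" test plan, here is a minimal sketch of chaining two turns with the new parameter, using the `Runner.run` argument and the `RunResult.last_response_id` property added below; the agent definition, model behavior, and prompts are illustrative assumptions, not part of the diff:

```python
import asyncio

from agents import Agent, Runner


async def main() -> None:
    agent = Agent(name="Assistant", instructions="Reply very concisely.")

    first = await Runner.run(agent, "What city is the Golden Gate Bridge in?")
    print(first.final_output)

    # Instead of re-sending the prior turn via first.to_input_list(),
    # pass only the new user input plus the previous response ID.
    second = await Runner.run(
        agent,
        "What state is it in?",
        previous_response_id=first.last_response_id,
    )
    print(second.final_output)


asyncio.run(main())
```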

src/agents/items.py (+2)

@@ -169,6 +169,8 @@ class ModelResponse:
     response_id: str | None
     """An ID for the response which can be used to refer to the response in subsequent calls to the
     model. Not supported by all model providers.
+    If using OpenAI models via the Responses API, this is the `response_id` parameter, and it can
+    be passed to `Runner.run`.
     """
 
     def to_input_items(self) -> list[TResponseInputItem]:
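Each `ModelResponse` in `RunResult.raw_responses` carries this field, so the ID of any turn can be read back after a run. A tiny sketch (the `result` object is assumed to come from an earlier `Runner.run` call):

```python
# `result` is assumed to be a RunResult from a prior Runner.run(...) call.
for turn, model_response in enumerate(result.raw_responses):
    # response_id is None for providers that don't support it (see the docstring above).
    print(f"turn {turn}: response_id={model_response.response_id}")
```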

src/agents/models/interface.py (+8)

@@ -44,6 +44,8 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         """Get a response from the model.
@@ -55,6 +57,8 @@ async def get_response(
             output_schema: The output schema to use.
             handoffs: The handoffs available to the model.
             tracing: Tracing configuration.
+            previous_response_id: the ID of the previous response. Generally not used by the model,
+                except for the OpenAI Responses API.
 
         Returns:
             The full model response.
@@ -71,6 +75,8 @@ def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         """Stream a response from the model.
@@ -82,6 +88,8 @@ def stream_response(
             output_schema: The output schema to use.
             handoffs: The handoffs available to the model.
             tracing: Tracing configuration.
+            previous_response_id: the ID of the previous response. Generally not used by the model,
+                except for the OpenAI Responses API.
 
         Returns:
             An iterator of response stream events, in OpenAI Responses format.
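Because the new argument is keyword-only on the abstract `Model` interface, any third-party `Model` implementation must now accept it, even if it ignores the value. A minimal sketch of what that looks like for a hypothetical custom model (the parameters before `output_schema` are not visible in the hunks above and are assumed; type annotations and bodies are omitted for brevity):

```python
from agents.models.interface import Model


class MyCustomModel(Model):
    """Hypothetical Model implementation; bodies elided."""

    async def get_response(
        self,
        system_instructions,
        input,
        model_settings,
        tools,
        output_schema,
        handoffs,
        tracing,
        *,
        previous_response_id,  # new keyword-only argument; safe to ignore here
    ):
        raise NotImplementedError

    async def stream_response(
        self,
        system_instructions,
        input,
        model_settings,
        tools,
        output_schema,
        handoffs,
        tracing,
        *,
        previous_response_id,  # new keyword-only argument; safe to ignore here
    ):
        raise NotImplementedError
        yield  # makes this an async generator, matching the streaming interface
```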

src/agents/models/openai_chatcompletions.py (+3)

@@ -108,6 +108,7 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         with generation_span(
             model=str(self.model),
@@ -168,6 +169,8 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         """
         Yields a partial message as it is generated, as well as the usage information.
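Judging from these hunks, the Chat Completions implementation only adds the parameter so its signature keeps matching the shared `Model` interface; the value does not appear to be forwarded to the Chat Completions API, which is consistent with the docstring above ("Generally not used by the model, except for the OpenAI Responses API").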

src/agents/models/openai_responses.py (+9)

@@ -69,6 +69,7 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         with response_span(disabled=tracing.is_disabled()) as span_response:
             try:
@@ -79,6 +80,7 @@ async def get_response(
                     tools,
                     output_schema,
                     handoffs,
+                    previous_response_id,
                     stream=False,
                 )
 
@@ -132,6 +134,7 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> AsyncIterator[ResponseStreamEvent]:
         """
         Yields a partial message as it is generated, as well as the usage information.
@@ -145,6 +148,7 @@ async def stream_response(
                     tools,
                     output_schema,
                     handoffs,
+                    previous_response_id,
                     stream=True,
                 )
 
@@ -180,6 +184,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[True],
     ) -> AsyncStream[ResponseStreamEvent]: ...
 
@@ -192,6 +197,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[False],
     ) -> Response: ...
 
@@ -203,6 +209,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[True] | Literal[False] = False,
     ) -> Response | AsyncStream[ResponseStreamEvent]:
         list_input = ItemHelpers.input_to_new_input_list(input)
@@ -229,9 +236,11 @@ async def _fetch_response(
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
+                f"Previous response id: {previous_response_id}\n"
             )
 
         return await self._client.responses.create(
+            previous_response_id=self._non_null_or_not_given(previous_response_id),
             instructions=self._non_null_or_not_given(system_instructions),
             model=self.model,
             input=list_input,
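For context, the kwarg being threaded through here maps onto the Responses API's own `previous_response_id` field. A hedged sketch of the equivalent raw OpenAI Python SDK calls that the wrapper above ends up making (model name and prompts are placeholders):

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()

    first = await client.responses.create(
        model="gpt-4o",
        input="What city is the Golden Gate Bridge in?",
    )

    # Chain the follow-up to the server-side context of the first response
    # instead of re-sending the earlier input items.
    second = await client.responses.create(
        model="gpt-4o",
        previous_response_id=first.id,
        input="What state is it in?",
    )
    print(second.output_text)


asyncio.run(main())
```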

src/agents/result.py (+8)

@@ -80,6 +80,14 @@ def to_input_list(self) -> list[TResponseInputItem]:
 
         return original_items + new_items
 
+    @property
+    def last_response_id(self) -> str | None:
+        """Convenience method to get the response ID of the last model response."""
+        if not self.raw_responses:
+            return None
+
+        return self.raw_responses[-1].response_id
+
 
 @dataclass
 class RunResult(RunResultBase):
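`last_response_id` returns `None` when there are no raw responses or when the provider does not populate `response_id`, so callers chaining turns may want a fallback to the explicit input list. A small sketch under those assumptions (the `agent`, prior `result`, and question text are placeholders):

```python
from agents import Agent, Runner
from agents.result import RunResultBase


async def continue_conversation(agent: Agent, result: RunResultBase, question: str):
    # Prefer server-side chaining when a response ID is available
    # (OpenAI Responses API); otherwise replay the conversation explicitly.
    if result.last_response_id is not None:
        return await Runner.run(
            agent,
            question,
            previous_response_id=result.last_response_id,
        )
    return await Runner.run(
        agent,
        result.to_input_list() + [{"role": "user", "content": question}],
    )
```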

src/agents/run.py (+21, -1)

@@ -117,6 +117,7 @@ async def run(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResult:
         """Run a workflow starting at the given agent. The agent will run in a loop until a final
         output is generated. The loop runs like so:
@@ -141,6 +142,8 @@ async def run(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
+            previous_response_id: The ID of the previous response, if using OpenAI models via the
+                Responses API, this allows you to skip passing in input from the previous turn.
 
         Returns:
             A run result containing all the inputs, guardrail results and the output of the last
@@ -230,6 +233,7 @@ async def run(
                         run_config=run_config,
                         should_run_agent_start_hooks=should_run_agent_start_hooks,
                         tool_use_tracker=tool_use_tracker,
+                        previous_response_id=previous_response_id,
                     ),
                 )
             else:
@@ -243,6 +247,7 @@ async def run(
                         run_config=run_config,
                         should_run_agent_start_hooks=should_run_agent_start_hooks,
                         tool_use_tracker=tool_use_tracker,
+                        previous_response_id=previous_response_id,
                     )
                 should_run_agent_start_hooks = False
 
@@ -291,6 +296,7 @@ def run_sync(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResult:
         """Run a workflow synchronously, starting at the given agent. Note that this just wraps the
         `run` method, so it will not work if there's already an event loop (e.g. inside an async
@@ -319,6 +325,8 @@ def run_sync(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
+            previous_response_id: The ID of the previous response, if using OpenAI models via the
+                Responses API, this allows you to skip passing in input from the previous turn.
 
         Returns:
             A run result containing all the inputs, guardrail results and the output of the last
@@ -332,6 +340,7 @@ def run_sync(
                 max_turns=max_turns,
                 hooks=hooks,
                 run_config=run_config,
+                previous_response_id=previous_response_id,
             )
         )
 
@@ -344,6 +353,7 @@ def run_streamed(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResultStreaming:
         """Run a workflow starting at the given agent in streaming mode. The returned result object
         contains a method you can use to stream semantic events as they are generated.
@@ -370,7 +380,8 @@ def run_streamed(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
-
+            previous_response_id: The ID of the previous response, if using OpenAI models via the
+                Responses API, this allows you to skip passing in input from the previous turn.
         Returns:
             A result object that contains data about the run, as well as a method to stream events.
         """
@@ -428,6 +439,7 @@ def run_streamed(
                 hooks=hooks,
                 context_wrapper=context_wrapper,
                 run_config=run_config,
+                previous_response_id=previous_response_id,
             )
         )
         return streamed_result
@@ -485,6 +497,7 @@ async def _run_streamed_impl(
         hooks: RunHooks[TContext],
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
+        previous_response_id: str | None,
     ):
         current_span: Span[AgentSpanData] | None = None
         current_agent = starting_agent
@@ -554,6 +567,7 @@ async def _run_streamed_impl(
                         should_run_agent_start_hooks,
                         tool_use_tracker,
                         all_tools,
+                        previous_response_id,
                     )
                     should_run_agent_start_hooks = False
 
@@ -623,6 +637,7 @@ async def _run_single_turn_streamed(
         should_run_agent_start_hooks: bool,
         tool_use_tracker: AgentToolUseTracker,
         all_tools: list[Tool],
+        previous_response_id: str | None,
     ) -> SingleStepResult:
         if should_run_agent_start_hooks:
             await asyncio.gather(
@@ -662,6 +677,7 @@ async def _run_single_turn_streamed(
             get_model_tracing_impl(
                 run_config.tracing_disabled, run_config.trace_include_sensitive_data
            ),
+            previous_response_id=previous_response_id,
        ):
            if isinstance(event, ResponseCompletedEvent):
                usage = (
@@ -717,6 +733,7 @@ async def _run_single_turn(
         run_config: RunConfig,
         should_run_agent_start_hooks: bool,
         tool_use_tracker: AgentToolUseTracker,
+        previous_response_id: str | None,
     ) -> SingleStepResult:
         # Ensure we run the hooks before anything else
         if should_run_agent_start_hooks:
@@ -746,6 +763,7 @@ async def _run_single_turn(
             context_wrapper,
             run_config,
             tool_use_tracker,
+            previous_response_id,
         )
 
         return await cls._get_single_step_result_from_response(
@@ -888,6 +906,7 @@ async def _get_new_response(
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
         tool_use_tracker: AgentToolUseTracker,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         model = cls._get_model(agent, run_config)
         model_settings = agent.model_settings.resolve(run_config.model_settings)
@@ -903,6 +922,7 @@ async def _get_new_response(
             tracing=get_model_tracing_impl(
                 run_config.tracing_disabled, run_config.trace_include_sensitive_data
             ),
+            previous_response_id=previous_response_id,
         )
 
         context_wrapper.usage.add(new_response.usage)
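`run_streamed` gains the same parameter, so streamed runs can be chained too. A minimal sketch (agent definition and prompts are illustrative; event handling is elided):

```python
import asyncio

from agents import Agent, Runner


async def main() -> None:
    agent = Agent(name="Assistant", instructions="Reply very concisely.")

    first = Runner.run_streamed(agent, "Tell me a one-line joke.")
    async for _event in first.stream_events():
        pass  # consume the stream; real code would handle the events

    # Chain the next streamed turn to the previous response.
    second = Runner.run_streamed(
        agent,
        "Explain why it's funny.",
        previous_response_id=first.last_response_id,
    )
    async for _event in second.stream_events():
        pass


asyncio.run(main())
```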

tests/fake_model.py (+4)

@@ -54,6 +54,8 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         self.last_turn_args = {
             "system_instructions": system_instructions,
@@ -93,6 +95,8 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         with generation_span(disabled=not self.tracing_enabled) as span:
             output = self.get_next_output()
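The test double grows the same keyword-only parameter so it keeps satisfying the `Model` interface; the hunks above do not record `previous_response_id` in `last_turn_args`, which is consistent with the commit message's note that tests arrive in a follow-up PR.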

tests/test_openai_chatcompletions.py (+3)

@@ -67,6 +67,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     # Should have produced exactly one output message with one text part
     assert isinstance(resp, ModelResponse)
@@ -115,6 +116,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     assert len(resp.output) == 1
     assert isinstance(resp.output[0], ResponseOutputMessage)
@@ -164,6 +166,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     # Expect a message item followed by a function tool call item.
     assert len(resp.output) == 2

tests/test_openai_chatcompletions_stream.py (+3)

@@ -79,6 +79,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # We expect a response.created, then a response.output_item.added, content part added,
@@ -168,6 +169,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # Expect sequence similar to text: created, output_item.added, content part added,
@@ -255,6 +257,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # Sequence should be: response.created, then after loop we expect function call-related events:
