Skip to content

When using the Azure LLM key and employing "Runner.run_streamed", the token usage returned is 0. #838

Open
@ch-royde

Description

@ch-royde

Describe the bug
I replaced the key of OpenAI with Azure's key. I was able to correctly obtain the token usage information in the result returned by Runner.run(). However, when using Runner.run_streamed, the token usage was always returned as 0.

  • openai: 1.84.0
  • openai-agents: 0.0.17
  • python: 3.10
client = AsyncAzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)

model = OpenAIChatCompletionsModel(
   model='gpt-4.1',
   openai_client=client,
)

agent=Agent(
  name="Agent",
  instructions=base_instructions,
  model=model,
  model_settings=ModelSettings(
     parallel_tool_calls=False, temperature=0.8
  ),
)

# Run streaming process
result = Runner.run_streamed(
    starting_agent=agent,
    input=enhanced_message,
    context=chat_context,
    max_turns=max_turns,
    run_config=run_config,
)

logger.info(f"Start stream events")
async for event in result.stream_events():
    if event.type == "raw_response_event":
        if isinstance(event.data, ResponseTextDeltaEvent):
            pass
    elif event.type == "run_item_stream_event":
        if event.item.type == "tool_call_item":
            logger.info(f"--- Tool Call: {event.item.raw_item.name} ---")
            logger.info(f"Tool Call Full Info: {event.item.raw_item}")
        elif event.item.type == "tool_call_output_item":
            logger.info("--- Tool Call Result ---")
            # Ensure output is string type
            output_str = str(event.item.output)
            # Only log first 1000 chars
            logger.info(output_str[:1000])

logger.info("**Stream successfully finished")
logger.info(f"**Final agent name: [{result.last_agent.name}]")
final_output = result.final_output
final_output_type = type(final_output)
logger.info(f"**Final output type: [{final_output_type}]")
logger.info(f"**Final output: [{final_output}]")

if final_output is None:
    logger.error("**Final output is None")
    raise NoneDataError(detail="Final output is None")

logger.info("**Token usage:")
total_input_tokens = 0
total_output_tokens = 0
total_tokens = 0
for index, r in enumerate(result.raw_responses):
    logger.info(
        f"{index}: input_tokens: [{r.usage.input_tokens}], output_tokens: [{r.usage.output_tokens}], total_tokens: [{r.usage.total_tokens}]"
    )
    total_input_tokens += r.usage.input_tokens
    total_output_tokens += r.usage.output_tokens
    total_tokens += r.usage.total_tokens
logger.info(
    f"**Total input tokens: [{total_input_tokens}], total output tokens: [{total_output_tokens}], total tokens: [{total_tokens}]"
)

final out logs:

Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions