Closed
Description
What happened?
When streaming from Claude 3.7 Sonnet, response chunks do not contain thinking_blocks of type redacted_thinking when redacted thinking is forced with the magic string specified by Anthropic. For all other queries, thinking blocks are emitted as expected.
Here's a minimal test:
import asyncio

import litellm

litellm._turn_on_debug()


async def has_chunks_with_thinking_blocks(query: str):
    stream = await litellm.acompletion(
        model="anthropic/claude-3-7-sonnet-20250219",
        messages=[{"role": "user", "content": query}],
        stream=True,
        reasoning_effort="low",
    )
    has_thinking_blocks = False
    async for chunk in stream:
        chunk_delta = chunk.choices[0].delta
        if hasattr(chunk_delta, "thinking_blocks") and chunk_delta.thinking_blocks:
            has_thinking_blocks = True
    return has_thinking_blocks


async def main():
    MAGIC_STRING = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB"
    print(await has_chunks_with_thinking_blocks("How are you?"))  # True
    print(await has_chunks_with_thinking_blocks(MAGIC_STRING))  # False (expected True)


if __name__ == "__main__":
    asyncio.run(main())
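For comparison, the non-streaming path can be checked with the same request (a sketch under the same setup; that the returned message carries a thinking_blocks attribute with a block of type redacted_thinking is my reading of LiteLLM's Anthropic response mapping, so treat the exact shape as an assumption):

import asyncio

import litellm

MAGIC_STRING = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB"


async def check_non_streaming():
    # Same request as the repro above, but with stream=False. Here the
    # message is expected (assumption) to carry a thinking block of type
    # 'redacted_thinking' with an opaque 'data' payload.
    response = await litellm.acompletion(
        model="anthropic/claude-3-7-sonnet-20250219",
        messages=[{"role": "user", "content": MAGIC_STRING}],
        reasoning_effort="low",
    )
    print(getattr(response.choices[0].message, "thinking_blocks", None))


asyncio.run(check_non_streaming())

If the block shows up there but never in the streamed chunks, the issue is specific to the streaming path.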
Relevant log output
06:41:28 - LiteLLM:DEBUG: utils.py:332 -
06:41:28 - LiteLLM:DEBUG: utils.py:332 - Request to litellm:
06:41:28 - LiteLLM:DEBUG: utils.py:332 - litellm.acompletion(model='anthropic/claude-3-7-sonnet-20250219', messages=[{'role': 'user', 'content': 'How are you?'}], stream=True, reasoning_effort='low')
06:41:28 - LiteLLM:DEBUG: utils.py:332 -
06:41:28 - LiteLLM:DEBUG: litellm_logging.py:426 - self.optional_params: {}
06:41:28 - LiteLLM:DEBUG: utils.py:332 - ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
06:41:28 - LiteLLM:DEBUG: caching_handler.py:210 - CACHE RESULT: None
06:41:28 - LiteLLM:INFO: utils.py:3108 -
LiteLLM completion() model= claude-3-7-sonnet-20250219; provider = anthropic
06:41:28 - LiteLLM:DEBUG: utils.py:3111 -
LiteLLM: Params passed to completion() {'model': 'claude-3-7-sonnet-20250219', 'functions': None, 'function_call': None, 'temperature': None, 'top_p': None, 'n': None, 'stream': True, 'stream_options': None, 'stop': None, 'max_tokens': None, 'max_completion_tokens': None, 'modalities': None, 'prediction': None, 'audio': None, 'presence_penalty': None, 'frequency_penalty': None, 'logit_bias': None, 'user': None, 'custom_llm_provider': 'anthropic', 'response_format': None, 'seed': None, 'tools': None, 'tool_choice': None, 'max_retries': None, 'logprobs': None, 'top_logprobs': None, 'extra_headers': None, 'api_version': None, 'parallel_tool_calls': None, 'drop_params': None, 'allowed_openai_params': None, 'reasoning_effort': 'low', 'additional_drop_params': None, 'messages': [{'role': 'user', 'content': 'How are you?'}], 'thinking': None}
06:41:28 - LiteLLM:DEBUG: utils.py:3114 -
LiteLLM: Non-Default params passed to completion() {'stream': True, 'reasoning_effort': 'low'}
06:41:28 - LiteLLM:DEBUG: utils.py:332 - Final returned optional params: {'stream': True, 'thinking': {'type': 'enabled', 'budget_tokens': 1024}, 'max_tokens': 1280}
06:41:28 - LiteLLM:DEBUG: litellm_logging.py:426 - self.optional_params: {'stream': True, 'thinking': {'type': 'enabled', 'budget_tokens': 1024}, 'max_tokens': 1280}
06:41:28 - LiteLLM:DEBUG: litellm_logging.py:789 -
POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'anthropic-version: 20****01' -H 'x-api-key: sk****AA' -H 'accept: ap****on' -H 'content-type: ap****on' \
-d '{'model': 'claude-3-7-sonnet-20250219', 'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'How are you?'}]}], 'thinking': {'type': 'enabled', 'budget_tokens': 1024}, 'max_tokens': 1280}'
06:41:28 - LiteLLM:DEBUG: main.py:5676 - _is_function_call: False
06:41:28 - LiteLLM:DEBUG: main.py:5676 - makes async anthropic streaming POST request
06:41:28 - LiteLLM:DEBUG: utils.py:332 - RAW RESPONSE:
<coroutine object AnthropicChatCompletion.acompletion_stream_function at 0x71d46f443130>
06:41:29 - LiteLLM:DEBUG: logging_utils.py:117 - `logging_obj` not found - unable to track `llm_api_duration_ms
06:41:29 - LiteLLM:DEBUG: utils.py:332 - RAW RESPONSE:
<litellm.llms.anthropic.chat.handler.ModelResponseIterator object at 0x71d47351a510>
06:41:29 - LiteLLM:INFO: cost_calculator.py:637 - selected model name for cost calculation: anthropic/claude-3-7-sonnet-20250219
06:41:29 - LiteLLM:DEBUG: utils.py:332 - Token Counter - using generic token counter, for model=anthropic/claude-3-7-sonnet-20250219
06:41:29 - LiteLLM:DEBUG: utils.py:332 - LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
06:41:29 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:29 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:29 - LiteLLM:DEBUG: litellm_logging.py:1011 - response_cost: 0.0
06:41:29 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(reasoning_content='This', thinking_blocks=[{'type': 'thinking', 'thinking': 'This', 'signature': None}], provider_specific_fields={'thinking_blocks': [{'type': 'thinking', 'thinking': 'This', 'signature': None}]}, content='', role='assistant', function_call=None, tool_calls=None, audio=None)
06:41:29 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(reasoning_content=' is a simple greeting or', thinking_blocks=[{'type': 'thinking', 'thinking': ' is a simple greeting or', 'signature': None}], provider_specific_fields={'thinking_blocks': [{'type': 'thinking', 'thinking': ' is a simple greeting or', 'signature': None}]}, content='', role=None, function_call=None, tool_calls=None, audio=None)
06:41:29 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(reasoning_content=' check-in question. I', thinking_blocks=[{'type': 'thinking', 'thinking': ' check-in question. I', 'signature': None}], provider_specific_fields={'thinking_blocks': [{'type': 'thinking', 'thinking': ' check-in question. I', 'signature': None}]}, content='', role=None, function_call=None, tool_calls=None, audio=None)
06:41:29 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(reasoning_content=' should respond in a friendly', thinking_blocks=[{'type': 'thinking', 'thinking': ' should respond in a friendly', 'signature': None}], provider_specific_fields={'thinking_blocks': [{'type': 'thinking', 'thinking': ' should respond in a friendly', 'signature': None}]}, content='', role=None, function_call=None, tool_calls=None, audio=None)
06:41:29 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(reasoning_content=' and conversational way,', thinking_blocks=[{'type': 'thinking', 'thinking': ' and conversational way,', 'signature': None}], provider_specific_fields={'thinking_blocks': [{'type': 'thinking', 'thinking': ' and conversational way,', 'signature': None}]}, content='', role=None, function_call=None, tool_calls=None, audio=None)
06:41:29 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(reasoning_content=' letting the person know my', thinking_blocks=[{'type': 'thinking', 'thinking': ' letting the person know my', 'signature': None}], provider_specific_fields={'thinking_blocks': [{'type': 'thinking', 'thinking': ' letting the person know my', 'signature': None}]}, content='', role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(reasoning_content=' status as an AI assistant.', thinking_blocks=[{'type': 'thinking', 'thinking': ' status as an AI assistant.', 'signature': None}], provider_specific_fields={'thinking_blocks': [{'type': 'thinking', 'thinking': ' status as an AI assistant.', 'signature': None}]}, content='', role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(reasoning_content='', thinking_blocks=[{'type': 'thinking', 'thinking': '', 'signature': 'ErUBCkYIAhgCIkCxV1MwuVCFc9psQD0NzML//3XFEhW3lKsagKnnpdnSZgwHTExKFAxfGR48yGW+lZo9GhsV67usJ4AwaT6HqZYjEgz+htp/4T2ZvLi/CXwaDNeIQlv19qn5tcoTxiIw/Nv6R5z+vTCs9BTUpQiN23xScM62mY0xn5FkBrskUH76eKj3zj+JjGN3rTvow62DKh0sL0Ns7ufMhHXwMGJ0lk1S1LtU5FFCzuyUgzuXYhgC'}], provider_specific_fields={'thinking_blocks': [{'type': 'thinking', 'thinking': '', 'signature': 'ErUBCkYIAhgCIkCxV1MwuVCFc9psQD0NzML//3XFEhW3lKsagKnnpdnSZgwHTExKFAxfGR48yGW+lZo9GhsV67usJ4AwaT6HqZYjEgz+htp/4T2ZvLi/CXwaDNeIQlv19qn5tcoTxiIw/Nv6R5z+vTCs9BTUpQiN23xScM62mY0xn5FkBrskUH76eKj3zj+JjGN3rTvow62DKh0sL0Ns7ufMhHXwMGJ0lk1S1LtU5FFCzuyUgzuXYhgC'}]}, content='', role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content="I'm doing well, thank you", role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=" for asking! I'm", role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' ready to help you with information', role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=', answer questions, or have a', role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=" conversation about topics you're interested in.", role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' How about you? Is there something', role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' specific I can assist you with today?', role=None, function_call=None, tool_calls=None, audio=None)
06:41:30 - LiteLLM:DEBUG: utils.py:332 - Token Counter - using generic token counter, for model=
06:41:30 - LiteLLM:DEBUG: utils.py:332 - LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
True
06:41:30 - LiteLLM:DEBUG: litellm_logging.py:1232 - Logging Details LiteLLM-Success Call: Cache_hit=False
06:41:30 - LiteLLM:DEBUG: litellm_logging.py:1257 - Logging Details LiteLLM-Success Call streaming complete
06:41:30 - LiteLLM:INFO: cost_calculator.py:637 - selected model name for cost calculation: anthropic/claude-3-7-sonnet-20250219
06:41:30 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:30 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:30 - LiteLLM:DEBUG: litellm_logging.py:1011 - response_cost: 0.0015119999999999999
06:41:30 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:30 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:30 - LiteLLM:DEBUG: utils.py:332 -
06:41:30 - LiteLLM:DEBUG: utils.py:332 - Request to litellm:
06:41:30 - LiteLLM:DEBUG: utils.py:332 - litellm.acompletion(model='anthropic/claude-3-7-sonnet-20250219', messages=[{'role': 'user', 'content': 'ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB'}], stream=True, reasoning_effort='low')
06:41:30 - LiteLLM:DEBUG: utils.py:332 -
06:41:30 - LiteLLM:DEBUG: litellm_logging.py:426 - self.optional_params: {}
06:41:30 - LiteLLM:DEBUG: utils.py:332 - ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
06:41:30 - LiteLLM:DEBUG: caching_handler.py:210 - CACHE RESULT: None
06:41:30 - LiteLLM:DEBUG: utils.py:332 - Logging Details LiteLLM-Async Success Call, cache_hit=False
06:41:30 - LiteLLM:DEBUG: utils.py:332 - Async success callbacks: Got a complete streaming response
06:41:30 - LiteLLM:INFO: cost_calculator.py:637 - selected model name for cost calculation: anthropic/claude-3-7-sonnet-20250219
06:41:30 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:30 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:30 - LiteLLM:DEBUG: litellm_logging.py:1011 - response_cost: 0.0015119999999999999
06:41:30 - LiteLLM:DEBUG: litellm_logging.py:1796 - Model=claude-3-7-sonnet-20250219; cost=0.0015119999999999999
06:41:30 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:30 - LiteLLM:INFO: utils.py:3108 -
LiteLLM completion() model= claude-3-7-sonnet-20250219; provider = anthropic
06:41:30 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:30 - LiteLLM:DEBUG: utils.py:3111 -
LiteLLM: Params passed to completion() {'model': 'claude-3-7-sonnet-20250219', 'functions': None, 'function_call': None, 'temperature': None, 'top_p': None, 'n': None, 'stream': True, 'stream_options': None, 'stop': None, 'max_tokens': None, 'max_completion_tokens': None, 'modalities': None, 'prediction': None, 'audio': None, 'presence_penalty': None, 'frequency_penalty': None, 'logit_bias': None, 'user': None, 'custom_llm_provider': 'anthropic', 'response_format': None, 'seed': None, 'tools': None, 'tool_choice': None, 'max_retries': None, 'logprobs': None, 'top_logprobs': None, 'extra_headers': None, 'api_version': None, 'parallel_tool_calls': None, 'drop_params': None, 'allowed_openai_params': None, 'reasoning_effort': 'low', 'additional_drop_params': None, 'messages': [{'role': 'user', 'content': 'ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB'}], 'thinking': None}
06:41:30 - LiteLLM:DEBUG: utils.py:3114 -
LiteLLM: Non-Default params passed to completion() {'stream': True, 'reasoning_effort': 'low'}
06:41:30 - LiteLLM:DEBUG: utils.py:332 - Final returned optional params: {'stream': True, 'thinking': {'type': 'enabled', 'budget_tokens': 1024}, 'max_tokens': 1280}
06:41:30 - LiteLLM:DEBUG: litellm_logging.py:426 - self.optional_params: {'stream': True, 'thinking': {'type': 'enabled', 'budget_tokens': 1024}, 'max_tokens': 1280}
06:41:30 - LiteLLM:DEBUG: litellm_logging.py:789 -
POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'anthropic-version: 20****01' -H 'x-api-key: sk****AA' -H 'accept: ap****on' -H 'content-type: ap****on' \
-d '{'model': 'claude-3-7-sonnet-20250219', 'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB'}]}], 'thinking': {'type': 'enabled', 'budget_tokens': 1024}, 'max_tokens': 1280}'
06:41:30 - LiteLLM:DEBUG: main.py:5676 - _is_function_call: False
06:41:30 - LiteLLM:DEBUG: main.py:5676 - makes async anthropic streaming POST request
06:41:30 - LiteLLM:DEBUG: utils.py:332 - RAW RESPONSE:
<coroutine object AnthropicChatCompletion.acompletion_stream_function at 0x71d46f14cdc0>
06:41:33 - LiteLLM:DEBUG: logging_utils.py:117 - `logging_obj` not found - unable to track `llm_api_duration_ms
06:41:33 - LiteLLM:DEBUG: utils.py:332 - RAW RESPONSE:
<litellm.llms.anthropic.chat.handler.ModelResponseIterator object at 0x71d46f163610>
06:41:33 - LiteLLM:INFO: cost_calculator.py:637 - selected model name for cost calculation: anthropic/claude-3-7-sonnet-20250219
06:41:33 - LiteLLM:DEBUG: utils.py:332 - Token Counter - using generic token counter, for model=anthropic/claude-3-7-sonnet-20250219
06:41:33 - LiteLLM:DEBUG: utils.py:332 - LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
06:41:33 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:33 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:33 - LiteLLM:DEBUG: litellm_logging.py:1011 - response_cost: 0.0
06:41:33 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content="I notice you've sent what", role='assistant', function_call=None, tool_calls=None, audio=None)
06:41:33 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' appears to be a special', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' string or command,', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=" but I don't recognize", role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' this as a valid instruction', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=". I'm Claude", role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=', an AI assistant create', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content='d by Anthropic to', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' be helpful, harmless', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=', and honest.', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content='\n\nIs there something specific I can', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' help you with today?', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=" I'm happy to answer", role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' questions, provide information, have', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' a conversation, or assist', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' with various tasks within', role=None, function_call=None, tool_calls=None, audio=None)
06:41:34 - LiteLLM:DEBUG: streaming_handler.py:827 - model_response.choices[0].delta: Delta(provider_specific_fields=None, content=' my capabilities.', role=None, function_call=None, tool_calls=None, audio=None)
False
06:41:34 - LiteLLM:DEBUG: litellm_logging.py:1232 - Logging Details LiteLLM-Success Call: Cache_hit=False
06:41:34 - LiteLLM:DEBUG: litellm_logging.py:1257 - Logging Details LiteLLM-Success Call streaming complete
06:41:34 - LiteLLM:INFO: cost_calculator.py:637 - selected model name for cost calculation: anthropic/claude-3-7-sonnet-20250219
06:41:34 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:34 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:34 - LiteLLM:DEBUG: litellm_logging.py:1011 - response_cost: 0.003306
06:41:34 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:34 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:34 - LiteLLM:DEBUG: utils.py:332 - Logging Details LiteLLM-Async Success Call, cache_hit=False
06:41:34 - LiteLLM:DEBUG: utils.py:332 - Async success callbacks: Got a complete streaming response
06:41:34 - LiteLLM:INFO: cost_calculator.py:637 - selected model name for cost calculation: anthropic/claude-3-7-sonnet-20250219
06:41:34 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:34 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
06:41:34 - LiteLLM:DEBUG: litellm_logging.py:1011 - response_cost: 0.003306
06:41:34 - LiteLLM:DEBUG: litellm_logging.py:1796 - Model=claude-3-7-sonnet-20250219; cost=0.003306
06:41:34 - LiteLLM:DEBUG: utils.py:4436 - checking potential_model_names in litellm.model_cost: {'split_model': 'claude-3-7-sonnet-20250219', 'combined_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'stripped_model_name': 'claude-3-7-sonnet-20250219', 'combined_stripped_model_name': 'anthropic/claude-3-7-sonnet-20250219', 'custom_llm_provider': 'anthropic'}
06:41:34 - LiteLLM:DEBUG: utils.py:4732 - model_info: {'key': 'claude-3-7-sonnet-20250219', 'max_tokens': 128000, 'max_input_tokens': 200000, 'max_output_tokens': 128000, 'input_cost_per_token': 3e-06, 'cache_creation_input_token_cost': 3.75e-06, 'cache_read_input_token_cost': 3e-07, 'input_cost_per_character': None, 'input_cost_per_token_above_128k_tokens': None, 'input_cost_per_token_above_200k_tokens': None, 'input_cost_per_query': None, 'input_cost_per_second': None, 'input_cost_per_audio_token': None, 'input_cost_per_token_batches': None, 'output_cost_per_token_batches': None, 'output_cost_per_token': 1.5e-05, 'output_cost_per_audio_token': None, 'output_cost_per_character': None, 'output_cost_per_reasoning_token': None, 'output_cost_per_token_above_128k_tokens': None, 'output_cost_per_character_above_128k_tokens': None, 'output_cost_per_token_above_200k_tokens': None, 'output_cost_per_second': None, 'output_cost_per_image': None, 'output_vector_size': None, 'litellm_provider': 'anthropic', 'mode': 'chat', 'supports_system_messages': None, 'supports_response_schema': True, 'supports_vision': True, 'supports_function_calling': True, 'supports_tool_choice': True, 'supports_assistant_prefill': True, 'supports_prompt_caching': True, 'supports_audio_input': False, 'supports_audio_output': False, 'supports_pdf_input': True, 'supports_embedding_image_input': False, 'supports_native_streaming': None, 'supports_web_search': False, 'supports_reasoning': True, 'search_context_cost_per_query': None, 'tpm': None, 'rpm': None}
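For what it's worth, the provider side can be inspected directly with the Anthropic SDK (a sketch, not part of the run above; per Anthropic's streaming docs, a redacted thinking block arrives whole in a content_block_start event rather than as deltas). If the block appears on the wire but not in LiteLLM's chunks, the drop would be somewhere in LiteLLM's Anthropic ModelResponseIterator (seen in the logs above).

# Sketch: inspect the raw Anthropic stream for redacted_thinking blocks.
# Assumes a recent `anthropic` SDK and ANTHROPIC_API_KEY in the environment.
import anthropic

MAGIC_STRING = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB"

client = anthropic.Anthropic()
with client.messages.stream(
    model="claude-3-7-sonnet-20250219",
    max_tokens=1280,
    thinking={"type": "enabled", "budget_tokens": 1024},
    messages=[{"role": "user", "content": MAGIC_STRING}],
) as stream:
    for event in stream:
        # Per the docs, a redacted block should arrive whole in this event.
        if event.type == "content_block_start":
            print(event.content_block.type)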
Are you an ML Ops Team?
No
What LiteLLM version are you on?
v1.67.2
Twitter / LinkedIn details
No response