Skip to content

Commit b4183c7

Browse files
committed
Default to gemini 2.0 flash instead of 1.5 flash on Gemini setup
Add price of gemini 2.0 flash for cost calculations
1 parent 701a7be commit b4183c7

File tree

6 files changed

+11
-10
lines changed

6 files changed

+11
-10
lines changed

.github/workflows/run_evals.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ jobs:
147147
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
148148
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
149149
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
150-
echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY
150+
echo "- Chat Model: Gemini 2.0 Flash" >> $GITHUB_STEP_SUMMARY
151151
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
152152
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
153153
echo "" >> $GITHUB_STEP_SUMMARY

documentation/docs/get-started/setup.mdx

+1-1
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ Using Ollama? See the [Ollama Integration](/advanced/ollama) section for more cu
333333
- Add your [Gemini API key](https://aistudio.google.com/app/apikey)
334334
- Give the configuration a friendly name like `Gemini`. Do not configure the API base url.
335335
2. Create a new [chat model](http://localhost:42110/server/admin/database/chatmodel/add)
336-
- Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-1.5-flash`.
336+
- Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-2.0-flash`.
337337
- Set the `model-type` field to `Google`.
338338
- Set the `ai model api` field to the Gemini AI Model API you created in step 1.
339339

src/khoj/processor/conversation/google/gemini_chat.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
def extract_questions_gemini(
3333
text,
34-
model: Optional[str] = "gemini-1.5-flash",
34+
model: Optional[str] = "gemini-2.0-flash",
3535
conversation_log={},
3636
api_key=None,
3737
temperature=0,
@@ -132,9 +132,9 @@ def gemini_send_message_to_model(
132132

133133
model_kwargs = {}
134134

135-
# Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky.
136-
# if response_type == "json_object":
137-
# model_kwargs["response_mime_type"] = "application/json"
135+
# This caused unwanted behavior and terminated responses early for the gemini 1.5 series. Monitor for flakiness with the 2.0 series.
136+
if response_type == "json_object" and model in ["gemini-2.0-flash"]:
137+
model_kwargs["response_mime_type"] = "application/json"
138138

139139
# Get Response from Gemini
140140
return gemini_completion_with_backoff(
@@ -154,7 +154,7 @@ def converse_gemini(
154154
online_results: Optional[Dict[str, Dict]] = None,
155155
code_results: Optional[Dict[str, Dict]] = None,
156156
conversation_log={},
157-
model: Optional[str] = "gemini-1.5-flash",
157+
model: Optional[str] = "gemini-2.0-flash",
158158
api_key: Optional[str] = None,
159159
temperature: float = 0.2,
160160
completion_func=None,

src/khoj/utils/constants.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"bartowski/Qwen2.5-14B-Instruct-GGUF",
1919
]
2020
default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
21-
default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
21+
default_gemini_chat_models = ["gemini-2.0-flash", "gemini-1.5-pro"]
2222
default_anthropic_chat_models = ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"]
2323

2424
empty_config = {
@@ -46,6 +46,7 @@
4646
"gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
4747
"gemini-1.5-pro": {"input": 1.25, "output": 5.00},
4848
"gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
49+
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
4950
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
5051
"claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
5152
"claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},

tests/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def chat_client_builder(search_config, user, index_content=True, require_auth=Fa
315315
if chat_provider == ChatModel.ModelType.OPENAI:
316316
online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai")
317317
elif chat_provider == ChatModel.ModelType.GOOGLE:
318-
online_chat_model = ChatModelFactory(name="gemini-1.5-flash", model_type="google")
318+
online_chat_model = ChatModelFactory(name="gemini-2.0-flash", model_type="google")
319319
elif chat_provider == ChatModel.ModelType.ANTHROPIC:
320320
online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic")
321321
if online_chat_model:

tests/evals/eval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,7 @@ def main():
629629
response_evaluator = evaluate_response_with_mcq_match
630630
elif args.dataset == "math500":
631631
response_evaluator = partial(
632-
evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-flash-002")
632+
evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-2.0-flash-001")
633633
)
634634
elif args.dataset == "frames_ir":
635635
response_evaluator = evaluate_response_for_ir

0 commit comments

Comments
 (0)