Skip to content

Commit 01a331a

Browse files
committed
feat: ⏰ added graph timeout and fixed model_tokens param (#810 #856 #853)
1 parent 5f2df70 commit 01a331a

File tree

6 files changed

+16
-8
lines changed

6 files changed

+16
-8
lines changed

examples/local_models/smart_scraper_ollama.py

+7-4
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,24 @@
1-
"""
1+
"""
22
Basic example of scraping pipeline using SmartScraper
33
"""
4+
45
from scrapegraphai.graphs import SmartScraperGraph
56
from scrapegraphai.utils import prettify_exec_info
7+
68
# ************************************************
79
# Define the configuration for the graph
810
# ************************************************
911

1012
graph_config = {
1113
"llm": {
12-
"model": "ollama/llama3.1",
14+
"model": "ollama/llama3.2:3b",
1315
"temperature": 0,
1416
"format": "json", # Ollama needs the format to be specified explicitly
1517
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
18+
"model_tokens": 1024,
1619
},
1720
"verbose": True,
18-
"headless": False
21+
"headless": False,
1922
}
2023

2124
# ************************************************
@@ -24,7 +27,7 @@
2427
smart_scraper_graph = SmartScraperGraph(
2528
prompt="Find some information about what does the company do, the name and a contact email.",
2629
source="https://scrapegraphai.com/",
27-
config=graph_config
30+
config=graph_config,
2831
)
2932

3033
result = smart_scraper_graph.run()

examples/openai/smart_scraper_openai.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
graph_config = {
2121
"llm": {
2222
"api_key": os.getenv("OPENAI_API_KEY"),
23-
"model": "openai/gpt-4o00",
23+
"model": "openai/gpt-4o-mini",
2424
},
2525
"verbose": True,
2626
"headless": False,

pyproject.toml

+1
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ dependencies = [
3131
"async-timeout>=4.0.3",
3232
"simpleeval>=1.0.0",
3333
"jsonschema>=4.23.0",
34+
"transformers>=4.46.3",
3435
]
3536

3637
readme = "README.md"

scrapegraphai/graphs/abstract_graph.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ def __init__(
6868
self.browser_base = self.config.get("browser_base")
6969
self.scrape_do = self.config.get("scrape_do")
7070
self.storage_state = self.config.get("storage_state")
71+
self.timeout = self.config.get("timeout", 480)
7172

7273
self.graph = self._create_graph()
7374
self.final_state = None
@@ -86,6 +87,7 @@ def __init__(
8687
"loader_kwargs": self.loader_kwargs,
8788
"llm_model": self.llm_model,
8889
"cache_path": self.cache_path,
90+
"timeout": self.timeout,
8991
}
9092

9193
self.set_common_params(common_params, overwrite=True)
@@ -194,7 +196,7 @@ def _create_llm(self, llm_config: dict) -> object:
194196
If possible, try to use a model instance instead."""
195197
)
196198

197-
if "model_tokens" not in llm_params:
199+
if llm_params.get("model_tokens", None) is None:
198200
try:
199201
self.model_token = models_tokens[llm_params["model_provider"]][
200202
llm_params["model"]

scrapegraphai/nodes/generate_answer_node.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def __init__(
6666
self.script_creator = node_config.get("script_creator", False)
6767
self.is_md_scraper = node_config.get("is_md_scraper", False)
6868
self.additional_info = node_config.get("additional_info")
69-
self.timeout = node_config.get("timeout", 120)
69+
self.timeout = node_config.get("timeout", 480)
7070

7171
def invoke_with_timeout(self, chain, inputs, timeout):
7272
"""Helper method to invoke chain with timeout"""

uv.lock

+3-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)