Skip to content

Commit 5cdf055

Browse files
committed
chore: made some libs optional
1 parent 54c69a2 commit 5cdf055

17 files changed

+65
-1393
lines changed

README.md

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -32,36 +32,12 @@ The reference page for Scrapegraph-ai is available on the official page of PyPI:
3232
```bash
3333
pip install scrapegraphai
3434

35-
# IMPORTANT (to fetch webpage content)
35+
# IMPORTANT (to fetch website content)
3636
playwright install
3737
```
3838

3939
**Note**: it is recommended to install the library in a virtual environment to avoid conflicts with other libraries 🐱
4040

41-
<details>
42-
<summary><b>Optional Dependencies</b></summary>
43-
Additional dependencies can be added while installing the library:
44-
45-
- <b>More Language Models</b>: additional language models are installed, such as Fireworks, Groq, Anthropic, Hugging Face, and Nvidia AI Endpoints.
46-
47-
This group allows you to use additional language models like Fireworks, Groq, Anthropic, Together AI, Hugging Face, and Nvidia AI Endpoints.
48-
```bash
49-
pip install scrapegraphai[other-language-models]
50-
```
51-
- <b>Semantic Options</b>: this group includes tools for advanced semantic processing, such as Graphviz.
52-
53-
```bash
54-
pip install scrapegraphai[more-semantic-options]
55-
```
56-
57-
- <b>Browsers Options</b>: this group includes additional browser management tools/services, such as Browserbase.
58-
59-
```bash
60-
pip install scrapegraphai[more-browser-options]
61-
```
62-
63-
</details>
64-
6541

6642
## 💻 Usage
6743
There are multiple standard scraping pipelines that can be used to extract information from a website (or local file).

docs/turkish.md

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,31 +31,6 @@ playwright install
3131

3232
**Not**: Diğer kütüphanelerle çakışmaları önlemek için kütüphaneyi sanal bir ortamda kurmanız önerilir 🐱
3333

34-
<details>
35-
<summary><b>Opsiyonel Bağımlılıklar</b></summary>
36-
Kütüphaneyi kurarken ek bağımlılıklar ekleyebilirsiniz:
37-
38-
- **Daha Fazla Dil Modeli**: Fireworks, Groq, Anthropic, Hugging Face ve Nvidia AI Endpoints gibi ek dil modelleri kurulur.
39-
40-
Bu grup, Fireworks, Groq, Anthropic, Together AI, Hugging Face ve Nvidia AI Endpoints gibi ek dil modellerini kullanmanızı sağlar.
41-
42-
```bash
43-
pip install scrapegraphai[other-language-models]
44-
```
45-
46-
- **Semantik Seçenekler**: Graphviz gibi gelişmiş semantik işleme araçlarını içerir.
47-
48-
```bash
49-
pip install scrapegraphai[more-semantic-options]
50-
```
51-
52-
- **Tarayıcı Seçenekleri**: Browserbase gibi ek tarayıcı yönetim araçları/hizmetlerini içerir.
53-
54-
```bash
55-
pip install scrapegraphai[more-browser-options]
56-
```
57-
58-
</details>
5934

6035
## 💻 Kullanım
6136

examples/openai/depth_search_graph_openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
load_dotenv()
99

10-
openai_key = os.getenv("OPENAI_APIKEY")
10+
openai_key = os.getenv("OPENAI_API_KEY")
1111

1212
graph_config = {
1313
"llm": {

examples/openai/search_graph_openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# Define the configuration for the graph
1212
# ************************************************
1313

14-
openai_key = os.getenv("OPENAI_APIKEY")
14+
openai_key = os.getenv("OPENAI_API_KEY")
1515

1616
graph_config = {
1717
"llm": {

examples/openai/speech_graph_openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# Define the configuration for the graph
2121
# ************************************************
2222

23-
openai_key = os.getenv("OPENAI_APIKEY")
23+
openai_key = os.getenv("OPENAI_API_KEY")
2424

2525
graph_config = {
2626
"llm": {

funding.json

Lines changed: 0 additions & 83 deletions
This file was deleted.

pyproject.toml

Lines changed: 6 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,11 @@ authors = [
1111

1212
dependencies = [
1313
"langchain>=0.3.0",
14-
"langchain-google-genai>=1.0.7",
1514
"langchain-openai>=0.1.22",
1615
"langchain-mistralai>=0.1.12",
1716
"langchain_community>=0.2.9",
1817
"langchain-aws>=0.1.3",
19-
"mistral-common>=1.4.0",
18+
"langchain-ollama>=0.1.3",
2019
"html2text>=2024.2.26",
2120
"beautifulsoup4>=4.12.3",
2221
"python-dotenv>=1.0.1",
@@ -26,16 +25,11 @@ dependencies = [
2625
"free-proxy>=1.1.1",
2726
"playwright>=1.43.0",
2827
"undetected-playwright>=0.3.0",
29-
"langchain-ollama>=0.1.3",
3028
"semchunk>=2.2.0",
31-
"qdrant-client>=1.11.3",
32-
"fastembed>=0.3.6",
33-
34-
"transformers>=4.44.2",
3529
"googlesearch-python>=1.2.5",
3630
"async-timeout>=4.0.3",
3731
"simpleeval>=1.0.0",
38-
"scrapegraph-py>=1.7.0"
32+
"jsonschema>=4.23.0",
3933
]
4034

4135
readme = "README.md"
@@ -73,30 +67,7 @@ requires-python = ">=3.10,<4.0"
7367
[project.optional-dependencies]
7468
burr = ["burr[start]==0.22.1"]
7569
docs = ["sphinx==6.0", "furo==2024.5.6"]
76-
77-
# Group 1: Other Language Models
78-
other-language-models = [
79-
"langchain-google-vertexai>=1.0.7",
80-
"langchain-fireworks>=0.1.3",
81-
"langchain-groq>=0.1.3",
82-
"langchain-anthropic>=0.1.11",
83-
"langchain-huggingface>=0.0.3",
84-
"langchain-nvidia-ai-endpoints>=0.1.6",
85-
"langchain_together>=0.2.0"
86-
]
87-
88-
# Group 2: More Semantic Options
89-
more-semantic-options = [
90-
"graphviz>=0.20.3",
91-
]
92-
93-
# Group 3: More Browser Options
94-
more-browser-options = [
95-
"browserbase>=0.3.0",
96-
]
97-
98-
# Group 4: Surya Library
99-
screenshot_scraper = [
70+
ocr = [
10071
"surya-ocr>=0.5.0",
10172
"matplotlib>=3.7.2",
10273
"ipywidgets>=8.1.0",
@@ -105,21 +76,13 @@ screenshot_scraper = [
10576

10677
[build-system]
10778
requires = ["hatchling==1.26.3"]
108-
10979
build-backend = "hatchling.build"
11080

111-
[dependency-groups]
112-
dev = [
113-
"burr[start]==0.22.1",
114-
"sphinx==6.0",
115-
"furo==2024.5.6",
116-
]
117-
11881
[tool.uv]
11982
dev-dependencies = [
120-
"poethepoet>=0.31.1",
121-
"pytest==8.0.0",
122-
"pytest-mock==3.14.0",
83+
"pytest>=8.0.0",
84+
"pytest-mock>=3.14.0",
85+
"pytest-asyncio>=0.25.0",
12386
"pylint>=3.2.5",
12487
]
12588

scrapegraphai/builders/graph_builder.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from langchain_core.prompts import ChatPromptTemplate
55
from langchain.chains import create_extraction_chain
66
from langchain_community.chat_models import ErnieBotChat
7-
from langchain_google_genai import ChatGoogleGenerativeAI
87
from langchain_openai import ChatOpenAI
98
from ..helpers import nodes_metadata, graph_schema
109

@@ -70,6 +69,10 @@ def _create_llm(self, llm_config: dict):
7069
if "gpt-" in llm_params["model"]:
7170
return ChatOpenAI(llm_params)
7271
elif "gemini" in llm_params["model"]:
72+
try:
73+
from langchain_google_genai import ChatGoogleGenerativeAI
74+
except ImportError:
75+
raise ImportError("langchain_google_genai is not installed. Please install it using 'pip install langchain-google-genai'.")
7376
return ChatGoogleGenerativeAI(llm_params)
7477
elif "ernie" in llm_params["model"]:
7578
return ErnieBotChat(llm_params)

scrapegraphai/graphs/abstract_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,15 +234,15 @@ def _create_llm(self, llm_config: dict) -> object:
234234
from langchain_together import ChatTogether
235235
except ImportError:
236236
raise ImportError("""The langchain_together module is not installed.
237-
Please install it using `pip install scrapegraphai[other-language-models]`.""")
237+
Please install it using `pip install langchain-together`.""")
238238
return ChatTogether(**llm_params)
239239

240240
elif model_provider == "nvidia":
241241
try:
242242
from langchain_nvidia_ai_endpoints import ChatNVIDIA
243243
except ImportError:
244244
raise ImportError("""The langchain_nvidia_ai_endpoints module is not installed.
245-
Please install it using `pip install scrapegraphai[other-language-models]`.""")
245+
Please install it using `pip install langchain-nvidia-ai-endpoints`.""")
246246
return ChatNVIDIA(**llm_params)
247247

248248
except Exception as e:

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
"""
44
from typing import Optional
55
from pydantic import BaseModel
6-
from scrapegraph_py import Client
7-
from scrapegraph_py.logger import sgai_logger
86
from .base_graph import BaseGraph
97
from .abstract_graph import AbstractGraph
108
from ..nodes import (
@@ -67,7 +65,12 @@ def _create_graph(self) -> BaseGraph:
6765
BaseGraph: A graph instance representing the web scraping workflow.
6866
"""
6967
if self.llm_model == "scrapegraphai/smart-scraper":
70-
68+
try:
69+
from scrapegraph_py import Client
70+
from scrapegraph_py.logger import sgai_logger
71+
except ImportError:
72+
raise ImportError("scrapegraph_py is not installed. Please install it using 'pip install scrapegraph-py'.")
73+
7174
sgai_logger.set_logging(level="INFO")
7275

7376
# Initialize the client with explicit API key

scrapegraphai/nodes/fetch_node.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from ..utils.cleanup_html import cleanup_html
1111
from ..docloaders import ChromiumLoader
1212
from ..utils.convert_to_md import convert_to_md
13-
from ..utils.logging import get_logger
1413
from .base_node import BaseNode
1514

1615
class FetchNode(BaseNode):
@@ -79,24 +78,6 @@ def __init__(
7978
None if node_config is None else node_config.get("storage_state", None)
8079
)
8180

82-
def is_valid_url(self, source: str) -> bool:
83-
"""
84-
Validates if the source string is a valid URL using regex.
85-
86-
Parameters:
87-
source (str): The URL string to validate
88-
89-
Raises:
90-
ValueError: If the URL is invalid
91-
"""
92-
import re
93-
94-
url_pattern = r"^https?://[^\s/$.?#].[^\s]*$"
95-
if not bool(re.match(url_pattern, source)):
96-
raise ValueError(
97-
f"Invalid URL format: {source}. URL must start with http(s):// and contain a valid domain."
98-
)
99-
return True
10081

10182
def execute(self, state):
10283
"""
@@ -129,12 +110,9 @@ def execute(self, state):
129110
elif self.input == "pdf_dir":
130111
return state
131112

132-
# For web sources, validate URL before proceeding
133113
try:
134-
if self.is_valid_url(source):
135-
return self.handle_web_source(state, source)
114+
return self.handle_web_source(state, source)
136115
except ValueError as e:
137-
# Re-raise the exception from is_valid_url
138116
raise
139117

140118
return self.handle_local_source(state, source)

0 commit comments

Comments
 (0)