Skip to content

Commit 14b4b19

Browse files
committed
docs: improved readme + fix csv scraper imports
1 parent 0b582be commit 14b4b19

19 files changed

+150
-112
lines changed

examples/csv_scraper_graph/ollama/csv_scraper_graph_multi_ollama.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
"""
44

55
import os
6-
import pandas as pd
6+
77
from scrapegraphai.graphs import CSVScraperMultiGraph
8-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
8+
from scrapegraphai.utils import prettify_exec_info
99

1010
# ************************************************
1111
# Read the CSV file
@@ -15,7 +15,8 @@
1515
curr_dir = os.path.dirname(os.path.realpath(__file__))
1616
file_path = os.path.join(curr_dir, FILE_NAME)
1717

18-
text = pd.read_csv(file_path)
18+
with open(file_path, "r") as file:
19+
text = file.read()
1920

2021
# ************************************************
2122
# Define the configuration for the graph
@@ -44,7 +45,7 @@
4445
csv_scraper_graph = CSVScraperMultiGraph(
4546
prompt="List me all the last names",
4647
source=[str(text), str(text)],
47-
config=graph_config
48+
config=graph_config,
4849
)
4950

5051
result = csv_scraper_graph.run()
@@ -56,7 +57,3 @@
5657

5758
graph_exec_info = csv_scraper_graph.get_execution_info()
5859
print(prettify_exec_info(graph_exec_info))
59-
60-
# Save to json or csv
61-
convert_to_csv(result, "result")
62-
convert_to_json(result, "result")

examples/csv_scraper_graph/ollama/csv_scraper_ollama.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
"""
44

55
import os
6-
import pandas as pd
6+
77
from scrapegraphai.graphs import CSVScraperGraph
8-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
8+
from scrapegraphai.utils import prettify_exec_info
99

1010
# ************************************************
1111
# Read the CSV file
@@ -15,7 +15,8 @@
1515
curr_dir = os.path.dirname(os.path.realpath(__file__))
1616
file_path = os.path.join(curr_dir, FILE_NAME)
1717

18-
text = pd.read_csv(file_path)
18+
with open(file_path, "r") as file:
19+
text = file.read()
1920

2021
# ************************************************
2122
# Define the configuration for the graph
@@ -44,7 +45,7 @@
4445
csv_scraper_graph = CSVScraperGraph(
4546
prompt="List me all the last names",
4647
source=str(text), # Pass the content of the file, not the file object
47-
config=graph_config
48+
config=graph_config,
4849
)
4950

5051
result = csv_scraper_graph.run()
@@ -56,7 +57,3 @@
5657

5758
graph_exec_info = csv_scraper_graph.get_execution_info()
5859
print(prettify_exec_info(graph_exec_info))
59-
60-
# Save to json or csv
61-
convert_to_csv(result, "result")
62-
convert_to_json(result, "result")

examples/csv_scraper_graph/openai/csv_scraper_graph_multi_openai.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""
22
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
33
"""
4+
45
import os
6+
57
from dotenv import load_dotenv
6-
import pandas as pd
8+
79
from scrapegraphai.graphs import CSVScraperMultiGraph
8-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
10+
from scrapegraphai.utils import prettify_exec_info
911

1012
load_dotenv()
1113
# ************************************************
@@ -16,15 +18,16 @@
1618
curr_dir = os.path.dirname(os.path.realpath(__file__))
1719
file_path = os.path.join(curr_dir, FILE_NAME)
1820

19-
text = pd.read_csv(file_path)
21+
with open(file_path, "r") as file:
22+
text = file.read()
2023

2124
# ************************************************
2225
# Define the configuration for the graph
2326
# ************************************************
2427
openai_key = os.getenv("OPENAI_APIKEY")
2528

2629
graph_config = {
27-
"llm": {
30+
"llm": {
2831
"api_key": openai_key,
2932
"model": "openai/gpt-4o",
3033
},
@@ -37,7 +40,7 @@
3740
csv_scraper_graph = CSVScraperMultiGraph(
3841
prompt="List me all the last names",
3942
source=[str(text), str(text)],
40-
config=graph_config
43+
config=graph_config,
4144
)
4245

4346
result = csv_scraper_graph.run()
@@ -49,7 +52,3 @@
4952

5053
graph_exec_info = csv_scraper_graph.get_execution_info()
5154
print(prettify_exec_info(graph_exec_info))
52-
53-
# Save to json or csv
54-
convert_to_csv(result, "result")
55-
convert_to_json(result, "result")

examples/csv_scraper_graph/openai/csv_scraper_openai.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""
22
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
33
"""
4+
45
import os
6+
57
from dotenv import load_dotenv
6-
import pandas as pd
8+
79
from scrapegraphai.graphs import CSVScraperGraph
8-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
10+
from scrapegraphai.utils import prettify_exec_info
911

1012
load_dotenv()
1113

@@ -17,7 +19,8 @@
1719
curr_dir = os.path.dirname(os.path.realpath(__file__))
1820
file_path = os.path.join(curr_dir, FILE_NAME)
1921

20-
text = pd.read_csv(file_path)
22+
with open(file_path, "r") as file:
23+
text = file.read()
2124

2225
# ************************************************
2326
# Define the configuration for the graph
@@ -39,7 +42,7 @@
3942
csv_scraper_graph = CSVScraperGraph(
4043
prompt="List me all the last names",
4144
source=str(text), # Pass the content of the file, not the file object
42-
config=graph_config
45+
config=graph_config,
4346
)
4447

4548
result = csv_scraper_graph.run()
@@ -51,7 +54,3 @@
5154

5255
graph_exec_info = csv_scraper_graph.get_execution_info()
5356
print(prettify_exec_info(graph_exec_info))
54-
55-
# Save to json or csv
56-
convert_to_csv(result, "result")
57-
convert_to_json(result, "result")

examples/readme.md

+49-19
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,62 @@
1-
# Scrapegraph-ai Examples
1+
# 🕷️ Scrapegraph-ai Examples
22

3-
This directory contains various example implementations of Scrapegraph-ai for different use cases.
3+
This directory contains various example implementations of Scrapegraph-ai for different use cases. Each example demonstrates how to leverage the power of Scrapegraph-ai for specific scenarios.
44

5-
If you want more specific examples, visit [this](https://github.com/ScrapeGraphAI/ScrapegraphLib-Examples).
5+
> **Note:** While these examples showcase implementations using OpenAI and Ollama, Scrapegraph-ai supports many other LLM providers! Check out our [documentation](https://docs-oss.scrapegraphai.com/examples) for the full list of supported providers.
66
7-
## Available Examples
7+
## 📚 Available Examples
88

9-
- `smart_scraper/` - Advanced web scraping with intelligent content extraction
10-
- `depth_search_graph/` - Deep web crawling and content exploration
11-
- `csv_scraper_graph/` - Scraping and processing data into CSV format
12-
- `xml_scraper_graph/` - XML data extraction and processing
13-
- `speech_graph/` - Speech processing and analysis
14-
- `omni_scraper_graph/` - Universal web scraping for multiple data types
15-
- `omni_search_graph/` - Comprehensive search across multiple sources
16-
- `document_scraper_graph/` - Document parsing and data extraction
17-
- `script_generator_graph/` - Automated script generation
18-
- `custom_graph/` - Custom graph implementation examples
19-
- `code_generator_graph/` - Code generation utilities
20-
- `json_scraper_graph/` - JSON data extraction and processing
21-
- `search_graph/` - Web search and data retrieval
9+
- 🧠 `smart_scraper_graph/` - Advanced web scraping with intelligent content extraction
10+
- 🔎 `search_graph/` - Web search and data retrieval
11+
- ⚙️ `script_generator_graph/` - Automated script generation
12+
- 🌐 `depth_search_graph/` - Deep web crawling and content exploration
13+
- 📊 `csv_scraper_graph/` - Extracting and processing data from CSV documents
14+
- 📑 `xml_scraper_graph/` - XML data extraction and processing
15+
- 🎤 `speech_graph/` - Speech processing and analysis
16+
- 🔄 `omni_scraper_graph/` - Universal web scraping for multiple data types
17+
- 🔍 `omni_search_graph/` - Comprehensive search across multiple sources
18+
- 📄 `document_scraper_graph/` - Document parsing and data extraction
19+
- 🛠️ `custom_graph/` - Custom graph implementation examples
20+
- 💻 `code_generator_graph/` - Code generation utilities
21+
- 📋 `json_scraper_graph/` - JSON data extraction and processing
2222

23-
## Getting Started
23+
## 🚀 Getting Started
2424

2525
1. Choose the example that best fits your use case
2626
2. Navigate to the corresponding directory
2727
3. Follow the README instructions in each directory
2828
4. Configure any required environment variables using the provided `.env.example` files
2929

30-
## Requirements
30+
## ⚡ Quick Setup
31+
32+
```bash
33+
pip install scrapegraphai
34+
35+
playwright install
36+
37+
# choose an example
38+
cd examples/smart_scraper_graph/openai
39+
40+
# run the example
41+
python smart_scraper_openai.py
42+
```
43+
44+
## 📋 Requirements
3145

3246
Each example may have its own specific requirements. Please refer to the individual README files in each directory for detailed setup instructions.
47+
48+
## 📚 Additional Resources
49+
50+
- 📖 [Full Documentation](https://docs-oss.scrapegraphai.com/examples)
51+
- 💡 [Examples Repository](https://github.com/ScrapeGraphAI/ScrapegraphLib-Examples)
52+
- 🤝 [Community Support](https://github.com/ScrapeGraphAI/scrapegraph-ai/discussions)
53+
54+
## 🤔 Need Help?
55+
56+
- Check out our [documentation](https://docs-oss.scrapegraphai.com)
57+
- Join our [Discord community](https://discord.gg/scrapegraphai)
58+
- Open an [issue](https://github.com/ScrapeGraphAI/scrapegraph-ai/issues)
59+
60+
---
61+
62+
⭐ Don't forget to star our repository if you find these examples helpful!

examples/smart_scraper/.env.example renamed to examples/smart_scraper_graph/.env.example

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ OPENAI_API_KEY=your-openai-api-key-here
44
# Optional Configurations
55
MAX_TOKENS=4000
66
MODEL_NAME=gpt-4-1106-preview
7-
TEMPERATURE=0.7
7+
TEMPERATURE=0.7

examples/smart_scraper/README.md renamed to examples/smart_scraper_graph/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ results = graph.scrape("https://example.com")
2727
## Environment Variables
2828

2929
Required environment variables:
30-
- `OPENAI_API_KEY`: Your OpenAI API key
30+
- `OPENAI_API_KEY`: Your OpenAI API key

examples/smart_scraper/ollama/smart_scraper_lite_ollama.py renamed to examples/smart_scraper_graph/ollama/smart_scraper_lite_ollama.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
"""
1+
"""
22
Basic example of scraping pipeline using SmartScraper
33
44
"""
5+
56
import json
7+
68
from scrapegraphai.graphs import SmartScraperLiteGraph
79
from scrapegraphai.utils import prettify_exec_info
810

@@ -14,13 +16,13 @@
1416
"base_url": "http://localhost:11434",
1517
},
1618
"verbose": True,
17-
"headless": False
19+
"headless": False,
1820
}
1921

2022
smart_scraper_lite_graph = SmartScraperLiteGraph(
2123
prompt="Who is Marco Perini?",
2224
source="https://perinim.github.io/",
23-
config=graph_config
25+
config=graph_config,
2426
)
2527

2628
result = smart_scraper_lite_graph.run()

examples/smart_scraper/ollama/smart_scraper_multi_concat_ollama.py renamed to examples/smart_scraper_graph/ollama/smart_scraper_multi_concat_ollama.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
"""
1+
"""
22
Basic example of scraping pipeline using SmartScraper
33
"""
44

5-
import os
65
import json
6+
77
from dotenv import load_dotenv
8+
89
from scrapegraphai.graphs import SmartScraperMultiConcatGraph
910

1011
load_dotenv()
@@ -18,10 +19,10 @@
1819
"model": "ollama/llama3.1",
1920
"temperature": 0,
2021
"format": "json", # Ollama needs the format to be specified explicitly
21-
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
22+
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
2223
},
2324
"verbose": True,
24-
"headless": False
25+
"headless": False,
2526
}
2627

2728
# *******************************************************
@@ -30,12 +31,9 @@
3031

3132
multiple_search_graph = SmartScraperMultiConcatGraph(
3233
prompt="Who is Marco Perini?",
33-
source= [
34-
"https://perinim.github.io/",
35-
"https://perinim.github.io/cv/"
36-
],
34+
source=["https://perinim.github.io/", "https://perinim.github.io/cv/"],
3735
schema=None,
38-
config=graph_config
36+
config=graph_config,
3937
)
4038

4139
result = multiple_search_graph.run()

examples/smart_scraper/ollama/smart_scraper_multi_lite_ollama.py renamed to examples/smart_scraper_graph/ollama/smart_scraper_multi_lite_ollama.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
"""
1+
"""
22
Basic example of scraping pipeline using SmartScraper
33
"""
4+
45
import json
6+
57
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
68
from scrapegraphai.utils import prettify_exec_info
79

@@ -17,7 +19,7 @@
1719
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
1820
},
1921
"verbose": True,
20-
"headless": False
22+
"headless": False,
2123
}
2224

2325
# ************************************************
@@ -26,11 +28,8 @@
2628

2729
smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
2830
prompt="Who is Marco Perini?",
29-
source= [
30-
"https://perinim.github.io/",
31-
"https://perinim.github.io/cv/"
32-
],
33-
config=graph_config
31+
source=["https://perinim.github.io/", "https://perinim.github.io/cv/"],
32+
config=graph_config,
3433
)
3534

3635
result = smart_scraper_multi_lite_graph.run()
@@ -42,4 +41,3 @@
4241

4342
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
4443
print(prettify_exec_info(graph_exec_info))
45-

0 commit comments

Comments
 (0)