Skip to content

Commit bbc7184

Browse files
committed
add examples for document_scraper
1 parent 7ed16db commit bbc7184

30 files changed

+745
-128
lines changed

examples/anthropic/depth_search_graph_anthropic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
graph_config = {
1111
"llm": {
1212
"api_key": os.getenv("ANTHROPIC_API_KEY"),
13-
"model": "openai/gpt-4o-mini",
13+
"model": "anthropic/claude-3-haiku-20240307",
1414
},
1515
"verbose": True,
1616
"headless": False,
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
document_scraper example
3+
"""
4+
import os
5+
import json
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import DocumentScraperGraph
8+
9+
load_dotenv()
10+
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
16+
graph_config = {
17+
"llm": {
18+
"api_key": os.getenv("ANTHROPIC_API_KEY"),
19+
"model": "anthropic/claude-3-haiku-20240307",
20+
}
21+
}
22+
23+
24+
source = """
25+
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
26+
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
27+
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
28+
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
29+
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
30+
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
31+
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
32+
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
33+
"""
34+
35+
pdf_scraper_graph = DocumentScraperGraph(
36+
prompt="Summarize the text and find the main topics",
37+
source=source,
38+
config=graph_config,
39+
)
40+
result = pdf_scraper_graph.run()
41+
42+
print(json.dumps(result, indent=4))

examples/anthropic/json_scraper_anthropic.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import os
55
from dotenv import load_dotenv
66
from scrapegraphai.graphs import JSONScraperGraph
7-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
87

98
load_dotenv()
109

@@ -27,7 +26,7 @@
2726
"llm": {
2827
"api_key": os.getenv("ANTHROPIC_API_KEY"),
2928
"model": "anthropic/claude-3-haiku-20240307",
30-
},
29+
}
3130
}
3231

3332
# ************************************************
@@ -42,15 +41,3 @@
4241

4342
result = json_scraper_graph.run()
4443
print(result)
45-
46-
# ************************************************
47-
# Get graph execution info
48-
# ************************************************
49-
50-
graph_exec_info = json_scraper_graph.get_execution_info()
51-
print(prettify_exec_info(graph_exec_info))
52-
53-
# Save to json or csv
54-
convert_to_csv(result, "result")
55-
convert_to_json(result, "result")
56-

examples/azure/depth_search_graph_azure.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
"""
2-
depth_search_graph_opeani example
2+
depth_search_graph_azure example
33
"""
44
import os
55
from dotenv import load_dotenv
66
from scrapegraphai.graphs import DepthSearchGraph
77

88
load_dotenv()
99

10-
openai_key = os.getenv("OPENAI_APIKEY")
11-
1210
graph_config = {
1311
"llm": {
1412
"api_key": os.environ["AZURE_OPENAI_KEY"],
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""
2+
document_scraper example
3+
"""
4+
import os
5+
import json
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import DocumentScraperGraph
8+
9+
load_dotenv()
10+
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
16+
graph_config = {
17+
"llm": {
18+
"api_key": os.environ["AZURE_OPENAI_KEY"],
19+
"model": "azure_openai/gpt-4o"
20+
},
21+
"verbose": True,
22+
"headless": False
23+
}
24+
25+
26+
source = """
27+
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
28+
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
29+
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
30+
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
31+
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
32+
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
33+
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
34+
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
35+
"""
36+
37+
pdf_scraper_graph = DocumentScraperGraph(
38+
prompt="Summarize the text and find the main topics",
39+
source=source,
40+
config=graph_config,
41+
)
42+
result = pdf_scraper_graph.run()
43+
44+
print(json.dumps(result, indent=4))

examples/bedrock/csv_scraper_bedrock.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pandas as pd
1111

1212
from scrapegraphai.graphs import CSVScraperGraph
13-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
13+
from scrapegraphai.utils import prettify_exec_info
1414

1515
load_dotenv()
1616

@@ -48,13 +48,3 @@
4848
result = csv_scraper_graph.run()
4949
print(json.dumps(result, indent=4))
5050

51-
# ************************************************
52-
# Get graph execution info
53-
# ************************************************
54-
55-
graph_exec_info = csv_scraper_graph.get_execution_info()
56-
print(prettify_exec_info(graph_exec_info))
57-
58-
# Save to json or csv
59-
convert_to_csv(result, "result")
60-
convert_to_json(result, "result")
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
document_scraper example
3+
"""
4+
import os
5+
import json
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import DocumentScraperGraph
8+
9+
load_dotenv()
10+
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
16+
graph_config = {
17+
"llm": {
18+
"client": "client_name",
19+
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
20+
"temperature": 0.0
21+
}
22+
}
23+
24+
source = """
25+
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
26+
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
27+
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
28+
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
29+
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
30+
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
31+
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
32+
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
33+
"""
34+
35+
pdf_scraper_graph = DocumentScraperGraph(
36+
prompt="Summarize the text and find the main topics",
37+
source=source,
38+
config=graph_config,
39+
)
40+
result = pdf_scraper_graph.run()
41+
42+
print(json.dumps(result, indent=4))
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""
2+
document_scraper example
3+
"""
4+
import os
5+
import json
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import DocumentScraperGraph
8+
9+
load_dotenv()
10+
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
deepseek_key = os.getenv("DEEPSEEK_APIKEY")
16+
17+
graph_config = {
18+
"llm": {
19+
"model": "deepseek/deepseek-chat",
20+
"api_key": deepseek_key,
21+
},
22+
"verbose": True,
23+
}
24+
25+
26+
source = """
27+
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
28+
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
29+
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
30+
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
31+
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
32+
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
33+
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
34+
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
35+
"""
36+
37+
pdf_scraper_graph = DocumentScraperGraph(
38+
prompt="Summarize the text and find the main topics",
39+
source=source,
40+
config=graph_config,
41+
)
42+
result = pdf_scraper_graph.run()
43+
44+
print(json.dumps(result, indent=4))

examples/deepseek/json_scraper_deepseek.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os
55
from dotenv import load_dotenv
66
from scrapegraphai.graphs import JSONScraperGraph
7-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
7+
from scrapegraphai.utils import prettify_exec_info
88

99
load_dotenv()
1010

@@ -44,14 +44,3 @@
4444

4545
result = json_scraper_graph.run()
4646
print(result)
47-
48-
# ************************************************
49-
# Get graph execution info
50-
# ************************************************
51-
52-
graph_exec_info = json_scraper_graph.get_execution_info()
53-
print(prettify_exec_info(graph_exec_info))
54-
55-
# Save to json or csv
56-
convert_to_csv(result, "result")
57-
convert_to_json(result, "result")
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
document_scraper example
3+
"""
4+
import os
5+
import json
6+
from scrapegraphai.graphs import DocumentScraperGraph
7+
8+
# ************************************************
9+
# Define the configuration for the graph
10+
# ************************************************
11+
graph_config = {
12+
"llm": {
13+
"model": "ernie/ernie-bot-turbo",
14+
"ernie_client_id": "<ernie_client_id>",
15+
"ernie_client_secret": "<ernie_client_secret>",
16+
"temperature": 0.1
17+
}
18+
}
19+
20+
21+
source = """
22+
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
23+
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
24+
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
25+
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
26+
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
27+
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
28+
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
29+
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
30+
"""
31+
32+
pdf_scraper_graph = DocumentScraperGraph(
33+
prompt="Summarize the text and find the main topics",
34+
source=source,
35+
config=graph_config,
36+
)
37+
result = pdf_scraper_graph.run()
38+
39+
print(json.dumps(result, indent=4))

examples/ernie/json_scraper_ernie.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
import os
55
from scrapegraphai.graphs import JSONScraperGraph
6-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
6+
from scrapegraphai.utils import prettify_exec_info
77

88
# ************************************************
99
# Read the JSON file
@@ -41,14 +41,3 @@
4141

4242
result = json_scraper_graph.run()
4343
print(result)
44-
45-
# ************************************************
46-
# Get graph execution info
47-
# ************************************************
48-
49-
graph_exec_info = json_scraper_graph.get_execution_info()
50-
print(prettify_exec_info(graph_exec_info))
51-
52-
# Save to json or csv
53-
convert_to_csv(result, "result")
54-
convert_to_json(result, "result")
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""
2+
document_scraper example
3+
"""
4+
import os
5+
import json
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import DocumentScraperGraph
8+
9+
load_dotenv()
10+
11+
# ************************************************
12+
# Define the configuration for the graph
13+
# ************************************************
14+
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
15+
16+
graph_config = {
17+
"llm": {
18+
"api_key": fireworks_api_key,
19+
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
20+
},
21+
"verbose": True,
22+
"headless": False,
23+
}
24+
25+
26+
source = """
27+
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
28+
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
29+
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
30+
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
31+
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
32+
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
33+
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
34+
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
35+
"""
36+
37+
pdf_scraper_graph = DocumentScraperGraph(
38+
prompt="Summarize the text and find the main topics",
39+
source=source,
40+
config=graph_config,
41+
)
42+
result = pdf_scraper_graph.run()
43+
44+
print(json.dumps(result, indent=4))

0 commit comments

Comments
 (0)