1
- """
1
+ """
2
2
Basic example of scraping pipeline using SmartScraper with schema
3
3
"""
4
+
4
5
import json
5
- from typing import List
6
+
6
7
from pydantic import BaseModel , Field
8
+
7
9
from scrapegraphai .graphs import SmartScraperGraph
8
10
from scrapegraphai .utils import prettify_exec_info
9
11
12
+
10
13
# ************************************************
11
14
# Define the output schema and the configuration for the graph
12
15
# ************************************************
13
16
class Project(BaseModel):
    """Schema for a single project entry in the scraper's structured output."""

    # The Field descriptions are attached to the schema as human-readable
    # metadata for each attribute.
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")
16
19
20
+
17
21
class Projects(BaseModel):
    """Top-level output schema: a collection of ``Project`` entries."""

    # Builtin generic ``list[...]`` (Python 3.9+) is used so that no
    # ``typing.List`` import is required.
    projects: list[Project]
23
+
19
24
20
25
# Configuration dictionary handed to the graph through its ``config`` argument.
graph_config = {
    "llm": {
        # Local model served through Ollama.
        "model": "ollama/llama3.2",
        "temperature": 0,
        # NOTE(review): presumably the model's context-window size in tokens;
        # confirm against the ScrapeGraphAI configuration docs.
        "model_tokens": 4096,
    },
    "verbose": True,
    "headless": False,
}
30
30
31
31
# ************************************************
@@ -36,8 +36,15 @@ class Projects(BaseModel):
36
36
prompt = "List me all the projects with their description" ,
37
37
source = "https://perinim.github.io/projects/" ,
38
38
schema = Projects ,
39
- config = graph_config
39
+ config = graph_config ,
40
40
)
41
41
42
42
# Run the scraping pipeline and print its result as indented JSON.
result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
0 commit comments