Skip to content

Commit 1409797

Browse files
committed
docs: refactor nodes docstrings
1 parent e981796 commit 1409797

12 files changed

+194
-293
lines changed

scrapegraphai/nodes/base_node.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Module for defining BaseNode, an abstract base class for nodes in a graph-based workflow.
2+
BaseNode Module
33
"""
44

55
from abc import ABC, abstractmethod

scrapegraphai/nodes/fetch_node.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Module for fetching the HTML node
2+
FetchNode Module
33
"""
44

55
from typing import List, Optional
@@ -27,10 +27,6 @@ class FetchNode(BaseNode):
2727
output (List[str]): List of output keys to be updated in the state.
2828
node_config (Optional[dict]): Additional configuration for the node.
2929
node_name (str): The unique identifier name for the node, defaulting to "Fetch".
30-
31-
Methods:
32-
execute(state): Fetches the HTML content for the URL specified in the state
33-
and updates the state with the fetched content under the specified output key.
3430
"""
3531

3632
def __init__(self, input: str, output: List[str], node_config: Optional[dict], node_name: str = "Fetch"):
@@ -45,13 +41,14 @@ def execute(self, state):
4541
update the state with this content.
4642
4743
Args:
48-
state (dict): The current state of the graph, expected to contain a 'url' key.
44+
state (dict): The current state of the graph. The input keys will be used
45+
to fetch the correct data from the state.
4946
5047
Returns:
51-
dict: The updated state with a new 'document' key containing the fetched HTML content.
48+
dict: The updated state with a new output key containing the fetched HTML content.
5249
5350
Raises:
54-
KeyError: If the 'url' key is not found in the state, indicating that the
51+
KeyError: If the input key is not found in the state, indicating that the
5552
necessary information to perform the operation is missing.
5653
"""
5754
if self.verbose:

scrapegraphai/nodes/generate_answer_node.py

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
2-
Module for generating the answer node
2+
GenerateAnswerNode Module
33
"""
4+
45
# Imports from standard library
56
from typing import List
67
from tqdm import tqdm
@@ -16,57 +17,43 @@
1617

1718
class GenerateAnswerNode(BaseNode):
1819
"""
19-
A node that generates an answer using a language model (LLM) based on the user's input
20+
A node that generates an answer using a large language model (LLM) based on the user's input
2021
and the content extracted from a webpage. It constructs a prompt from the user's input
2122
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
2223
an answer.
2324
2425
Attributes:
25-
llm: An instance of a language model client, configured for generating answers.
26-
node_name (str): The unique identifier name for the node, defaulting
27-
to "GenerateAnswerNode".
28-
node_type (str): The type of the node, set to "node" indicating a
29-
standard operational node.
26+
llm_model: An instance of a language model client, configured for generating answers.
27+
verbose (bool): A flag indicating whether to show print statements during execution.
3028
3129
Args:
32-
llm: An instance of the language model client (e.g., ChatOpenAI) used
33-
for generating answers.
34-
node_name (str, optional): The unique identifier name for the node.
35-
Defaults to "GenerateAnswerNode".
36-
37-
Methods:
38-
execute(state): Processes the input and document from the state to generate an answer,
39-
updating the state with the generated answer under the 'answer' key.
30+
input (str): Boolean expression defining the input keys needed from the state.
31+
output (List[str]): List of output keys to be updated in the state.
32+
node_config (dict): Additional configuration for the node.
33+
node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswer".
4034
"""
4135

4236
def __init__(self, input: str, output: List[str], node_config: dict,
4337
node_name: str = "GenerateAnswer"):
44-
"""
45-
Initializes the GenerateAnswerNode with a language model client and a node name.
46-
Args:
47-
llm: An instance of the OpenAIImageToText class.
48-
node_name (str): name of the node
49-
"""
5038
super().__init__(node_name, "node", input, output, 2, node_config)
39+
5140
self.llm_model = node_config["llm"]
5241
self.verbose = True if node_config is None else node_config.get("verbose", False)
5342

54-
def execute(self, state):
43+
def execute(self, state: dict) -> dict:
5544
"""
5645
Generates an answer by constructing a prompt from the user's input and the scraped
5746
content, querying the language model, and parsing its response.
5847
59-
The method updates the state with the generated answer under the 'answer' key.
60-
6148
Args:
62-
state (dict): The current state of the graph, expected to contain 'user_input',
63-
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
49+
state (dict): The current state of the graph. The input keys will be used
50+
to fetch the correct data from the state.
6451
6552
Returns:
66-
dict: The updated state with the 'answer' key containing the generated answer.
53+
dict: The updated state with the output key containing the generated answer.
6754
6855
Raises:
69-
KeyError: If 'user_input' or 'document' is not found in the state, indicating
56+
KeyError: If the input keys are not found in the state, indicating
7057
that the necessary information for generating an answer is missing.
7158
"""
7259

scrapegraphai/nodes/generate_scraper_node.py

Lines changed: 22 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
2-
Module for generating the answer node
2+
GenerateScraperNode Module
33
"""
4+
45
# Imports from standard library
56
from typing import List
67
from tqdm import tqdm
@@ -16,58 +17,46 @@
1617

1718
class GenerateScraperNode(BaseNode):
1819
"""
19-
A node that generates an answer using a language model (LLM) based on the user's input
20-
and the content extracted from a webpage. It constructs a prompt from the user's input
21-
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
22-
an answer.
20+
Generates a python script for scraping a website using the specified library.
21+
It takes the user's prompt and the scraped content as input and generates a python script
22+
that extracts the information requested by the user.
2323
2424
Attributes:
25-
llm: An instance of a language model client, configured for generating answers.
26-
node_name (str): The unique identifier name for the node, defaulting
27-
to "GenerateScraperNode".
28-
node_type (str): The type of the node, set to "node" indicating a
29-
standard operational node.
25+
llm_model: An instance of a language model client, configured for generating answers.
26+
library (str): The python library to use for scraping the website.
27+
source (str): The website to scrape.
3028
3129
Args:
32-
llm: An instance of the language model client (e.g., ChatOpenAI) used
33-
for generating answers.
34-
node_name (str, optional): The unique identifier name for the node.
35-
Defaults to "GenerateScraperNode".
36-
37-
Methods:
38-
execute(state): Processes the input and document from the state to generate an answer,
39-
updating the state with the generated answer under the 'answer' key.
30+
input (str): Boolean expression defining the input keys needed from the state.
31+
output (List[str]): List of output keys to be updated in the state.
32+
node_config (dict): Additional configuration for the node.
33+
library (str): The python library to use for scraping the website.
34+
website (str): The website to scrape.
35+
node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswer".
36+
4037
"""
4138

4239
def __init__(self, input: str, output: List[str], node_config: dict,
4340
library: str, website: str, node_name: str = "GenerateAnswer"):
44-
"""
45-
Initializes the GenerateScraperNode with a language model client and a node name.
46-
Args:
47-
llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
48-
node_name (str): name of the node
49-
"""
5041
super().__init__(node_name, "node", input, output, 2, node_config)
42+
5143
self.llm_model = node_config["llm"]
5244
self.library = library
5345
self.source = website
5446

55-
def execute(self, state):
47+
def execute(self, state: dict) -> dict:
5648
"""
57-
Generates an answer by constructing a prompt from the user's input and the scraped
58-
content, querying the language model, and parsing its response.
59-
60-
The method updates the state with the generated answer under the 'answer' key.
49+
Generates a python script for scraping a website using the specified library.
6150
6251
Args:
63-
state (dict): The current state of the graph, expected to contain 'user_input',
64-
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
52+
state (dict): The current state of the graph. The input keys will be used
53+
to fetch the correct data from the state.
6554
6655
Returns:
67-
dict: The updated state with the 'answer' key containing the generated answer.
56+
dict: The updated state with the output key containing the generated python script.
6857
6958
Raises:
70-
KeyError: If 'user_input' or 'document' is not found in the state, indicating
59+
KeyError: If input keys are not found in the state, indicating
7160
that the necessary information for generating the scraping script is missing.
7261
"""
7362

scrapegraphai/nodes/get_probable_tags_node.py

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
2-
Module for proobable tags
2+
GetProbableTagsNode Module
33
"""
4+
45
from typing import List
56
from langchain.output_parsers import CommaSeparatedListOutputParser
67
from langchain.prompts import PromptTemplate
@@ -15,47 +16,36 @@ class GetProbableTagsNode(BaseNode):
1516
list of probable tags.
1617
1718
Attributes:
18-
llm: An instance of a language model client, configured for generating tag predictions.
19-
node_name (str): The unique identifier name for the node,
20-
defaulting to "GetProbableTagsNode".
21-
node_type (str): The type of the node, set to "node" indicating a standard operational node.
19+
llm_model: An instance of the language model client used for tag predictions.
2220
2321
Args:
24-
llm: An instance of the language model client (e.g., ChatOpenAI) used for tag predictions.
25-
node_name (str, optional): The unique identifier name for the node.
26-
Defaults to "GetProbableTagsNode".
27-
28-
Methods:
29-
execute(state): Processes the user's input and the URL from the state to generate a list of
30-
probable HTML tags, updating the state with these tags under the 'tags' key.
22+
input (str): Boolean expression defining the input keys needed from the state.
23+
output (List[str]): List of output keys to be updated in the state.
24+
model_config (dict): Additional configuration for the language model.
25+
node_name (str): The unique identifier name for the node, defaulting to "GetProbableTags".
3126
"""
3227

3328
def __init__(self, input: str, output: List[str], model_config: dict,
3429
node_name: str = "GetProbableTags"):
35-
"""
36-
Initializes the GetProbableTagsNode with a language model client and a node name.
37-
Args:
38-
llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
39-
node_name (str): name of the node
40-
"""
4130
super().__init__(node_name, "node", input, output, 2, model_config)
31+
4232
self.llm_model = model_config["llm_model"]
4333

44-
def execute(self, state):
34+
def execute(self, state: dict) -> dict:
4535
"""
4636
Generates a list of probable HTML tags based on the user's input and updates the state
4737
with this list. The method constructs a prompt for the language model, submits it, and
4838
parses the output to identify probable tags.
4939
5040
Args:
51-
state (dict): The current state of the graph, expected to contain 'user_input', 'url',
52-
and optionally 'document' within 'keys'.
41+
state (dict): The current state of the graph. The input keys will be used to fetch the
42+
correct data from the state.
5343
5444
Returns:
55-
dict: The updated state with the 'tags' key containing a list of probable HTML tags.
45+
dict: The updated state with the output key containing a list of probable HTML tags.
5646
5747
Raises:
58-
KeyError: If 'user_input' or 'url' is not found in the state, indicating that the
48+
KeyError: If input keys are not found in the state, indicating that the
5949
necessary information for generating tag predictions is missing.
6050
"""
6151

scrapegraphai/nodes/image_to_text_node.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,44 @@
11
"""
2-
Module for the ImageToTextNode class.
2+
ImageToTextNode Module
33
"""
4+
45
from typing import List
56
from .base_node import BaseNode
67

78

89
class ImageToTextNode(BaseNode):
910
"""
10-
A class representing a node that processes an image and returns the text description.
11+
Retrieves an image from a URL and converts it to text using an ImageToText model.
1112
1213
Attributes:
13-
llm_model (OpenAIImageToText): An instance of the OpenAIImageToText class.
14-
15-
Methods:
16-
execute(state, url): Execute the node's logic and return the updated state.
14+
llm_model: An instance of the language model client used for image-to-text conversion.
15+
verbose (bool): A flag indicating whether to show print statements during execution.
16+
17+
Args:
18+
input (str): Boolean expression defining the input keys needed from the state.
19+
output (List[str]): List of output keys to be updated in the state.
20+
node_config (dict): Additional configuration for the node.
21+
node_name (str): The unique identifier name for the node, defaulting to "ImageToText".
1722
"""
1823

1924
def __init__(self, input: str, output: List[str], node_config: dict,
2025
node_name: str = "ImageToText"):
21-
"""
22-
Initializes an instance of the ImageToTextNode class.
23-
24-
Args:
25-
input (str): The input for the node.
26-
output (List[str]): The output of the node.
27-
node_config (dict): Configuration for the model.
28-
node_name (str): Name of the node.
29-
"""
3026
super().__init__(node_name, "node", input, output, 1, node_config)
27+
3128
self.llm_model = node_config["llm_model"]
3229
self.verbose = True if node_config is None else node_config.get("verbose", False)
3330

3431
def execute(self, state: dict) -> dict:
3532
"""
36-
Execute the node's logic and return the updated state.
33+
Generate text from an image using an image-to-text model. The method retrieves the image
34+
from the URL provided in the state.
3735
3836
Args:
39-
state (dict): The current state of the graph.
37+
state (dict): The current state of the graph. The input keys will be used to fetch the
38+
correct data from the state.
4039
4140
Returns:
42-
dict: The updated state after executing this node.
41+
dict: The updated state with the output key containing the text extracted from the image.
4342
"""
4443

4544
if self.verbose:

0 commit comments

Comments
 (0)