
Commit 5faac9f

Merge pull request #784 from ScrapeGraphAI/pre/beta

fixed json on generate answer

2 parents 7b5010f + 950e859

File tree

6 files changed: +75 -10 lines

  CHANGELOG.md
  pyproject.toml
  scrapegraphai/graphs/search_graph.py
  scrapegraphai/nodes/generate_answer_node.py
  scrapegraphai/nodes/search_internet_node.py
  scrapegraphai/utils/research_web.py

CHANGELOG.md

Lines changed: 32 additions & 0 deletions

```diff
@@ -1,3 +1,35 @@
+## [1.29.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.28.0...v1.29.0-beta.1) (2024-11-04)
+
+
+### Features
+
+* Serper API integration for Google search ([c218546](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c218546a3ddbdf987888e150942a244856af66cc))
+
+
+### Bug Fixes
+
+* resolved outparser issue ([e8cabfd](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e8cabfd1ae7cc93abc04745948db1f6933fd2e26))
+
+
+### CI
+
+* **release:** 1.28.0-beta.3 [skip ci] ([65d39bb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/65d39bbaf0671fa5ac84705e94adb42078a36c3b))
+* **release:** 1.28.0-beta.4 [skip ci] ([b90bb00](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b90bb00beb8497b8dd16fa4d1ef5af22042a55f3))
+
+## [1.28.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.28.0-beta.3...v1.28.0-beta.4) (2024-11-03)
+
+
+### Bug Fixes
+
+* resolved outparser issue ([e8cabfd](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e8cabfd1ae7cc93abc04745948db1f6933fd2e26))
+
+## [1.28.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.28.0-beta.2...v1.28.0-beta.3) (2024-11-02)
+
+
+### Features
+
+* Serper API integration for Google search ([c218546](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c218546a3ddbdf987888e150942a244856af66cc))
+
 ## [1.28.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.27.0...v1.28.0) (2024-11-01)
 
 
```
pyproject.toml

Lines changed: 2 additions & 1 deletion

```diff
@@ -2,7 +2,8 @@
 name = "scrapegraphai"
 
 
-version = "1.28.0"
+version = "1.29.0b1"
+
 
 
 
```
scrapegraphai/graphs/search_graph.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -66,7 +66,8 @@ def _create_graph(self) -> BaseGraph:
                 "llm_model": self.llm_model,
                 "max_results": self.max_results,
                 "loader_kwargs": self.loader_kwargs,
-                "search_engine": self.copy_config.get("search_engine")
+                "search_engine": self.copy_config.get("search_engine"),
+                "serper_api_key": self.copy_config.get("serper_api_key")
             }
         )
```
scrapegraphai/nodes/generate_answer_node.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -122,11 +122,11 @@ def execute(self, state: dict) -> dict:
             partial_variables={"context": doc, "format_instructions": format_instructions}
         )
         chain = prompt | self.llm_model
-        raw_response = str((prompt | self.llm_model).invoke({"question": user_prompt}))
+        raw_response = chain.invoke({"question": user_prompt})
 
         if output_parser:
             try:
-                answer = output_parser.parse(raw_response)
+                answer = output_parser.parse(raw_response.content)
             except JSONDecodeError:
                 lines = raw_response.split('\n')
                 if lines[0].strip().startswith('```'):
@@ -136,7 +136,7 @@ def execute(self, state: dict) -> dict:
                 cleaned_response = '\n'.join(lines)
                 answer = output_parser.parse(cleaned_response)
         else:
-            answer = raw_response
+            answer = raw_response.content
 
         state.update({self.output[0]: answer})
         return state
```
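For context on the fix: `chain.invoke(...)` on a `prompt | model` chain returns a LangChain message object, not a string, so the old `str(...)` wrapper serialized the entire object instead of the model's text. A minimal sketch of the difference, assuming `langchain-core` is installed:

```python
from langchain_core.messages import AIMessage

# Stand-in for what chain.invoke() returns from a chat model.
raw_response = AIMessage(content='{"answer": 42}')

# str() renders the whole message, e.g.:
#   content='{"answer": 42}' additional_kwargs={} response_metadata={}
# which is not valid JSON -- the bug this commit fixes.
print(str(raw_response))

# .content is just the model's text, which the output parser can handle.
print(raw_response.content)  # {"answer": 42}
```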

scrapegraphai/nodes/search_internet_node.py

Lines changed: 8 additions & 1 deletion

```diff
@@ -47,6 +47,13 @@ def __init__(
             if node_config.get("search_engine")
             else "google"
         )
+
+        self.serper_api_key = (
+            node_config["serper_api_key"]
+            if node_config.get("serper_api_key")
+            else None
+        )
+
         self.max_results = node_config.get("max_results", 3)
 
     def execute(self, state: dict) -> dict:
@@ -95,7 +102,7 @@ def execute(self, state: dict) -> dict:
         self.logger.info(f"Search Query: {search_query}")
 
         answer = search_on_web(query=search_query, max_results=self.max_results,
-                               search_engine=self.search_engine, proxy=self.proxy)
+                               search_engine=self.search_engine, proxy=self.proxy, serper_api_key=self.serper_api_key)
 
         if len(answer) == 0:
             raise ValueError("Zero results found for the search query.")
```
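The new attribute falls back to `None` when the key is absent, mirroring the existing `search_engine` fallback. A standalone sketch of that lookup, runnable on its own:

```python
# Same conditional-expression pattern as in __init__ above.
node_config = {"search_engine": "serper"}  # no "serper_api_key" entry

serper_api_key = (
    node_config["serper_api_key"]
    if node_config.get("serper_api_key")
    else None
)
print(serper_api_key)  # None -- search_on_web later rejects "serper" without a key
```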

scrapegraphai/utils/research_web.py

Lines changed: 28 additions & 4 deletions

```diff
@@ -7,18 +7,20 @@
 from googlesearch import search as google_search
 import requests
 from bs4 import BeautifulSoup
+import json
 
 def search_on_web(query: str, search_engine: str = "Google",
                   max_results: int = 10, port: int = 8080,
-                  timeout: int = 10, proxy: str | dict = None) -> List[str]:
+                  timeout: int = 10, proxy: str | dict = None,
+                  serper_api_key: str = None) -> List[str]:
     """Search web function with improved error handling and validation"""
 
     # Input validation
     if not query or not isinstance(query, str):
         raise ValueError("Query must be a non-empty string")
 
     search_engine = search_engine.lower()
-    valid_engines = {"google", "duckduckgo", "bing", "searxng"}
+    valid_engines = {"google", "duckduckgo", "bing", "searxng", "serper"}
     if search_engine not in valid_engines:
         raise ValueError(f"Search engine must be one of: {', '.join(valid_engines)}")
 
@@ -42,7 +44,10 @@ def search_on_web(query: str, search_engine: str = "Google",
 
         elif search_engine == "searxng":
             results = _search_searxng(query, max_results, port, timeout)
-
+
+        elif search_engine.lower() == "serper":
+            results = _search_serper(query, max_results, serper_api_key, timeout)
+
         return filter_pdf_links(results)
 
     except requests.Timeout:
@@ -76,6 +81,25 @@ def _search_searxng(query: str, max_results: int, port: int, timeout: int) -> List[str]:
     response.raise_for_status()
     return [result['url'] for result in response.json().get("results", [])[:max_results]]
 
+def _search_serper(query: str, max_results: int, serper_api_key: str, timeout: int) -> List[str]:
+    """Helper function for serper api"""
+    if not serper_api_key:
+        raise ValueError("API key is required for serper api.")
+
+    url = "https://google.serper.dev/search"
+    payload = json.dumps({
+        "q": query,
+        "num": max_results
+    })
+    headers = {
+        'X-API-KEY': serper_api_key,
+        'Content-Type': 'application/json'
+    }
+    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
+    response.raise_for_status()
+    return [result.get("link") for result in response.json().get("organic", [])]
+
+
 def format_proxy(proxy):
     if isinstance(proxy, dict):
         server = proxy.get('server')
@@ -102,4 +126,4 @@ def filter_pdf_links(links: List[str]) -> List[str]:
     Returns:
         List[str]: A list of URLs excluding any that end with '.pdf'.
     """
-    return [link for link in links if not link.lower().endswith('.pdf')]
\ No newline at end of file
+    return [link for link in links if not link.lower().endswith('.pdf')]
```
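With these changes the helper can be exercised directly. A hedged sketch of a direct call, assuming a valid Serper key (the value below is a placeholder):

```python
from scrapegraphai.utils.research_web import search_on_web

# "serper" now passes the engine validation; _search_serper raises a
# ValueError if no API key is supplied.
urls = search_on_web(
    query="web scraping with LLMs",
    search_engine="serper",
    max_results=5,
    serper_api_key="YOUR_SERPER_API_KEY",
)
print(urls)  # organic-result links, with .pdf URLs filtered out
```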
