Skip to content

Commit d4b2679

Browse files
committed
fix: search graph
1 parent a9569ac commit d4b2679

File tree

3 files changed

+16
-3
lines changed

3 files changed

+16
-3
lines changed

scrapegraphai/nodes/merge_answers_node.py

+11-1
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,17 @@ def execute(self, state: dict) -> dict:
9696

9797
merge_chain = prompt_template | self.llm_model | output_parser
9898
answer = merge_chain.invoke({"user_prompt": user_prompt})
99-
answer["sources"] = state.get("urls", [])
99+
100+
# Get the URLs from the state, ensuring we get the actual URLs used for scraping
101+
urls = []
102+
if "urls" in state:
103+
urls = state["urls"]
104+
elif "considered_urls" in state:
105+
urls = state["considered_urls"]
106+
107+
# Only add sources if we actually have URLs
108+
if urls:
109+
answer["sources"] = urls
100110

101111
state.update({self.output[0]: answer})
102112
return state

scrapegraphai/nodes/search_internet_node.py

+3
Original file line number | Diff line number | Diff line change
@@ -99,5 +99,8 @@ def execute(self, state: dict) -> dict:
9999
if len(answer) == 0:
100100
raise ValueError("Zero results found for the search query.")
101101

102+
# Store both the URLs and considered_urls in the state
102103
state.update({self.output[0]: answer})
104+
state["considered_urls"] = answer # Add this as a backup
105+
103106
return state

scrapegraphai/utils/research_web.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def search_on_web(query: str, search_engine: str = "Google",
4141
research = DuckDuckGoSearchResults(max_results=max_results)
4242
res = research.run(query)
4343
links = re.findall(r'https?://[^\s,\]]+', res)
44-
return links
44+
return links[:max_results]
4545

4646
elif search_engine.lower() == "bing":
4747
headers = {
@@ -66,7 +66,7 @@ def search_on_web(query: str, search_engine: str = "Google",
6666
response = requests.get(url, params=params)
6767

6868
data = response.json()
69-
limited_results = data["results"][:max_results]
69+
limited_results = [result['url'] for result in data["results"][:max_results]]
7070
return limited_results
7171

7272
else:

0 commit comments

Comments
 (0)