Skip to content

Commit aeb1acb

Browse files
committed
feat: refactoring search function
1 parent 2abe05a commit aeb1acb

File tree

3 files changed

+22
-7
lines changed

3 files changed

+22
-7
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ free-proxy = "1.1.1"
4141
langchain-groq = "0.1.3"
4242
playwright = "^1.43.0"
4343
langchain-aws = "^0.1.2"
44-
44+
langchain-anthropic = "^0.1.11"
45+
yahoo-search-py = "^0.3"
4546

4647
[tool.poetry.dev-dependencies]
4748
pytest = "8.0.0"

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ free-proxy==1.1.1
1515
langchain-groq==0.1.3
1616
playwright==1.43.0
1717
langchain-aws==0.1.2
18+
langchain-anthropic==0.1.11
19+
yahoo-search-py==0.3

scrapegraphai/utils/research_web.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
"""
1+
"""
22
Module for making the request on the web
33
"""
44
import re
55
from typing import List
66
from langchain_community.tools import DuckDuckGoSearchResults
7-
from googlesearch import search
7+
from googlesearch import search as google_search
8+
from yahoo_search import search as yahoo_search
89

910

1011
def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
@@ -29,18 +30,29 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
2930
This function allows switching between Google, DuckDuckGo and Yahoo to perform internet searches, returning a list of result URLs.
3031
"""
3132

32-
if search_engine == "Google":
33+
if search_engine.lower() == "google":
3334
res = []
3435

35-
for url in search(query, stop=max_results):
36+
for url in google_search(query, stop=max_results):
3637
res.append(url)
3738
return res
38-
elif search_engine == "DuckDuckGo":
39+
elif search_engine.lower() == "duckduckgo":
3940
research = DuckDuckGoSearchResults(max_results=max_results)
4041
res = research.run(query)
4142

4243
links = re.findall(r'https?://[^\s,\]]+', res)
4344

4445
return links
46+
elif search_engine.lower() == "yahoo":
47+
list_result = yahoo_search(query)
48+
results = []
49+
for page in list_result.pages:
50+
if len(results) >= max_results: # Check if max_results has already been reached
51+
break # Exit loop if max_results has been reached
52+
try:
53+
results.append(page.link)
54+
except AttributeError:
55+
continue
56+
return results
4557
raise ValueError(
46-
"The only search engines avaiable are DuckDuckGo or Google")
58+
"The only search engines available are DuckDuckGo or Google")

0 commit comments

Comments
 (0)