 from googlesearch import search as google_search
 import requests
 from bs4 import BeautifulSoup
+import json
 
 
 def search_on_web(query: str, search_engine: str = "Google",
                   max_results: int = 10, port: int = 8080,
-                  timeout: int = 10, proxy: str | dict = None) -> List[str]:
+                  timeout: int = 10, proxy: str | dict = None,
+                  serper_api_key: str = None) -> List[str]:
     """Search web function with improved error handling and validation"""
 
     # Input validation
     if not query or not isinstance(query, str):
         raise ValueError("Query must be a non-empty string")
 
     search_engine = search_engine.lower()
-    valid_engines = {"google", "duckduckgo", "bing", "searxng"}
+    valid_engines = {"google", "duckduckgo", "bing", "searxng", "serper"}
     if search_engine not in valid_engines:
         raise ValueError(f"Search engine must be one of: {', '.join(valid_engines)}")
@@ -42,7 +44,10 @@ def search_on_web(query: str, search_engine: str = "Google",
 
         elif search_engine == "searxng":
             results = _search_searxng(query, max_results, port, timeout)
-
+
+        elif search_engine == "serper":
+            results = _search_serper(query, max_results, serper_api_key, timeout)
+
         return filter_pdf_links(results)
 
     except requests.Timeout:
@@ -76,6 +81,25 @@ def _search_searxng(query: str, max_results: int, port: int, timeout: int) -> List[str]:
     response.raise_for_status()
     return [result['url'] for result in response.json().get("results", [])[:max_results]]
 
+def _search_serper(query: str, max_results: int, serper_api_key: str, timeout: int) -> List[str]:
+    """Helper function for the Serper API."""
+    if not serper_api_key:
+        raise ValueError("An API key is required for the Serper API.")
+
+    url = "https://google.serper.dev/search"
+    payload = json.dumps({
+        "q": query,
+        "num": max_results
+    })
+    headers = {
+        'X-API-KEY': serper_api_key,
+        'Content-Type': 'application/json'
+    }
+    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
+    response.raise_for_status()
+    return [result.get("link") for result in response.json().get("organic", [])]
+
+
 def format_proxy(proxy):
     if isinstance(proxy, dict):
         server = proxy.get('server')
@@ -102,4 +126,4 @@ def filter_pdf_links(links: List[str]) -> List[str]:
     Returns:
         List[str]: A list of URLs excluding any that end with '.pdf'.
     """
-    return [link for link in links if not link.lower().endswith('.pdf')]
+    return [link for link in links if not link.lower().endswith('.pdf')]
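
For reference, a minimal usage sketch of the new engine once this patch is applied. The import path, the environment variable name, and the query string are illustrative assumptions; only search_on_web and its serper_api_key parameter come from the diff above.

import os

# Hypothetical import path; adjust to wherever search_on_web lives in this repo.
from scrapegraphai.utils.research_web import search_on_web

# Assumes a Serper API key was exported beforehand, e.g. SERPER_API_KEY=...
api_key = os.environ.get("SERPER_API_KEY")

urls = search_on_web(
    "large language models",   # illustrative query
    search_engine="serper",    # new engine added by this diff
    max_results=5,
    serper_api_key=api_key,
)
print(urls)  # PDF links are already stripped by filter_pdf_links()

Note that _search_serper passes max_results as Serper's "num" parameter, so the result count is capped server-side rather than sliced locally as in _search_searxng.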