@@ -538,12 +538,15 @@ def req(self, url, cookies=None):
538
538
self .print_ (e )
539
539
resp = None
540
540
return resp
541
+
542
def should_sleep(self, min_seconds=1, max_seconds=2):
    """Pause for a random whole number of seconds.

    Despite the name, this does not decide anything: it always sleeps.
    The delay is drawn uniformly from ``[min_seconds, max_seconds]``
    (both ends inclusive).

    Args:
        min_seconds: Smallest delay in whole seconds (default 1).
        max_seconds: Largest delay in whole seconds (default 2).

    Returns:
        None.

    Raises:
        ValueError: if ``min_seconds`` is greater than ``max_seconds``
            (raised by ``random.randint``).
    """
    # Defaults reproduce the previously hard-coded 1-2 second pause.
    time.sleep(random.randint(min_seconds, max_seconds))
541
545
542
546
def get_next(self, resp):
    """Return the absolute URL of the "Next Page" link found in ``resp``.

    Args:
        resp: HTML text of a results page.

    Returns:
        ``'http://searchdns.netcraft.com'`` concatenated with the ``href``
        value of the first anchor whose text starts with "Next Page".

    Raises:
        IndexError: if ``resp`` contains no such anchor.
    """
    # [^>]* and [^"]* keep the match inside ONE <a ...> tag. A lazy ".*?"
    # would anchor at the first "<a" on the line and let the captured group
    # run across earlier links, returning markup instead of the href.
    link_regx = re.compile(r'<a\b[^>]*?\shref="([^"]*)"[^>]*>\s*Next Page')
    match = link_regx.search(resp)
    if match is None:
        # Same exception type the former ``findall(...)[0]`` raised.
        raise IndexError('no "Next Page" link found in response')
    return 'http://searchdns.netcraft.com' + match.group(1)
548
551
549
552
def create_cookies (self , cookie ):
@@ -569,14 +572,15 @@ def enumerate(self):
569
572
while True :
570
573
resp = self .get_response (self .req (url , cookies ))
571
574
self .extract_domains (resp )
572
- if 'Next page ' not in resp :
575
+ if 'Next Page ' not in resp :
573
576
return self .subdomains
574
577
break
575
578
url = self .get_next (resp )
579
+ self .should_sleep ()
576
580
577
581
def extract_domains (self , resp ):
578
582
links_list = list ()
579
- link_regx = re .compile ('<a href="http://toolbar.netcraft.com/site_report\?url= (.*)"> ' )
583
+ link_regx = re .compile ('<a class="results-table__host" href=" (.*?)" ' )
580
584
try :
581
585
links_list = link_regx .findall (resp )
582
586
for link in links_list :
0 commit comments