HTTP L1/L2/L3 Proxy Checker/Leecher [Python]

Wednesday, August 1, 2012

HTTP L1/L2/L3 Proxy Checker


Proxy Checker Download Link

 #!/usr/bin/env python  
 #www.linux-ninjas.com  
 import Queue  
 import threading  
 import urllib2  
 import time  
 input_file = 'proxylist.txt'   #one proxy per line, e.g. 1.2.3.4:8080  
 threads = 10  
 queue = Queue.Queue()  
 output = []  
 class ThreadUrl(threading.Thread):  
   """Threaded Url Grab"""  
   def __init__(self, queue):  
     threading.Thread.__init__(self)  
     self.queue = queue  
   def run(self):  
     while True:  
       #grabs host from queue  
       proxy_info = self.queue.get()  
       try:  
         proxy_handler = urllib2.ProxyHandler({'http':proxy_info})  
         opener = urllib2.build_opener(proxy_handler)  
         opener.addheaders = [('User-agent','Mozilla/5.0')]  
         req = urllib2.Request("http://www.proxylists.net/proxyjudge.php")  
         #use the opener directly so each thread keeps its own proxy handler  
         sock = opener.open(req, timeout=7)  
         rs = sock.read(5000)  
         if '<TITLE>ProxyLists.Net - Proxy judge</TITLE>' in rs:  
             if 'Proxy is high anonymous (or no proxy)' in rs:  
                 output.append(('HighAnon',proxy_info))  
             elif 'Proxy is anonymous' in rs:  
                 output.append(('Anon',proxy_info))  
             elif 'Transparent' in rs:  
                 output.append(('Trans',proxy_info))  
         else:  
             raise "Not Judging"  
       except Exception:  
         #any failure (timeout, refused connection, bad response) marks the proxy as dead  
         output.append(('x',proxy_info))  
       #signals to queue job is done  
       self.queue.task_done()  
 start = time.time()  
 def main():  
   #spawn a pool of threads, and pass them queue instance   
   for i in range(threads):  
     t = ThreadUrl(queue)  
     t.setDaemon(True)  
     t.start()  
   hosts = [host.strip() for host in open(input_file).readlines()]  
   #populate queue with data    
   for host in hosts:  
     queue.put(host)  
   #wait on the queue until everything has been processed     
   queue.join()  
 main()  
 for proxy,host in output:  
   if (proxy == 'HighAnon'):  
     print proxy,host  
   if (proxy == 'Anon'):  
     print proxy,host  
   if (proxy == 'Trans'):  
     print proxy,host  
 print "Elapsed Time: %s" % (time.time() - start)  

This doesn't output results one at a time; I personally don't mind waiting, as I'm never really in a hurry to find a working proxy (there's a small sketch after the next snippet if you'd rather print each result as it's checked). You could also change the section after main() (and add a few lines at the top) to write the proxies to a file per L1/L2/L3:

 #-- This section goes at the top --  
 high = open('L1HighAnonList.txt', 'w')  
 anon = open('L2AnonList.txt', 'w')  
 tran = open('L3TransList.txt', 'w')  
 #-- This section replaces everything from the main() call down --  
 main()  
 for proxy,host in output:  
   if (proxy == 'HighAnon'):  
     high.write(host + '\n')  
   if (proxy == 'Anon'):  
     anon.write(host + '\n')  
   if (proxy == 'Trans'):  
     tran.write(host + '\n')  
 print "Elapsed Time: %s" % (time.time() - start)  

Proxy Leecher


Proxy Leecher Download Link

 #!/usr/bin/env python  
 #www.linux-ninjas.com  
 import re, urllib, collections  
 proxies = open('leechlist.txt', 'w')  
 prox = []  
 urls = ['http://proxy-hunter.blogspot.com.au/2012/07/26-07-12-l1l2l3-http-proxies-1467.html',  
         'http://proxy-hunter.blogspot.com.au/2012/07/28-07-12-l1l2l3-http-proxies-676.html',  
         'http://proxy-hunter.blogspot.com.au/2012/07/30-07-12-l1l2l3-http-proxies-716.html',  
         'http://proxy-hunter.blogspot.com.au/2012/07/31-07-12-l1l2l3-http-proxies-722.html']  
 for url in urls:  
     document = urllib.urlopen(url).read()  
     proxylist = re.findall(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+", document)  
     for y in proxylist:  
         prox.append(y)  
 #Counter's keys are the unique proxies, so duplicates across pages are dropped  
 newprox = collections.Counter(prox)  
 for item in list(newprox):  
     proxies.write(item + '\n')  

The leecher just grabs each URL and pulls out anything matching the IP:port pattern (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+).
Rather than going to each page and copying the URLs one by one, I'd suggest looking for an RSS feed: most proxy sites have one, and a feed that shows the whole post can let you pull the site's entire backlog of proxies, new and old, in one go, though I'd only do that once.
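
For example, here is a minimal sketch of feed-based leeching that reuses the same regex; the feed URL and the max-results parameter are my assumptions about Blogger's standard Atom feed, so check them against the site first:

 import re, urllib  
 #assumed Blogger feed URL; Blogger blogs normally publish full posts here  
 feed_url = 'http://proxy-hunter.blogspot.com.au/feeds/posts/default?max-results=500'  
 feed = urllib.urlopen(feed_url).read()  
 #same IP:port pattern as the leecher above; set() drops duplicates  
 for proxy in set(re.findall(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+", feed)):  
     print proxy  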

Enjoy your proxies :) Also note the leecher will grab SOCKS proxies too; it won't differentiate, which may be why some of the leeched proxies fail in the checker, since it only tests plain HTTP, not SOCKS and not SSL.
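
If you want to weed out proxies that can't tunnel SSL, a rough sketch of one way to do it (my addition, not part of the original checker) is to add an https entry to the ProxyHandler and fetch an https URL through it. This assumes Python 2.7, where urllib2 tunnels https requests through the proxy with CONNECT; the test URL is just a placeholder:

 import urllib2  
 def supports_ssl(proxy_info, test_url='https://www.google.com/', timeout=7):  
   #route https through the proxy too; urllib2 issues a CONNECT for the tunnel  
   handler = urllib2.ProxyHandler({'http': proxy_info, 'https': proxy_info})  
   opener = urllib2.build_opener(handler)  
   try:  
     opener.open(test_url, timeout=timeout).read(100)  
     return True  
   except Exception:  
     return False  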
