HTTP L1/L2/L3 Proxy Checker
Proxy Checker Download Link
#!/usr/bin/env python
#www.linux-ninjas.com
import Queue
import threading
import urllib2
import time
#path of the text file read by main(); one proxy entry per line
input_file = 'proxylist.txt'
#intended number of worker threads -- check that main() actually uses this value
threads = 10
#work queue shared by main() (which fills it) and the ThreadUrl workers (which drain it)
queue = Queue.Queue()
#(label, proxy) tuples appended by the workers and read by the reporting loop
output = []
class ThreadUrl(threading.Thread):
    """Worker thread that tests proxies taken from a shared queue.

    Every proxy string pulled from the queue is used to fetch the
    ProxyLists.Net proxy-judge page, and one ``(label, proxy)`` tuple is
    appended to the module-level ``output`` list. ``label`` is
    ``'HighAnon'``, ``'Anon'`` or ``'Trans'`` according to the judge's
    verdict, or ``'x'`` when the request fails or the response carries no
    recognisable verdict.
    """

    def __init__(self, queue):
        """Remember the queue this worker pulls proxy strings from."""
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Classify queued proxies one at a time.

        The loop never returns, so the thread has to be started as a
        daemon for the program to be able to exit.
        """
        while True:
            #grabs host from queue
            proxy_info = self.queue.get()
            try:
                try:
                    label = self._judge(proxy_info)
                except Exception:
                    #dead proxy, timeout, refused connection, ...: not usable
                    label = 'x'
                output.append((label, proxy_info))
            finally:
                #signals to queue job is done, even if recording the result
                #failed, so queue.join() cannot wait forever on this item
                self.queue.task_done()

    def _judge(self, proxy_info):
        """Fetch the judge page through ``proxy_info`` and return its label."""
        proxy_handler = urllib2.ProxyHandler({'http': proxy_info})
        #keep the opener private to this call: urllib2.install_opener()
        #replaces one process-wide default, so concurrent workers would
        #overwrite each other's proxy and report verdicts for the wrong one
        opener = urllib2.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        req = urllib2.Request("http://www.proxylists.net/proxyjudge.php")
        sock = opener.open(req, timeout=7)
        try:
            rs = sock.read(5000)
        finally:
            sock.close()
        if '<TITLE>ProxyLists.Net - Proxy judge</TITLE>' not in rs:
            #something answered, but it was not the judge page
            return 'x'
        #order matters: test the most specific verdict text first
        if 'Proxy is high anonymous (or no proxy)' in rs:
            return 'HighAnon'
        if 'Proxy is anonymous' in rs:
            return 'Anon'
        if 'Transparent' in rs:
            return 'Trans'
        #judge page without a known verdict: treat the proxy as unusable
        return 'x'
#wall-clock start, used for the elapsed-time report at the end of the run
start = time.time()


def main():
    """Start the worker pool, queue every proxy in ``input_file`` and wait.

    Spawns ``threads`` daemon ``ThreadUrl`` workers on the shared
    ``queue``, feeds them each non-blank line of ``input_file`` and blocks
    until every queued proxy has been processed.
    """
    #spawn a pool of threads, and pass them queue instance
    #(sized from the `threads` setting instead of a hard-coded 5)
    for _ in range(threads):
        t = ThreadUrl(queue)
        t.daemon = True
        t.start()
    #read the whole list up front and close the file straight away
    with open(input_file) as proxy_file:
        hosts = [line.strip() for line in proxy_file]
    #populate queue with data; blank lines are not proxies, so skip them
    for host in hosts:
        if host:
            queue.put(host)
    #wait on the queue until everything has been processed
    queue.join()
#run the checker, then list every proxy that received a usable verdict
main()
for label, address in output:
    #failed checks are stored under the label 'x' and are not printed
    if label in ('HighAnon', 'Anon', 'Trans'):
        print("%s %s" % (label, address))
print("Elapsed Time: %s" % (time.time() - start))
This doesn't output results one at a time; it prints everything once all the checks have finished. I personally don't mind waiting, as I'm never really in a hurry to find a working proxy. You could also change the main section (and the header) to write each anonymity level (L1/L2/L3) to its own file:
#-- This section goes at the top --
#one output file per anonymity level
high = open('L1HighAnonList.txt', 'w')
anon = open('L2AnonList.txt', 'w')
tran = open('L3TransList.txt', 'w')

#-- This section replaces the main() call and the print loop --
main()
for proxy, host in output:
    if proxy == 'HighAnon':
        high.write(host + '\n')
    elif proxy == 'Anon':
        anon.write(host + '\n')
    elif proxy == 'Trans':
        tran.write(host + '\n')
#close the files so everything written is flushed to disk
for handle in (high, anon, tran):
    handle.close()
print("Elapsed Time: %s" % (time.time() - start))
Proxy Leecher
Proxy Leecher Download Link
#!/usr/bin/env python
#www.linux-ninjas.com
import re, urllib, collections
#pages to scrape for proxies
urls = ['http://proxy-hunter.blogspot.com.au/2012/07/26-07-12-l1l2l3-http-proxies-1467.html','http://proxy-hunter.blogspot.com.au/2012/07/28-07-12-l1l2l3-http-proxies-676.html','http://proxy-hunter.blogspot.com.au/2012/07/30-07-12-l1l2l3-http-proxies-716.html','http://proxy-hunter.blogspot.com.au/2012/07/31-07-12-l1l2l3-http-proxies-722.html']
#ip:port pairs; raw string so the backslashes reach the regex engine untouched
proxy_pattern = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+")
#unique proxies in the order they were first seen
prox = []
seen = set()
for url in urls:
    try:
        page = urllib.urlopen(url)
        try:
            document = page.read()
        finally:
            page.close()
    except IOError as err:
        #one unreachable page should not throw away the proxies found elsewhere
        print("Skipping %s: %s" % (url, err))
        continue
    for y in proxy_pattern.findall(document):
        if y not in seen:
            seen.add(y)
            prox.append(y)
#the output is opened only once scraping is done, so an error while fetching
#does not truncate an existing list; `with` closes (and flushes) the file
with open('leechlist.txt', 'w') as proxies:
    proxies.writelines(item + '\n' for item in prox)
The leecher just fetches each of the URLs and extracts everything that looks like an IP:port pair, i.e. anything matching 111.222.333.444:[0-9]+.
I would suggest NOT going to each page and copying the links one by one. Look for an RSS feed instead: most proxy sites have one, and it usually shows each post in full, which may let you grab the WHOLE database of proxies, new and old. I'd only do this once, though.
Enjoy your proxies :) (Also, the leecher will grab SOCKS proxies as well; it won't differentiate. That may be why some of the proxies fail in the Checker, as it only checks HTTP proxies, not SOCKS and not SSL.)

