WaybackProxy Redirect

#!/usr/bin/env python import re, socket, SocketServer, urllib2, urlparse from config import * class ThreadingTCPServer(SocketServer.ThreadingMixIn, SocketServer.TCPServer): """TCPServer with ThreadingMixIn added.""" pass class Handler(SocketServer.BaseRequestHandler): """Main request handler.""" def handle(self): """Handle a request.""" # readline is pretty convenient f = self.request.makefile() # read request line reqline = line = f.readline() split = line.rstrip('\r\n').split(' ') http_version = len(split) > 2 and split[2] or 'HTTP/0.9' if split[0] != 'GET': # only GET is implemented return self.error_page(http_version, 501, 'Not Implemented') # parse the URL request_url = split[1] parsed = urlparse.urlparse(request_url) # make a path path = parsed.path if parsed.query != '': path += '?' + parsed.query if path == '': path == '/' # get the hostname for later host = parsed.netloc.split(':') hostname = host[0] # read out the headers while line.rstrip('\r\n') != '': line = f.readline() try: if hostname == 'web.archive.org': if path[:5] != '/web/': # launch settings return self.handle_settings(parsed.query) else: # pass-through requests to web.archive.org # required for QUICK_IMAGES print '[>] [QI] {0}'.format('/'.join(request_url.split('/')[5:])) conn = urllib2.urlopen(request_url) elif GEOCITIES_FIX and hostname == 'www.geocities.com': # apply GEOCITIES_FIX and pass it through split = request_url.split('/') hostname = split[2] = 'www.oocities.org' request_url = '/'.join(split) print '[>] {0}'.format(request_url) conn = urllib2.urlopen(request_url) else: # get from Wayback print '[>] {0}'.format(request_url) conn = urllib2.urlopen('http://web.archive.org/web/{0}/{1}'.format(DATE, request_url)) except urllib2.HTTPError as e: # an error has been found print '[!] {0} {1}'.format(e.code, e.reason) return self.error_page(http_version, e.code, e.reason) # get content type content_type = conn.info().getheader('Content-Type') if not CONTENT_TYPE_ENCODING and content_type.find(';') > -1: content_type = content_type[:content_type.find(';')] # send headers self.request.sendall('{0} 200 OK\r\nContent-Type: {1}\r\n\r\n'.format(http_version, content_type)) # set the mode: [0]wayback [1]oocities mode = 0 if GEOCITIES_FIX and hostname in ['www.oocities.org', 'www.oocities.com']: mode = 1 if content_type[:9] == 'text/html' in content_type: # HTML toolbar = mode == 1 # oocities header starts without warning after_header = False redirect_page = False for line in conn: line = line.rstrip('\r\n') if mode == 0: if toolbar: if line == '': # toolbar is done - resume relaying on the next line toolbar = False after_header = True continue elif redirect_page: # this is a really bad way to deal with Wayback's 302 # pages, but necessary with the way this proxy works match = re.search('

Impatient\\?

', line) if match: line = 'WaybackProxy RedirectIf you are not redirected, click here.' self.request.sendall(line) break continue if not after_header: ll = line.lower() if line == '' or line == '' or line[:69] == '