hai 5 anos · 59239d15b1
--- a/README.md
+++ b/README.md
@@ -1,19 +1,20 @@
 
				 # WaybackProxy
			
 
				 
			
 
				-WaybackProxy is a HTTP proxy that sends all requests through the [Internet Archive Wayback Machine](http://web.archive.org) and [OoCities](http://www.oocities.org), returning the original retro-browser-friendly markup.
			
 
				+WaybackProxy is a retro-friendly HTTP proxy which retrieves pages from the [Internet Archive Wayback Machine](http://web.archive.org) or [OoCities](http://www.oocities.org) and delivers them in their original form, without toolbars, scripts and other extraneous content that may confuse retro browsers.
			
 
				 
			
 
				 ![1999 Google viewed on Internet Explorer 4.0 on Windows 95](http://i.imgur.com/tXsLc6O.png)
			
 
				 
			
 
				 ## Setup
			
 
				 
			
 
				 1. Edit `config.py` to your liking
			
 
				-2. Start `waybackproxy.py`
			
 
				+2. Start `waybackproxy.py` (Python 3 is required)
			
 
				 3. Set up your retro browser:
			
 
				 	* If your browser supports proxy auto-configuration, set the auto-configuration URL to `http://ip:port/proxy.pac` where `ip` is the IP of the system running WaybackProxy and `port` is the proxy's port (8888 by default).
			
 
				 	* If proxy auto-configuration is not supported or fails to work, set the browser to use an HTTP proxy at that IP and port instead.
			
 
				-	* Transparent proxying is also supported for advanced users. No configuration to WaybackProxy itself is required. In transparent mode, client machines must be pointed at a dummy DNS server so they can find the proxy; `dnsmasq -A "/#/ip"` is a good choice.
			
 
				+	* Transparent proxying is also supported for advanced users; WaybackProxy itself requires no additional configuration.
			
 
				+		* The easiest way to set up a transparent WaybackProxy is to run it on port 80 ([this cannot be done on Linux without security implications](https://unix.stackexchange.com/questions/87348/capabilities-for-a-script-on-linux)\), set up a fake DNS server - such as `dnsmasq -A "/#/ip"` where `ip` is the IP of the system running WaybackProxy - to redirect all requests to the proxy, and point client machines at that DNS server.
			
 
				 4. Try it out! You can edit most settings that are in `config.py` by browsing to http://web.archive.org while on the proxy, although you must edit `config.py` to make them permanent.
			
 
				-5. Press Ctrl+C to stop
			
 
				+5. Press Ctrl+C to stop the proxy
			
 
				 
			
 
				 ## Known issues and limitations
			
 
				 
			
@@ -23,10 +24,11 @@ WaybackProxy is a HTTP proxy that sends all requests through the [Internet Archi
 
				   * Strange 404 errors caused by bad server responses or incorrect URL capitalization at archival time;
			
 
				   * Infinite redirect loops;
			
 
				   * Server errors when it's having a bad day.
			
 
				-* WaybackProxy will work around some redirection scripts (example: `http://example.com/redirect?to=http://...`) which are not archived by the Wayback Machine, but the destination URLs might not be archived as well.
			
 
				+* WaybackProxy will work around some redirection scripts (example: `http://example.com/redirect?to=http://...`) which are not archived by the Wayback Machine, but the destination URLs are sometimes not archived either.
			
 
				 * WaybackProxy is not a generic proxy. The POST and CONNECT methods are not implemented.
			
 
				+* Transparent proxying mode requires HTTP/1.1 and therefore cannot be used with some really old (pre-1996) browsers. Use standard mode with such browsers.
			
 
				 
			
 
				 ## Other links
			
 
				 
			
 
				-* [Donate to the Internet Archive](https://archive.org/donate/), they need your help to keep the Wayback Machine and its petabytes upon petabytes of data available to everyone at no cost.
			
 
				-* [Check out 86Box](https://86box.github.io/), the emulator I use for testing WaybackProxy on older browsers.
			
 
				+* [Donate to the Internet Archive](https://archive.org/donate/); they need your help to keep the Wayback Machine and its petabytes upon petabytes of data available to everyone for free with no ads.
			
 
				+* [Check out 86Box](https://86box.github.io/), the emulator I use for testing WaybackProxy on older browsers.
			
--- a/config.py
+++ b/config.py
@@ -1,8 +1,9 @@
 
				-# Listen port for the HTTP proxy
			
 
				+# Listen port for the HTTP proxy.
			
 
				 LISTEN_PORT = 8888
			
 
				 
			
 
				-# Date to get pages from Wayback (YYYY, YYYYMM or YYYYMMDD)
			
 
				-DATE = '1998'
			
 
				+# Date to get pages from Wayback. YYYYMMDD, YYYYMM and YYYY formats are
			
 
				+# accepted; the more specific the better.
			
 
				+DATE = '20011025' # <- Windows XP release date in case you're wondering
			
 
				 
			
 
				 # Allow the client to load pages and assets up to X days after DATE.
			
 
				 # Set to None to disable this restriction.
			
@@ -14,8 +15,8 @@ GEOCITIES_FIX = True
 
				 # Use the Wayback-tampered URL as a shortcut when loading images.
			
 
				 # May result in faster loads, but all images will point to 
			
 
				 # http://web.archive.org/... as a result. Set this value to 2 to enable an
			
 
				-# experimental mode using username:passwords on top of the original URLs
			
 
				-# instead (not supported by Internet Explorer and some other browsers).
			
 
				+# experimental mode using authentication on top of the original URLs instead
			
 
				+# (which is not supported by Internet Explorer and some other browsers).
			
 
				 QUICK_IMAGES = True
			
 
				 
			
 
				 # Allow the Content-Type header to contain an encoding. Some old browsers
			
@@ -23,5 +24,5 @@ QUICK_IMAGES = True
 
				 # False if you're using one of them.
			
 
				 CONTENT_TYPE_ENCODING = True
			
 
				 
			
 
				-# Don't print log entries
			
 
				+# Disables logging if set to True.
			
 
				 SILENT = False
			
--- a/waybackproxy.py
+++ b/waybackproxy.py
@@ -131,9 +131,9 @@ class Handler(socketserver.BaseRequestHandler):
 
				 
			
 
				 			if e.code in (403, 404, 412):
			
 
				 				# 403, 404 or tolerance exceeded => heuristically determine the static URL for some redirect scripts
			
 
				-				match = re.search('''[^/]/((?:http(?:%3A|:)(?:%2F|/)|www(?:[0-9]+)?\.(?:[^/%]+))(?:%2F|/).+)''', archived_url, re.IGNORECASE)
			
 
				+				match = re.search('''[^/]/((?:http(?:%3A|:)(?:%2F|/)|www(?:[0-9]+)?\.(?:[^/%]+))(?:%2F|/).+)''', archived_url, re.I)
			
 
				 				if not match:
			
 
				-					match = re.search('''(?:\?|&)(?:[^=]+)=((?:http(?:%3A|:)(?:%2F|/)|www(?:[0-9]+)?\.(?:[^/%]+))?(?:%2F|/)[^&]+)''', archived_url, re.IGNORECASE)
			
 
				+					match = re.search('''(?:\?|&)(?:[^=]+)=((?:http(?:%3A|:)(?:%2F|/)|www(?:[0-9]+)?\.(?:[^/%]+))?(?:%2F|/)[^&]+)''', archived_url, re.I)
			
 
				 				if match:
			
 
				 					print(match.groups())
			
 
				 					# we found it
			
@@ -260,7 +260,7 @@ class Handler(socketserver.BaseRequestHandler):
 
				 					# date codes. This taints the HTML with web.archive.org
			
 
				 					# URLs. QUICK_IMAGES=2 uses the original URLs with an added
			
 
				 					# username:password, which taints less but is not supported
			
 
				-					# by all browsers - IE6 notably kills the whole page if it
			
 
				+					# by all browsers - IE notably kills the whole page if it
			
 
				 					# sees an iframe pointing to an invalid URL.
			
 
				 					data = re.sub(b'(?:(?:http(?:s)?:)?//web.archive.org)?/web/([0-9]+)([a-z]+_)/([^:]+)://',
			
 
				 						QUICK_IMAGES == 2 and b'\\3://\\1:\\2@' or b'http://web.archive.org/web/\\1\\2/\\3://', data)