2 года назад · 8b9d52744a
--- a/waybackproxy.py
+++ b/waybackproxy.py
@@ -204,7 +204,7 @@ class Handler(socketserver.BaseRequestHandler):
 
															 							request_url = self.shared_state.availability_cache[availability_url] = new_url
														
 
															 			# Start fetching the URL.
														
 
															-			retry = urllib3.util.retry.Retry(total=10, connect=10, read=5, redirect=5, backoff_factor=0.5)
														
 
															+			retry = urllib3.util.retry.Retry(total=10, connect=10, read=5, redirect=5, backoff_factor=1, raise_on_redirect=True)
														
 
															 			conn = self.shared_state.http.urlopen('GET', request_url, retries=retry, preload_content=False)
														
 
															 		except urllib3.exceptions.MaxRetryError as e:
														
 
															 			_print('[!] Fetch retries exceeded:', e.reason)
														
@@ -221,7 +221,7 @@ class Handler(socketserver.BaseRequestHandler):
 
															 				if self.guess_and_send_redirect(http_version, archived_url):
														
 
															 					conn.release_conn()
														
 
															 					return
														
 
															-			elif conn.status in (301, 302): # urllib-generated error about an infinite redirect loop
														
 
															+			elif conn.status in (301, 302): # urllib3-generated error about an infinite redirect loop
														
 
															 				conn.release_conn()
														
 
															 				_print('[!] Infinite redirect loop')
														
 
															 				return self.send_error_page(http_version, 508, 'Infinite Redirect Loop')
														
@@ -372,12 +372,14 @@ class Handler(socketserver.BaseRequestHandler):
 
															 					# username:password, which taints less but is not supported
														
 
															 					# by all browsers - IE notably kills the whole page if it
														
 
															 					# sees an iframe pointing to an invalid URL.
														
 
															-					data = re.sub(b'(?:(?:https?:)?//web.archive.org)?/web/([0-9]+)([a-z]+_)/([^:/]+://)',
														
 
															-						QUICK_IMAGES == 2 and b'\\3\\1:\\2@' or b'http://web.archive.org/web/\\1\\2/\\3', data)
														
 
															-					def strip_https(match): # convert secure non-asset URLs to regular HTTP
														
 
															-						first_component = match.group(1)
														
 
															-						return first_component == b'https:' and b'http:' or first_component
														
 
															-					data = re.sub(b'(?:(?:https?:)?//web.archive.org)?/web/[^/]+/([^/]+)', strip_https, data)
														
 
															+					def filter_asset(match):
														
 
															+						if match.group(2) in (None, b'if_', b'fw_'): # non-asset URL
														
 
															+							return match.group(3) == b'https://' and b'http://' or match.group(3) # convert secure non-asset URLs to regular HTTP
														
 
															+						elif QUICK_IMAGES == 2:
														
 
															+							return b'http://' + match.group(1) + b':' + match.group(2) + b'@'
														
 
															+						else:
														
 
															+							return b'http://web.archive.org/web/' + match.group(1) + match.group(2) + b'/' + match.group(3)
														
 
															+					data = re.sub(b'(?:(?:https?:)?//web.archive.org)?/web/([0-9]+)([a-z]+_)?/([^:/]+://)', filter_asset, data)
														
 
															 				else:
														
 
															 					# Remove asset URLs while simultaneously adding them to the date LRU cache
														
 
															 					# with their respective date and converting secure URLs to regular HTTP.