Explorar el Código

Fix one of the redirect heuristic regexes

RichardG867 hace 3 años
padre
commit
227207a5e6
Se ha modificado 1 fichero con 1 adición y 1 borrado
  1. 1 1
      waybackproxy.py

+ 1 - 1
waybackproxy.py

@@ -187,7 +187,7 @@ class Handler(socketserver.BaseRequestHandler):
 
 
 			if e.code in (403, 404, 412): # not found or tolerance exceeded
 			if e.code in (403, 404, 412): # not found or tolerance exceeded
 				# Heuristically determine the static URL for some redirect scripts.
 				# Heuristically determine the static URL for some redirect scripts.
-				match = re.search('''[^/]/((?:http(?:%3A|:)(?:%2F|/)|www[0-9]*)\\.[^/%]+)(?:%2F|/).+)''', archived_url, re.I) # URL in path
+				match = re.search('''[^/]/((?:http(?:%3A|:)(?:%2F|/)|www[0-9]*\\.[^/%]+)(?:%2F|/).+)''', archived_url, re.I) # URL in path
 				if not match:
 				if not match:
 					match = re.search('''[\\?&][^=]+=((?:http(?:%3A|:)(?:%2F|/)|www[0-9]*\\.[^/%]+)?(?:%2F|/)[^&]+)''', archived_url, re.I) # URL in query string
 					match = re.search('''[\\?&][^=]+=((?:http(?:%3A|:)(?:%2F|/)|www[0-9]*\\.[^/%]+)?(?:%2F|/)[^&]+)''', archived_url, re.I) # URL in query string
 				if match: # found URL
 				if match: # found URL