Browse Source

Only check date tolerance on HTML content

Fixes issues with asset loading on some sites, caused by the assets' snapshots being dated in the future, outside of the configured tolerance.
RichardG867 5 năm trước cách đây
mục cha
commit
b78e717c5a
1 tập tin đã thay đổi với 10 bổ sung và 9 xóa
  1. 10 9
      waybackproxy.py

+ 10 - 9
waybackproxy.py

@@ -115,15 +115,6 @@ class Handler(socketserver.BaseRequestHandler):
 				request_url = 'http://web.archive.org/web/{0}/{1}'.format(effective_date, archived_url)
 
 				conn = urllib.request.urlopen(request_url)
-
-			# check if the date is within tolerance
-			if DATE_TOLERANCE is not None:
-				match = re.search('''//web\.archive\.org/web/([0-9]+)''', conn.geturl())
-				if match:
-					requested_date = match.group(1)
-					if self.wayback_to_datetime(requested_date) > self.wayback_to_datetime(original_date) + datetime.timedelta(DATE_TOLERANCE):
-						_print('[!]', requested_date, 'is outside the configured tolerance of', DATE_TOLERANCE, 'days')
-						raise urllib.error.HTTPError(conn.geturl(), 412, 'Snapshot ' + requested_date + ' not available', conn.info(), conn)
 		except urllib.error.HTTPError as e:
 			# an error has been found
 
@@ -170,6 +161,16 @@ class Handler(socketserver.BaseRequestHandler):
 				_print('[r] [QI]', archived_url)
 				return self.redirect_page(http_version, archived_url, 301)
 
+			# check if the date is within tolerance
+			if DATE_TOLERANCE is not None:
+				match = re.search('''//web\.archive\.org/web/([0-9]+)''', conn.geturl())
+				if match:
+					requested_date = match.group(1)
+					if self.wayback_to_datetime(requested_date) > self.wayback_to_datetime(original_date) + datetime.timedelta(DATE_TOLERANCE):
+						_print('[!]', requested_date, 'is outside the configured tolerance of', DATE_TOLERANCE, 'days')
+						conn.close()
+						return self.error_page(http_version, 412, 'Snapshot ' + requested_date + ' not available')
+
 			# consume all data
 			data = conn.read()