waybackproxy.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. #!/usr/bin/env python3
  2. import base64, datetime, json, lrudict, re, socket, socketserver, sys, threading, urllib.request, urllib.error, urllib.parse
  3. from config import *
  4. # internal LRU dictionary for preserving URLs on redirect
  5. date_cache = lrudict.LRUDict(maxduration=86400, maxsize=1024)
  6. # internal LRU dictionary for date availability
  7. availability_cache = lrudict.LRUDict(maxduration=86400, maxsize=1024) if WAYBACK_API else None
  8. class ThreadingTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
  9. """TCPServer with ThreadingMixIn added."""
  10. pass
  11. class Handler(socketserver.BaseRequestHandler):
  12. """Main request handler."""
  13. def handle(self):
  14. """Handle a request."""
  15. global availability_cache
  16. # readline is pretty convenient
  17. f = self.request.makefile()
  18. # read request line
  19. reqline = line = f.readline()
  20. split = line.rstrip('\r\n').split(' ')
  21. http_version = len(split) > 2 and split[2] or 'HTTP/0.9'
  22. if split[0] != 'GET':
  23. # only GET is implemented
  24. return self.error_page(http_version, 501, 'Not Implemented')
  25. # read out the headers
  26. request_host = None
  27. pac_host = '" + location.host + ":' + str(LISTEN_PORT) # may not actually work
  28. effective_date = DATE
  29. auth = None
  30. while line.rstrip('\r\n') != '':
  31. line = f.readline()
  32. ll = line.lower()
  33. if ll[:6] == 'host: ':
  34. pac_host = request_host = line[6:].rstrip('\r\n')
  35. if ':' not in pac_host: # explicitly specify port if running on port 80
  36. pac_host += ':80'
  37. elif ll[:21] == 'x-waybackproxy-date: ':
  38. # API for a personal project of mine
  39. effective_date = line[21:].rstrip('\r\n')
  40. elif ll[:21] == 'authorization: basic ':
  41. # asset date code passed as username:password
  42. auth = base64.b64decode(ll[21:])
  43. # parse the URL
  44. pac_file_paths = ('/proxy.pac', '/wpad.dat', '/wpad.da')
  45. if split[1][0] == '/' and split[1] not in pac_file_paths:
  46. # just a path (not corresponding to a PAC file) => transparent proxy
  47. # Host header and therefore HTTP/1.1 are required
  48. if not request_host:
  49. return self.error_page(http_version, 400, 'Host header missing')
  50. archived_url = 'http://' + request_host + split[1]
  51. else:
  52. # full URL => explicit proxy
  53. archived_url = split[1]
  54. request_url = archived_url
  55. parsed = urllib.parse.urlparse(request_url)
  56. # make a path
  57. path = parsed.path
  58. if parsed.query != '': path += '?' + parsed.query
  59. if path == '': path == '/'
  60. # get the hostname for later
  61. host = parsed.netloc.split(':')
  62. hostname = host[0]
  63. # get cached date for redirects, if available
  64. original_date = effective_date
  65. effective_date = date_cache.get(effective_date + '\x00' + archived_url, effective_date)
  66. # get date from username:password, if available
  67. if auth:
  68. effective_date = auth.replace(':', '')
  69. # effectively handle the request
  70. try:
  71. if path in pac_file_paths:
  72. # PAC file to bypass QUICK_IMAGES requests if WAYBACK_API is not enabled
  73. pac = http_version.encode('ascii', 'ignore') + b''' 200 OK\r\n'''
  74. pac += b'''Content-Type: application/x-ns-proxy-autoconfig\r\n'''
  75. pac += b'''\r\n'''
  76. pac += b'''function FindProxyForURL(url, host)\r\n'''
  77. pac += b'''{\r\n'''
  78. if not availability_cache:
  79. pac += b''' if (shExpMatch(url, "http://web.archive.org/web/*") && !shExpMatch(url, "http://web.archive.org/web/??????????????if_/*"))\r\n'''
  80. pac += b''' {\r\n'''
  81. pac += b''' return "DIRECT";\r\n'''
  82. pac += b''' }\r\n'''
  83. pac += b''' return "PROXY ''' + pac_host.encode('ascii', 'ignore') + b'''";\r\n'''
  84. pac += b'''}\r\n'''
  85. self.request.sendall(pac)
  86. return
  87. elif hostname == 'web.archive.org':
  88. if path[:5] != '/web/':
  89. # launch settings if enabled
  90. if SETTINGS_PAGE:
  91. return self.handle_settings(parsed.query)
  92. else:
  93. return self.error_page(http_version, 404, 'Not Found')
  94. else:
  95. # pass requests through to web.archive.org
  96. # required for QUICK_IMAGES
  97. archived_url = '/'.join(request_url.split('/')[5:])
  98. _print('[>] [QI] {0}'.format(archived_url))
  99. elif GEOCITIES_FIX and hostname == 'www.geocities.com':
  100. # apply GEOCITIES_FIX and pass it through
  101. _print('[>] {0}'.format(archived_url))
  102. split = archived_url.split('/')
  103. hostname = split[2] = 'www.oocities.org'
  104. request_url = '/'.join(split)
  105. else:
  106. # get from Wayback
  107. _print('[>] {0}'.format(archived_url))
  108. request_url = 'http://web.archive.org/web/{0}/{1}'.format(effective_date, archived_url)
  109. if availability_cache is not None:
  110. # are we requesting from Wayback?
  111. split = request_url.split('/')
  112. # if so, get the closest available date from Wayback's API, to avoid archived 404 pages and other site errors
  113. if split[2] == 'web.archive.org':
  114. # remove extraneous :80 from URL
  115. if ':' in split[5]:
  116. if split[7][-3:] == ':80':
  117. split[7] = split[7][:-3]
  118. elif split[5][-3:] == ':80':
  119. split[5] = split[5][:-3]
  120. # check availability LRU cache
  121. availability_url = '/'.join(split[5:])
  122. new_url = availability_cache.get(availability_url, None)
  123. if new_url:
  124. # in cache => replace URL immediately
  125. request_url = new_url
  126. else:
  127. # not in cache => contact API
  128. try:
  129. availability = json.loads(urllib.request.urlopen('https://archive.org/wayback/available?url=' + urllib.parse.quote_plus(availability_url) + '&timestamp=' + effective_date[:14], timeout=10).read())
  130. closest = availability.get('archived_snapshots', {}).get('closest', {})
  131. new_date = closest.get('timestamp', None)
  132. except:
  133. _print('[!] Failed to fetch Wayback availability data')
  134. new_date = None
  135. if new_date and new_date != effective_date[:14]:
  136. # returned date is different
  137. new_url = closest['url']
  138. # add asset tag if one is present in the original URL
  139. if len(effective_date) > 14:
  140. split = new_url.split('/')
  141. split[4] += effective_date[14:]
  142. new_url = '/'.join(split)
  143. # replace URL and add it to the availability cache
  144. request_url = availability[availability_url] = new_url
  145. conn = urllib.request.urlopen(request_url)
  146. except urllib.error.HTTPError as e:
  147. # an error has been found
  148. if e.code in (403, 404, 412):
  149. # 403, 404 or tolerance exceeded => heuristically determine the static URL for some redirect scripts
  150. match = re.search('''[^/]/((?:http(?:%3A|:)(?:%2F|/)|www(?:[0-9]+)?\.(?:[^/%]+))(?:%2F|/).+)''', archived_url, re.I)
  151. if not match:
  152. match = re.search('''(?:\?|&)(?:[^=]+)=((?:http(?:%3A|:)(?:%2F|/)|www(?:[0-9]+)?\.(?:[^/%]+))?(?:%2F|/)[^&]+)''', archived_url, re.I)
  153. if match:
  154. # we found it
  155. new_url = urllib.parse.unquote_plus(match.group(1))
  156. # add protocol if the URL is absolute but missing a protocol
  157. if new_url[0] != '/' and '://' not in new_url:
  158. new_url = 'http://' + new_url
  159. _print('[r]', new_url)
  160. return self.redirect_page(http_version, new_url)
  161. elif e.code in (301, 302):
  162. # 301 or 302 => urllib-generated error about an infinite redirect loop
  163. _print('[!] Infinite redirect loop')
  164. return self.error_page(http_version, 508, 'Infinite Redirect Loop')
  165. if e.code != 412: # tolerance exceeded has its own error message above
  166. _print('[!] {0} {1}'.format(e.code, e.reason))
  167. # If the memento Link header is present, this is a website error
  168. # instead of a Wayback error. Pass it along if that's the case.
  169. if 'Link' in e.headers:
  170. conn = e
  171. else:
  172. return self.error_page(http_version, e.code, e.reason)
  173. # get content type
  174. content_type = conn.info().get('Content-Type')
  175. if content_type == None: content_type = 'text/html'
  176. if not CONTENT_TYPE_ENCODING and content_type.find(';') > -1: content_type = content_type[:content_type.find(';')]
  177. # set the mode: [0]wayback [1]oocities
  178. mode = 0
  179. if GEOCITIES_FIX and hostname in ['www.oocities.org', 'www.oocities.com']: mode = 1
  180. # Wayback will add its HTML to anything it thinks is HTML
  181. guessed_content_type = conn.info().get('X-Archive-Guessed-Content-Type')
  182. if not guessed_content_type:
  183. guessed_content_type = content_type
  184. if 'text/html' in guessed_content_type:
  185. # Some dynamically generated links may end up pointing to
  186. # web.archive.org. Correct that by redirecting the Wayback
  187. # portion of the URL away if it ends up being HTML consumed
  188. # through the QUICK_IMAGES interface.
  189. if hostname == 'web.archive.org':
  190. conn.close()
  191. archived_url = '/'.join(request_url.split('/')[5:])
  192. _print('[r] [QI]', archived_url)
  193. return self.redirect_page(http_version, archived_url, 301)
  194. # check if the date is within tolerance
  195. if DATE_TOLERANCE is not None:
  196. match = re.search('''//web\.archive\.org/web/([0-9]+)''', conn.geturl())
  197. if match:
  198. requested_date = match.group(1)
  199. if self.wayback_to_datetime(requested_date) > self.wayback_to_datetime(original_date) + datetime.timedelta(DATE_TOLERANCE):
  200. _print('[!]', requested_date, 'is outside the configured tolerance of', DATE_TOLERANCE, 'days')
  201. conn.close()
  202. return self.error_page(http_version, 412, 'Snapshot ' + requested_date + ' not available')
  203. # consume all data
  204. data = conn.read()
  205. # patch the page
  206. if mode == 0: # wayback
  207. if b'<title>Wayback Machine</title>' in data:
  208. if b'<p>This URL has been excluded from the Wayback Machine.</p>' in data:
  209. # exclusion error (robots.txt?)
  210. return self.error_page(http_version, 403, 'URL excluded')
  211. match = re.search(b'<iframe id="playback" src="((?:(?:http(?:s)?:)?//web.archive.org)?/web/[^"]+)"', data)
  212. if match:
  213. # media playback iframe
  214. # Some websites (especially ones that use frames)
  215. # inexplicably render inside a media playback iframe.
  216. # In that case, a simple redirect would result in a
  217. # redirect loop. Download the URL and render it instead.
  218. request_url = match.group(1).decode('ascii', 'ignore')
  219. archived_url = '/'.join(request_url.split('/')[5:])
  220. print('[f]', archived_url)
  221. try:
  222. conn = urllib.request.urlopen(request_url)
  223. except urllib.error.HTTPError as e:
  224. _print('[!]', e.code, e.reason)
  225. # If the memento Link header is present, this is a website error
  226. # instead of a Wayback error. Pass it along if that's the case.
  227. if 'Link' in e.headers:
  228. conn = e
  229. else:
  230. return self.error_page(http_version, e.code, e.reason)
  231. content_type = conn.info().get('Content-Type')
  232. if not CONTENT_TYPE_ENCODING and content_type.find(';') > -1: content_type = content_type[:content_type.find(';')]
  233. data = conn.read()
  234. if b'<title></title>' in data and b'<h1><span>Internet Archive\'s Wayback Machine</span></h1>' in data:
  235. match = re.search(b'<p class="impatient"><a href="(?:(?:http(?:s)?:)?//web\.archive\.org)?/web/([^/]+)/([^"]+)">Impatient\?</a></p>', data)
  236. if match:
  237. # wayback redirect page, follow it
  238. match2 = re.search(b'<p class="code shift red">Got an HTTP ([0-9]+)', data)
  239. try:
  240. redirect_code = int(match2.group(1))
  241. except:
  242. redirect_code = 302
  243. archived_url = match.group(2).decode('ascii', 'ignore')
  244. date_cache[effective_date + '\x00' + archived_url] = match.group(1).decode('ascii', 'ignore')
  245. print('[r]', archived_url)
  246. return self.redirect_page(http_version, archived_url, redirect_code)
  247. # pre-toolbar scripts and CSS
  248. data = re.sub(b'<script src="//archive\.org/(?:.*)<!-- End Wayback Rewrite JS Include -->(?:\r)?\n', b'', data, flags=re.S)
  249. # toolbar
  250. data = re.sub(b'<!-- BEGIN WAYBACK TOOLBAR INSERT -->(?:.*)<!-- END WAYBACK TOOLBAR INSERT -->', b'', data, flags=re.S)
  251. # comments on footer
  252. data = re.sub(b'<!--(?:\r)?\n FILE ARCHIVED (?:.*)$', b'', data, flags=re.S)
  253. # fix base tag
  254. data = re.sub(b'(<base (?:[^>]*)href=(?:["\'])?)(?:(?:http(?:s)?:)?//web.archive.org)?/web/(?:[^/]+)/', b'\\1', data, flags=re.I + re.S)
  255. # remove extraneous :80 from links
  256. data = re.sub(b'((?:(?:http(?:s)?:)?//web.archive.org)?/web/)([^/]+)/([^:]+)://([^:]+):80/', b'\\1\\2/\\3://\\4/', data)
  257. # fix links
  258. if QUICK_IMAGES:
  259. # QUICK_IMAGES works by intercepting asset URLs (those
  260. # with a date code ending in im_, js_...) and letting the
  261. # proxy pass them through. This may reduce load time
  262. # because Wayback doesn't have to hunt down the closest
  263. # copy of that asset to DATE, as those URLs have specific
  264. # date codes. This taints the HTML with web.archive.org
  265. # URLs. QUICK_IMAGES=2 uses the original URLs with an added
  266. # username:password, which taints less but is not supported
  267. # by all browsers - IE notably kills the whole page if it
  268. # sees an iframe pointing to an invalid URL.
  269. data = re.sub(b'(?:(?:http(?:s)?:)?//web.archive.org)?/web/([0-9]+)([a-z]+_)/([^:]+)://',
  270. QUICK_IMAGES == 2 and b'\\3://\\1:\\2@' or b'http://web.archive.org/web/\\1\\2/\\3://', data)
  271. data = re.sub(b'(?:(?:http(?:s)?:)?//web.archive.org)?/web/([0-9]+)/', b'', data)
  272. else:
  273. # Remove asset URLs while simultaneously adding them to the
  274. # LRU cache with their respective date.
  275. def add_to_date_cache(match):
  276. orig_url = match.group(2)
  277. date_cache[effective_date + '\x00' + orig_url.decode('ascii', 'ignore')] = match.group(1).decode('ascii', 'ignore')
  278. return orig_url
  279. data = re.sub(b'(?:(?:http(?:s)?:)?//web.archive.org)?/web/([^/]+)/([^"\'#<>]+)', add_to_date_cache, data)
  280. elif mode == 1: # oocities
  281. # viewport/cache-control/max-width code (header)
  282. data = re.sub(b'^(?:.*?)\n\n', b'', data, flags=re.S)
  283. # archive notice and tracking code (footer)
  284. data = re.sub(b'<style> \n.zoomout { -webkit-transition: (?:.*)$', b'', data, flags=re.S)
  285. # clearly labeled snippets from Geocities
  286. data = re.sub(b'^(?:.*)<\!-- text above generated by server\. PLEASE REMOVE -->', b'', data, flags=re.S)
  287. data = re.sub(b'<\!-- following code added by server\. PLEASE REMOVE -->(?:.*)<\!-- preceding code added by server\. PLEASE REMOVE -->', b'', data, flags=re.S)
  288. data = re.sub(b'<\!-- text below generated by server\. PLEASE REMOVE -->(?:.*)$', b'', data, flags=re.S)
  289. # fix links
  290. data = re.sub(b'//([^.]*)\.oocities\.com/', b'//\\1.geocities.com/', data, flags=re.S)
  291. self.send_response_headers(conn, http_version, content_type, request_url)
  292. self.request.sendall(data)
  293. else: # other data
  294. self.send_response_headers(conn, http_version, content_type, request_url)
  295. while True:
  296. data = conn.read(1024)
  297. if not data: break
  298. self.request.sendall(data)
  299. self.request.close()
  300. def send_response_headers(self, conn, http_version, content_type, request_url):
  301. """Generate and send the response headers."""
  302. response = http_version
  303. # pass the error code if there is one
  304. if isinstance(conn, urllib.error.HTTPError):
  305. response += '{0} {1}'.format(conn.code, conn.reason.replace('\n', ' '))
  306. else:
  307. response += '200 OK'
  308. # add content type, and the ETag for caching
  309. response += '\r\nContent-Type: ' + content_type + '\r\nETag: "' + request_url.replace('"', '') + '"\r\n'
  310. # add X-Archive-Orig-* headers
  311. headers = conn.info()
  312. for header in headers:
  313. if header.find('X-Archive-Orig-') == 0:
  314. orig_header = header[15:]
  315. # blacklist certain headers which may alter the client
  316. if orig_header.lower() not in ('connection', 'location', 'content-type', 'content-length', 'etag', 'authorization', 'set-cookie'):
  317. response += orig_header + ': ' + headers[header] + '\r\n'
  318. # finish and send the request
  319. response += '\r\n'
  320. self.request.sendall(response.encode('ascii', 'ignore'))
  321. def error_page(self, http_version, code, reason):
  322. """Generate an error page."""
  323. # make error page
  324. errorpage = '<html><head><title>{0} {1}</title>'.format(code, reason)
  325. # IE's same-origin policy throws "Access is denied." inside frames
  326. # loaded from a different origin. Use that to our advantage, even
  327. # though regular frames are also affected. IE also doesn't recognize
  328. # language="javascript1.4", so use 1.3 while blocking IE4 by detecting
  329. # the lack of screenLeft as IE4 is quite noisy with script errors.
  330. errorpage += '<script language="javascript1.3">if (window.screenLeft != null) { eval(\'try { var frameElement = window.frameElement; } catch (e) { document.location.href = "about:blank"; }\'); }</script>'
  331. errorpage += '<script language="javascript">if (window.self != window.top && !(window.frameElement && window.frameElement.tagName == "FRAME")) { document.location.href = "about:blank"; }</script>'
  332. errorpage += '</head><body><h1>{0}</h1><p>'.format(reason)
  333. # add code information
  334. if code in (404, 508): # page not archived or redirect loop
  335. errorpage += 'This page may not be archived by the Wayback Machine.'
  336. elif code == 403: # not crawled due to exclusion
  337. errorpage += 'This page was not archived due to a Wayback Machine exclusion.'
  338. elif code == 501: # method not implemented
  339. errorpage += 'WaybackProxy only implements the GET method.'
  340. elif code == 412: # outside of tolerance
  341. errorpage += 'The earliest snapshot for this page is outside of the configured tolerance interval.'
  342. elif code == 400 and reason == 'Host header missing': # no host header in transparent mode
  343. errorpage += 'WaybackProxy\'s transparent mode requires an HTTP/1.1 compliant client.'
  344. else: # another error
  345. errorpage += 'Unknown error. The Wayback Machine may be experiencing technical difficulties.'
  346. errorpage += '</p><hr><i>'
  347. errorpage += self.signature()
  348. errorpage += '</i></body></html>'
  349. # add padding for IE
  350. if len(errorpage) <= 512:
  351. padding = '\n<!-- This comment pads the HTML so Internet Explorer displays this error page instead of its own. '
  352. remainder = 510 - len(errorpage) - len(padding)
  353. if remainder > 0:
  354. padding += ' ' * remainder
  355. padding += '-->'
  356. errorpage += padding
  357. # send error page and stop
  358. self.request.sendall('{0} {1} {2}\r\nContent-Type: text/html\r\nContent-Length: {3}\r\n\r\n{4}'.format(http_version, code, reason, len(errorpage), errorpage).encode('utf8', 'ignore'))
  359. self.request.close()
  360. def redirect_page(self, http_version, target, code=302):
  361. """Generate a redirect page."""
  362. # make redirect page
  363. redirectpage = '<html><head><title>Redirect</title><meta http-equiv="refresh" content="0;url='
  364. redirectpage += target
  365. redirectpage += '"></head><body><p>If you are not redirected, <a href="'
  366. redirectpage += target
  367. redirectpage += '">click here</a>.</p></body></html>'
  368. # send redirect page and stop
  369. self.request.sendall('{0} {1} Found\r\nLocation: {2}\r\nContent-Type: text/html\r\nContent-Length: {3}\r\n\r\n{4}'.format(http_version, code, target, len(redirectpage), redirectpage).encode('utf8', 'ignore'))
  370. self.request.close()
  371. def handle_settings(self, query):
  372. """Generate the settings page."""
  373. global DATE, GEOCITIES_FIX, QUICK_IMAGES, CONTENT_TYPE_ENCODING
  374. if query != '': # handle any parameters that may have been sent
  375. parsed = urllib.parse.parse_qs(query)
  376. if 'date' in parsed and DATE != parsed['date'][0]:
  377. DATE = parsed['date'][0]
  378. date_cache.clear()
  379. availability_cache.clear()
  380. if 'dateTolerance' in parsed and DATE_TOLERANCE != parsed['dateTolerance'][0]:
  381. DATE_TOLERANCE = parsed['dateTolerance'][0]
  382. GEOCITIES_FIX = 'gcFix' in parsed
  383. QUICK_IMAGES = 'quickImages' in parsed
  384. CONTENT_TYPE_ENCODING = 'ctEncoding' in parsed
  385. # send the page and stop
  386. settingspage = 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n'
  387. settingspage += '<html><head><title>WaybackProxy Settings</title></head><body><p><b>'
  388. settingspage += self.signature()
  389. settingspage += '</b></p><form method="get" action="/">'
  390. settingspage += '<p>Date to get pages from: <input type="text" name="date" size="8" value="'
  391. settingspage += DATE
  392. settingspage += '"><p>Date tolerance: <input type="text" name="dateTolerance" size="8" value="'
  393. settingspage += DATE_TOLERANCE
  394. settingspage += '"> days<br><input type="checkbox" name="gcFix"'
  395. if GEOCITIES_FIX: settingspage += ' checked'
  396. settingspage += '> Geocities Fix<br><input type="checkbox" name="quickImages"'
  397. if QUICK_IMAGES: settingspage += ' checked'
  398. settingspage += '> Quick images<br><input type="checkbox" name="ctEncoding"'
  399. if CONTENT_TYPE_ENCODING: settingspage += ' checked'
  400. settingspage += '> Encoding in Content-Type</p><p><input type="submit" value="Save"></p></form></body></html>'
  401. self.request.send(settingspage.encode('utf8', 'ignore'))
  402. self.request.close()
  403. def signature(self):
  404. """Return the server signature."""
  405. return 'WaybackProxy on {0}'.format(socket.gethostname())
  406. def wayback_to_datetime(self, date):
  407. """Convert a Wayback format date string to a datetime.datetime object."""
  408. # parse the string
  409. year = 1995
  410. month = 12
  411. day = 31
  412. hour = 0
  413. minute = 0
  414. second = 0
  415. if len(date) > 0:
  416. year = int(date[:4])
  417. if len(date) > 4:
  418. month = int(date[4:6])
  419. if len(date) > 6:
  420. day = int(date[6:8])
  421. if len(date) > 8:
  422. hour = int(date[8:10])
  423. if len(date) > 10:
  424. minute = int(date[10:12])
  425. if len(date) > 12:
  426. second = int(date[12:14])
  427. # sanitize the numbers
  428. if month < 1:
  429. month = 1
  430. elif month > 12:
  431. month = 12
  432. if day < 1:
  433. day = 1
  434. elif day > 31:
  435. day = 31
  436. if hour > 23:
  437. hour = 23
  438. elif hour < 0:
  439. hour = 0
  440. if minute > 59:
  441. minute = 59
  442. elif minute < 0:
  443. minute = 0
  444. if second > 59:
  445. second = 59
  446. elif second < 0:
  447. second = 0
  448. # if the day is invalid for that month, work its way down
  449. try:
  450. dt = datetime.datetime(year, month, day, hour, minute, second) # max 31
  451. except:
  452. try:
  453. dt = datetime.datetime(year, month, day - 1, hour, minute, second) # max 30
  454. except:
  455. try:
  456. dt = datetime.datetime(year, month, day - 2, hour, minute, second) # max 29
  457. except:
  458. dt = datetime.datetime(year, month, day - 3, hour, minute, second) # max 28
  459. return dt
  460. print_lock = threading.Lock()
  461. def _print(*args, linebreak=True):
  462. """Logging function."""
  463. if SILENT: return
  464. s = ' '.join([str(x) for x in args])
  465. print_lock.acquire()
  466. sys.stdout.write(linebreak and (s + '\n') or s)
  467. sys.stdout.flush()
  468. print_lock.release()
  469. def main():
  470. """Starts the server."""
  471. server = ThreadingTCPServer(('', LISTEN_PORT), Handler)
  472. _print('[-] Now listening on port {0}'.format(LISTEN_PORT))
  473. try:
  474. server.serve_forever()
  475. except KeyboardInterrupt: # Ctrl+C to stop
  476. pass
  477. if __name__ == '__main__':
  478. main()