bandcampjson.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. import logging
  2. import demjson3
  3. class BandcampJSON:
  4. def __init__(self, body, debugging: bool = False):
  5. self.body = body
  6. self.json_data = []
  7. self.logger = logging.getLogger("bandcamp-dl").getChild("JSON")
  8. def generate(self):
  9. """Grabbing needed data from the page"""
  10. self.get_pagedata()
  11. self.get_js()
  12. return self.json_data
  13. def get_pagedata(self):
  14. self.logger.debug(" Grab pagedata JSON..")
  15. pagedata = self.body.find('div', {'id': 'pagedata'})['data-blob']
  16. self.json_data.append(pagedata)
  17. def get_js(self):
  18. """Get <script> element containing the data we need and return the raw JS"""
  19. self.logger.debug(" Grabbing embedded scripts..")
  20. embedded_scripts_raw = [self.body.find("script", {"type": "application/ld+json"}).string]
  21. for script in self.body.find_all('script'):
  22. try:
  23. album_info = script['data-tralbum']
  24. embedded_scripts_raw.append(album_info)
  25. except Exception:
  26. continue
  27. for script in embedded_scripts_raw:
  28. js_data = self.js_to_json(script)
  29. self.json_data.append(js_data)
  30. def js_to_json(self, js_data):
  31. """Convert JavaScript dictionary to JSON"""
  32. self.logger.debug(" Converting JS to JSON..")
  33. # Decode with demjson first to reformat keys and lists
  34. decoded_js = demjson3.decode(js_data)
  35. return demjson3.encode(decoded_js)