DownloadFileThread.java 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. package com.rarchives.ripme.ripper;
  2. import java.io.*;
  3. import java.net.*;
  4. import java.nio.file.Files;
  5. import java.nio.file.Paths;
  6. import java.util.Arrays;
  7. import java.util.HashMap;
  8. import java.util.Map;
  9. import javax.net.ssl.HttpsURLConnection;
  10. import org.apache.logging.log4j.LogManager;
  11. import org.apache.logging.log4j.Logger;
  12. import org.jsoup.HttpStatusException;
  13. import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
  14. import com.rarchives.ripme.utils.Utils;
  15. /**
  16. * Thread for downloading files. Includes retry logic, observer notifications,
  17. * and other goodies.
  18. */
  19. class DownloadFileThread implements Runnable {
  20. private static final Logger logger = LogManager.getLogger(DownloadFileThread.class);
  21. private String referrer = "";
  22. private Map<String, String> cookies = new HashMap<>();
  23. private final URL url;
  24. private File saveAs;
  25. private final String prettySaveAs;
  26. private final AbstractRipper observer;
  27. private final int retries;
  28. private final Boolean getFileExtFromMIME;
  29. private final int TIMEOUT;
  30. private final int retrySleep;
  31. public DownloadFileThread(URL url, File saveAs, AbstractRipper observer, Boolean getFileExtFromMIME) {
  32. super();
  33. this.url = url;
  34. this.saveAs = saveAs;
  35. this.prettySaveAs = Utils.removeCWD(saveAs.toPath());
  36. this.observer = observer;
  37. this.retries = Utils.getConfigInteger("download.retries", 1);
  38. this.TIMEOUT = Utils.getConfigInteger("download.timeout", 60000);
  39. this.retrySleep = Utils.getConfigInteger("download.retry.sleep", 0);
  40. this.getFileExtFromMIME = getFileExtFromMIME;
  41. }
  42. public void setReferrer(String referrer) {
  43. this.referrer = referrer;
  44. }
  45. public void setCookies(Map<String, String> cookies) {
  46. this.cookies = cookies;
  47. }
  48. /**
  49. * Attempts to download the file. Retries as needed. Notifies observers upon
  50. * completion/error/warn.
  51. */
  52. @Override
  53. public void run() {
  54. // First thing we make sure the file name doesn't have any illegal chars in it
  55. saveAs = new File(
  56. saveAs.getParentFile().getAbsolutePath() + File.separator + Utils.sanitizeSaveAs(saveAs.getName()));
  57. long fileSize = 0;
  58. int bytesTotal;
  59. int bytesDownloaded = 0;
  60. if (saveAs.exists() && observer.tryResumeDownload()) {
  61. fileSize = saveAs.length();
  62. }
  63. try {
  64. observer.stopCheck();
  65. } catch (IOException e) {
  66. observer.downloadErrored(url, Utils.getLocalizedString("download.interrupted"));
  67. return;
  68. }
  69. if (saveAs.exists() && !observer.tryResumeDownload() && !getFileExtFromMIME
  70. || Utils.fuzzyExists(Paths.get(saveAs.getParent()), saveAs.getName()) && getFileExtFromMIME
  71. && !observer.tryResumeDownload()) {
  72. if (Utils.getConfigBoolean("file.overwrite", false)) {
  73. logger.info("[!] " + Utils.getLocalizedString("deleting.existing.file") + prettySaveAs);
  74. if (!saveAs.delete()) logger.error("could not delete existing file: " + saveAs.getAbsolutePath());
  75. } else {
  76. logger.info("[!] " + Utils.getLocalizedString("skipping") + " " + url + " -- "
  77. + Utils.getLocalizedString("file.already.exists") + ": " + prettySaveAs);
  78. observer.downloadExists(url, saveAs.toPath());
  79. return;
  80. }
  81. }
  82. URL urlToDownload = this.url;
  83. boolean redirected = false;
  84. int tries = 0; // Number of attempts to download
  85. do {
  86. tries += 1;
  87. try {
  88. logger.info(" Downloading file: " + urlToDownload + (tries > 0 ? " Retry #" + tries : ""));
  89. observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
  90. // Setup HTTP request
  91. HttpURLConnection huc;
  92. if (this.url.toString().startsWith("https")) {
  93. huc = (HttpsURLConnection) urlToDownload.openConnection();
  94. } else {
  95. huc = (HttpURLConnection) urlToDownload.openConnection();
  96. }
  97. huc.setInstanceFollowRedirects(true);
  98. // It is important to set both ConnectTimeout and ReadTimeout. If you don't then
  99. // ripme will wait forever
  100. // for the server to send data after connecting.
  101. huc.setConnectTimeout(TIMEOUT);
  102. huc.setReadTimeout(TIMEOUT);
  103. huc.setRequestProperty("accept", "*/*");
  104. if (!referrer.equals("")) {
  105. huc.setRequestProperty("Referer", referrer); // Sic
  106. }
  107. huc.setRequestProperty("User-agent", AbstractRipper.USER_AGENT);
  108. StringBuilder cookie = new StringBuilder();
  109. for (String key : cookies.keySet()) {
  110. if (!cookie.toString().equals("")) {
  111. cookie.append("; ");
  112. }
  113. cookie.append(key).append("=").append(cookies.get(key));
  114. }
  115. huc.setRequestProperty("Cookie", cookie.toString());
  116. if (observer.tryResumeDownload()) {
  117. if (fileSize != 0) {
  118. huc.setRequestProperty("Range", "bytes=" + fileSize + "-");
  119. }
  120. }
  121. logger.debug(Utils.getLocalizedString("request.properties") + ": " + huc.getRequestProperties());
  122. huc.connect();
  123. int statusCode = huc.getResponseCode();
  124. logger.debug("Status code: " + statusCode);
  125. // If the server doesn't allow resuming downloads error out
  126. if (statusCode != 206 && observer.tryResumeDownload() && saveAs.exists()) {
  127. // TODO find a better way to handle servers that don't support resuming
  128. // downloads then just erroring out
  129. throw new IOException(Utils.getLocalizedString("server.doesnt.support.resuming.downloads"));
  130. }
  131. if (statusCode / 100 == 3) { // 3xx Redirect
  132. if (!redirected) {
  133. // Don't increment retries on the first redirect
  134. tries--;
  135. redirected = true;
  136. }
  137. String location = huc.getHeaderField("Location");
  138. urlToDownload = new URI(location).toURL();
  139. // Throw exception so download can be retried
  140. throw new IOException("Redirect status code " + statusCode + " - redirect to " + location);
  141. }
  142. if (statusCode / 100 == 4) { // 4xx errors
  143. logger.error("[!] " + Utils.getLocalizedString("nonretriable.status.code") + " " + statusCode
  144. + " while downloading from " + url);
  145. observer.downloadErrored(url, Utils.getLocalizedString("nonretriable.status.code") + " "
  146. + statusCode + " while downloading " + url.toExternalForm());
  147. return; // Not retriable, drop out.
  148. }
  149. if (statusCode / 100 == 5) { // 5xx errors
  150. observer.downloadErrored(url, Utils.getLocalizedString("retriable.status.code") + " " + statusCode
  151. + " while downloading " + url.toExternalForm());
  152. // Throw exception so download can be retried
  153. throw new IOException(Utils.getLocalizedString("retriable.status.code") + " " + statusCode);
  154. }
  155. if (huc.getContentLength() == 503 && urlToDownload.getHost().endsWith("imgur.com")) {
  156. // Imgur image with 503 bytes is "404"
  157. logger.error("[!] Imgur image is 404 (503 bytes long): " + url);
  158. observer.downloadErrored(url, "Imgur image is 404: " + url.toExternalForm());
  159. return;
  160. }
  161. // If the ripper is using the bytes progress bar set bytesTotal to
  162. // huc.getContentLength()
  163. if (observer.useByteProgessBar()) {
  164. bytesTotal = huc.getContentLength();
  165. observer.setBytesTotal(bytesTotal);
  166. observer.sendUpdate(STATUS.TOTAL_BYTES, bytesTotal);
  167. logger.debug("Size of file at " + this.url + " = " + bytesTotal + "b");
  168. }
  169. // Save file
  170. InputStream bis;
  171. bis = new BufferedInputStream(huc.getInputStream());
  172. // Check if we should get the file ext from the MIME type
  173. if (getFileExtFromMIME) {
  174. String fileExt = URLConnection.guessContentTypeFromStream(bis);
  175. if (fileExt != null) {
  176. fileExt = fileExt.replaceAll("image/", "");
  177. saveAs = new File(saveAs.toString() + "." + fileExt);
  178. } else {
  179. logger.error("Was unable to get content type from stream");
  180. // Try to get the file type from the magic number
  181. byte[] magicBytes = new byte[8];
  182. bis.read(magicBytes, 0, 5);
  183. bis.reset();
  184. fileExt = Utils.getEXTFromMagic(magicBytes);
  185. if (fileExt != null) {
  186. saveAs = new File(saveAs.toString() + "." + fileExt);
  187. } else {
  188. logger.error(Utils.getLocalizedString("was.unable.to.get.content.type.using.magic.number"));
  189. logger.error(
  190. Utils.getLocalizedString("magic.number.was") + ": " + Arrays.toString(magicBytes));
  191. }
  192. }
  193. }
  194. // If we're resuming a download we append data to the existing file
  195. OutputStream fos = null;
  196. if (statusCode == 206) {
  197. fos = new FileOutputStream(saveAs, true);
  198. } else {
  199. try {
  200. fos = new FileOutputStream(saveAs);
  201. } catch (FileNotFoundException e) {
  202. // We do this because some filesystems have a max name length
  203. if (e.getMessage().contains("File name too long")) {
  204. logger.error("The filename " + saveAs.getName()
  205. + " is to long to be saved on this file system.");
  206. logger.info("Shortening filename");
  207. String[] saveAsSplit = saveAs.getName().split("\\.");
  208. // Get the file extension so when we shorten the file name we don't cut off the
  209. // file extension
  210. String fileExt = saveAsSplit[saveAsSplit.length - 1];
  211. // The max limit for filenames on Linux with Ext3/4 is 255 bytes
  212. logger.info(saveAs.getName().substring(0, 254 - fileExt.length()) + fileExt);
  213. String filename = saveAs.getName().substring(0, 254 - fileExt.length()) + "." + fileExt;
  214. // We can't just use the new file name as the saveAs because the file name
  215. // doesn't include the
  216. // users save path, so we get the user save path from the old saveAs
  217. saveAs = new File(saveAs.getParentFile().getAbsolutePath() + File.separator + filename);
  218. fos = new FileOutputStream(saveAs);
  219. } else if (saveAs.getAbsolutePath().length() > 259 && Utils.isWindows()) {
  220. // This if is for when the file path has gone above 260 chars which windows does
  221. // not allow
  222. fos = Files.newOutputStream(
  223. Utils.shortenSaveAsWindows(saveAs.getParentFile().getPath(), saveAs.getName()));
  224. assert fos != null: "After shortenSaveAsWindows: " + saveAs.getAbsolutePath();
  225. }
  226. assert fos != null: e.getStackTrace();
  227. }
  228. }
  229. byte[] data = new byte[1024 * 256];
  230. int bytesRead;
  231. boolean shouldSkipFileDownload = huc.getContentLength() / 1000000 >= 10 && AbstractRipper.isThisATest();
  232. // If this is a test rip we skip large downloads
  233. if (shouldSkipFileDownload) {
  234. logger.debug("Not downloading whole file because it is over 10mb and this is a test");
  235. } else {
  236. while ((bytesRead = bis.read(data)) != -1) {
  237. try {
  238. observer.stopCheck();
  239. } catch (IOException e) {
  240. observer.downloadErrored(url, Utils.getLocalizedString("download.interrupted"));
  241. return;
  242. }
  243. fos.write(data, 0, bytesRead);
  244. if (observer.useByteProgessBar()) {
  245. bytesDownloaded += bytesRead;
  246. observer.setBytesCompleted(bytesDownloaded);
  247. observer.sendUpdate(STATUS.COMPLETED_BYTES, bytesDownloaded);
  248. }
  249. }
  250. }
  251. bis.close();
  252. fos.close();
  253. break; // Download successful: break out of infinite loop
  254. } catch (SocketTimeoutException timeoutEx) {
  255. // Handle the timeout
  256. logger.error("[!] " + url.toExternalForm() + " timedout!");
  257. // Download failed, break out of loop
  258. break;
  259. } catch (HttpStatusException hse) {
  260. logger.debug(Utils.getLocalizedString("http.status.exception"), hse);
  261. logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + urlToDownload);
  262. if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
  263. observer.downloadErrored(url,
  264. "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
  265. return;
  266. }
  267. } catch (IOException | URISyntaxException e) {
  268. logger.debug("IOException", e);
  269. logger.error("[!] " + Utils.getLocalizedString("exception.while.downloading.file") + ": " + url + " - "
  270. + e.getMessage());
  271. } catch (NullPointerException npe){
  272. logger.error("[!] " + Utils.getLocalizedString("failed.to.download") + " for URL " + url);
  273. observer.downloadErrored(url,
  274. Utils.getLocalizedString("failed.to.download") + " " + url.toExternalForm());
  275. return;
  276. }
  277. if (tries > this.retries) {
  278. logger.error("[!] " + Utils.getLocalizedString("exceeded.maximum.retries") + " (" + this.retries
  279. + ") for URL " + url);
  280. observer.downloadErrored(url,
  281. Utils.getLocalizedString("failed.to.download") + " " + url.toExternalForm());
  282. return;
  283. } else {
  284. if (retrySleep > 0) {
  285. Utils.sleep(retrySleep);
  286. }
  287. }
  288. } while (true);
  289. observer.downloadCompleted(url, saveAs.toPath());
  290. logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
  291. }
  292. }