Java tutorial
/*
 * Copyright 2014 steve(at)threadswarm.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.threadswarm.imagefeedarchiver.processor;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.ConnectionClosedException;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.threadswarm.imagefeedarchiver.DownloadStatus;
import com.threadswarm.imagefeedarchiver.FeedUtils;
import com.threadswarm.imagefeedarchiver.dao.ProcessedRssItemDAO;
import com.threadswarm.imagefeedarchiver.model.ProcessedRssItem;
import com.threadswarm.imagefeedarchiver.model.RssItem;
import com.threadswarm.imagefeedarchiver.model.RssMediaContent;

/**
 * A {@link Callable} that downloads every media-content entry of a single
 * {@link RssItem} into the configured output directory, records a digest of
 * the bytes written, and persists the resulting {@link ProcessedRssItem}
 * through the supplied {@link ProcessedRssItemDAO}.
 */
public class RssItemProcessor implements Callable<ProcessedRssItem> {

    private static final Logger LOGGER = LoggerFactory.getLogger(RssItemProcessor.class);

    private final HttpClient httpClient;
    private final RssItem rssItem;
    private final ProcessedRssItemDAO itemDAO;
    private final File outputDirectory;
    private final List<Header> headerList;
    private final Set<URI> processedURISet;
    private final long downloadDelay;
    private final boolean delayRequired;
    private final boolean forceHttps;

    /**
     * Creates a processor for one feed item.
     *
     * @param httpClient      client used to execute the download requests
     * @param rssItem         the feed item whose media content is downloaded
     * @param itemDAO         DAO used to persist the processed item
     * @param outputDirectory directory the downloaded files are written to
     * @param headerList      headers added to every download request; a
     *                        {@code Referer} entry, if present, is recorded as
     *                        the originating feed URL (may be {@code null})
     * @param processedURISet set of URIs already handled; a URI that is
     *                        already contained in it is skipped
     * @param downloadDelay   milliseconds to sleep before each download; no
     *                        delay is applied when the value is not positive
     * @param forceHttps      when {@code true} the target URL is passed through
     *                        {@code FeedUtils.rewriteUrlStringToHttps} first
     */
    public RssItemProcessor(HttpClient httpClient, RssItem rssItem, ProcessedRssItemDAO itemDAO,
            File outputDirectory, List<Header> headerList, Set<URI> processedURISet,
            long downloadDelay, boolean forceHttps) {
        this.httpClient = httpClient;
        this.rssItem = rssItem;
        this.itemDAO = itemDAO;
        this.outputDirectory = outputDirectory;
        this.headerList = headerList;
        this.processedURISet = processedURISet;
        this.downloadDelay = downloadDelay;
        this.forceHttps = forceHttps;
        delayRequired = (downloadDelay > 0);
    }

    /**
     * Downloads each media-content entry of the wrapped item, one after the
     * other, and saves the processed item.
     *
     * @return the processed item, saved through the DAO
     * @throws Exception if the download delay is interrupted (the thread's
     *                   interrupt status is restored before rethrowing) or an
     *                   unexpected runtime failure occurs
     */
    @Override
    public ProcessedRssItem call() throws Exception {
        ProcessedRssItem processedItem = new ProcessedRssItem(rssItem, DownloadStatus.PENDING);

        String refererUrlString = getRefererHeaderValue(headerList);
        if (refererUrlString != null) {
            processedItem.setOriginatingFeedUrl(refererUrlString);
        }

        List<RssMediaContent> mediaContentList = processedItem.getRssItem().getMediaContent();
        if (mediaContentList != null && !mediaContentList.isEmpty()) {
            for (RssMediaContent mediaContent : mediaContentList) {
                if (delayRequired) {
                    try {
                        Thread.sleep(downloadDelay);
                    } catch (InterruptedException e) {
                        // Keep the interrupt visible to whoever runs this task.
                        Thread.currentThread().interrupt();
                        throw e;
                    }
                }
                downloadRssMediaContent(processedItem, mediaContent);
            }
        }

        itemDAO.save(processedItem);
        return processedItem;
    }

    /**
     * Downloads a single media-content entry to a file in the output
     * directory and updates {@code processedItem} with the outcome.
     * <p>
     * The item is marked {@code COMPLETED} (and receives hash, filename and
     * download date) only when the whole body was written and the size of the
     * file equals the expected content length; in every other case the
     * partial file is removed and the item is marked {@code FAILED}. A URI
     * that is already present in the processed-URI set is skipped without
     * touching the item.
     *
     * @param processedItem the item to update and save
     * @param mediaContent  the media-content entry to download
     */
    private void downloadRssMediaContent(ProcessedRssItem processedItem, RssMediaContent mediaContent) {
        DownloadStatus downloadStatus = DownloadStatus.FAILED;
        HttpEntity responseEntity = null;
        try {
            String targetUrlString = mediaContent.getUrlString();
            if (forceHttps) {
                targetUrlString = FeedUtils.rewriteUrlStringToHttps(targetUrlString);
            }

            URI targetURI = FeedUtils.getUriFromUrlString(targetUrlString);

            boolean freshURI = processedURISet.add(targetURI);
            if (!freshURI) {
                LOGGER.warn("Skipping previously processed URI: {}", targetURI);
                return; //abort processing
            }

            LOGGER.info("Attempting to download {}", targetURI);
            HttpGet imageGet = new HttpGet(targetURI);
            if (headerList != null) {
                for (Header header : headerList) {
                    imageGet.addHeader(header);
                }
            }

            HttpResponse imageResponse = httpClient.execute(imageGet);
            // Capture the entity immediately so the outer finally can always
            // consume it, even if one of the steps below throws.
            responseEntity = imageResponse.getEntity();
            if (responseEntity == null) {
                throw new IOException("No response entity was returned for " + targetURI);
            }
            // NOTE(review): the HTTP status code is not inspected here, so an
            // error page could be stored as content -- confirm whether
            // FeedUtils or the caller already guards against this.

            // Flatten the URL path into a single file name.
            String originalFileName = StringUtils.stripStart(targetURI.toURL().getFile(), "/");
            originalFileName = StringUtils.replace(originalFileName, "/", "_");
            File outputFile = getOutputFile(originalFileName);

            long expectedContentLength = FeedUtils.calculateBestExpectedContentLength(imageResponse, mediaContent);

            BufferedInputStream bis = null;
            DigestOutputStream fos = null;
            try {
                // Resolve the digest before the file is opened so that a
                // missing algorithm cannot leave an unclosed FileOutputStream.
                MessageDigest sha = MessageDigest.getInstance("SHA");

                bis = new BufferedInputStream(responseEntity.getContent());
                fos = new DigestOutputStream(new FileOutputStream(outputFile), sha);

                byte[] buffer = new byte[8192];
                int bytesRead;
                while ((bytesRead = bis.read(buffer, 0, buffer.length)) != -1) {
                    fos.write(buffer, 0, bytesRead);
                }
                fos.flush();

                // Verify the size before recording anything on the item so a
                // truncated download never ends up persisted as COMPLETED.
                boolean lengthMismatch = outputFile.exists()
                        && outputFile.length() != expectedContentLength;
                if (lengthMismatch) {
                    LOGGER.warn("Downloaded {} bytes for {} but expected {}",
                            new Object[] { outputFile.length(), targetURI, expectedContentLength });
                } else {
                    byte[] digestBytes = fos.getMessageDigest().digest();
                    String digestString = Hex.encodeHexString(digestBytes);
                    LOGGER.info("Downloaded - {} (SHA: {})", targetURI, digestString);

                    processedItem.setDownloadDate(new Date());
                    processedItem.setHash(digestString);
                    processedItem.setFilename(outputFile.toString());
                    downloadStatus = DownloadStatus.COMPLETED;
                }
            } catch (ConnectionClosedException e) {
                LOGGER.error("An Exception was thrown while attempting to read HTTP entity content", e);
            } catch (NoSuchAlgorithmException e) {
                LOGGER.error("The SHA-1 hashing algorithm is not available on this JVM", e);
            } finally {
                IOUtils.closeQuietly(bis);
                IOUtils.closeQuietly(fos);

                if (downloadStatus != DownloadStatus.COMPLETED) {
                    processedItem.setDownloadStatus(DownloadStatus.FAILED);
                    if (outputFile.exists()) {
                        if (outputFile.delete()) {
                            LOGGER.warn("Deleted partial/failed file: {}", outputFile);
                        } else {
                            LOGGER.warn("Unable to delete partial/failed file: {}", outputFile);
                        }
                    }
                }
            }
        } catch (IOException e) {
            LOGGER.error("An Exception was thrown while attempting to download image content", e);
        } catch (URISyntaxException e) {
            // Pass the offending input explicitly; with only the throwable as
            // argument the placeholder would be left unfilled.
            LOGGER.error("The supplied URI, {}, violates syntax rules", e.getInput(), e);
        } finally {
            EntityUtils.consumeQuietly(responseEntity);
        }

        processedItem.setDownloadStatus(downloadStatus);
        itemDAO.save(processedItem);
    }

    /**
     * Returns a file in the output directory that does not exist yet,
     * appending {@code _1}, {@code _2}, ... to the name until it is unused.
     *
     * @param originalFileName preferred file name
     * @return a {@link File} that did not exist at the time of the check
     */
    private File getOutputFile(String originalFileName) {
        File outputFile = new File(outputDirectory, originalFileName);
        int count = 1;
        while (outputFile.exists()) {
            outputFile = new File(outputDirectory, originalFileName + "_" + count++);
        }

        return outputFile;
    }

    /**
     * Finds the value of the first {@code Referer} header in the list.
     * Header names are compared case-insensitively.
     *
     * @param headerList headers to search; may be {@code null}
     * @return the header value, or {@code null} when no such header exists
     */
    private static String getRefererHeaderValue(List<Header> headerList) {
        if (headerList == null) {
            return null;
        }

        for (Header header : headerList) {
            if (HttpHeaders.REFERER.equalsIgnoreCase(header.getName())) {
                return header.getValue();
            }
        }

        return null;
    }
}