com.threadswarm.imagefeedarchiver.processor.RssItemProcessor.java Source code

Java tutorial

Introduction

Here is the source code for com.threadswarm.imagefeedarchiver.processor.RssItemProcessor.java

Source

/*
 * Copyright 2014 steve(at)threadswarm.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.threadswarm.imagefeedarchiver.processor;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.ConnectionClosedException;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.threadswarm.imagefeedarchiver.DownloadStatus;
import com.threadswarm.imagefeedarchiver.FeedUtils;
import com.threadswarm.imagefeedarchiver.dao.ProcessedRssItemDAO;
import com.threadswarm.imagefeedarchiver.model.ProcessedRssItem;
import com.threadswarm.imagefeedarchiver.model.RssItem;
import com.threadswarm.imagefeedarchiver.model.RssMediaContent;

/**
 * Downloads every media-content entry of a single {@link RssItem} into the output directory and
 * records the outcome in a {@link ProcessedRssItem} that is persisted through the supplied
 * {@link ProcessedRssItemDAO}.
 *
 * <p>Each downloaded file is hashed with the JCA {@code "SHA"} digest while it is written, and its
 * size on disk is compared with the expected content length before the download is recorded as
 * {@link DownloadStatus#COMPLETED}.
 */
public class RssItemProcessor implements Callable<ProcessedRssItem> {

    private static final Logger LOGGER = LoggerFactory.getLogger(RssItemProcessor.class);

    private final HttpClient httpClient;
    private final RssItem rssItem;
    private final ProcessedRssItemDAO itemDAO;
    private final File outputDirectory;
    private final List<Header> headerList;
    private final Set<URI> processedURISet;
    private final long downloadDelay;
    private final boolean delayRequired;
    private final boolean forceHttps;

    /**
     * Creates a processor for one feed item.
     *
     * @param httpClient      client used to execute the download requests
     * @param rssItem         feed item whose media content is to be downloaded
     * @param itemDAO         DAO used to persist the processing result
     * @param outputDirectory directory the downloaded files are written to
     * @param headerList      headers added to every download request; a {@code Referer} header, if
     *                        present, is also stored as the item's originating feed URL
     * @param processedURISet URIs that have already been handled; each target URI is added before
     *                        its download is attempted and URIs already present are skipped
     * @param downloadDelay   pause in milliseconds before each download; values {@code <= 0}
     *                        disable the pause
     * @param forceHttps      whether target URLs are rewritten to HTTPS before downloading
     */
    public RssItemProcessor(HttpClient httpClient, RssItem rssItem, ProcessedRssItemDAO itemDAO,
            File outputDirectory, List<Header> headerList, Set<URI> processedURISet, long downloadDelay,
            boolean forceHttps) {
        this.httpClient = httpClient;
        this.rssItem = rssItem;
        this.itemDAO = itemDAO;
        this.outputDirectory = outputDirectory;
        this.headerList = headerList;
        this.processedURISet = processedURISet;
        this.downloadDelay = downloadDelay;
        this.forceHttps = forceHttps;
        this.delayRequired = (downloadDelay > 0);
    }

    /**
     * Downloads all media content of the wrapped item, one entry after another, and saves the
     * resulting {@link ProcessedRssItem}.
     *
     * @return the processed item, initially {@link DownloadStatus#PENDING} and updated by each
     *         attempted download
     * @throws Exception if the thread is interrupted while pausing between downloads, or if an
     *                   unexpected runtime failure occurs
     */
    @Override
    public ProcessedRssItem call() throws Exception {
        ProcessedRssItem processedItem = new ProcessedRssItem(rssItem, DownloadStatus.PENDING);

        String refererUrlString = getRefererHeaderValue(headerList);
        if (refererUrlString != null) {
            processedItem.setOriginatingFeedUrl(refererUrlString);
        }

        List<RssMediaContent> mediaContentList = processedItem.getRssItem().getMediaContent();
        if (mediaContentList != null && !mediaContentList.isEmpty()) {
            for (RssMediaContent mediaContent : mediaContentList) {
                if (delayRequired) {
                    Thread.sleep(downloadDelay);
                }

                downloadRssMediaContent(processedItem, mediaContent);
            }
        }

        itemDAO.save(processedItem);

        return processedItem;
    }

    /**
     * Downloads a single media-content entry and records the outcome on {@code processedItem}.
     *
     * <p>A URI that is already contained in the processed-URI set is skipped without touching the
     * item. Otherwise the item ends up {@link DownloadStatus#COMPLETED} only when the content was
     * written completely and its size matches the expected content length; in every other case the
     * output file is removed and the item is marked {@link DownloadStatus#FAILED}. The item is saved
     * after every attempted download.
     *
     * @param processedItem item that receives download date, hash, filename and status
     * @param mediaContent  media-content entry describing the resource to download
     */
    private void downloadRssMediaContent(ProcessedRssItem processedItem, RssMediaContent mediaContent) {
        DownloadStatus downloadStatus = DownloadStatus.FAILED;
        HttpEntity responseEntity = null;
        try {
            String targetUrlString = mediaContent.getUrlString();
            if (forceHttps) {
                targetUrlString = FeedUtils.rewriteUrlStringToHttps(targetUrlString);
            }

            URI targetURI = FeedUtils.getUriFromUrlString(targetUrlString);

            boolean freshURI = processedURISet.add(targetURI);
            if (!freshURI) {
                LOGGER.warn("Skipping previously processed URI: {}", targetURI);
                return; //abort processing
            }

            LOGGER.info("Attempting to download {}", targetURI);
            HttpGet imageGet = new HttpGet(targetURI);

            for (Header header : headerList) {
                imageGet.addHeader(header);
            }

            // NOTE(review): the response status code is not inspected, so an error page returned
            // with a matching content length would be archived like an image - confirm whether a
            // 2xx check is wanted here.
            HttpResponse imageResponse = httpClient.execute(imageGet);
            // Capture the entity right away so the outer finally can release the connection even
            // if deriving the output file name below fails.
            responseEntity = imageResponse.getEntity();

            String originalFileName = StringUtils.stripStart(targetURI.toURL().getFile(), "/");
            originalFileName = StringUtils.replace(originalFileName, "/", "_");
            File outputFile = getOutputFile(originalFileName);

            // NOTE(review): if the helper reports an unknown length as a negative value, the size
            // comparison below can never succeed - verify against FeedUtils.
            long expectedContentLength = FeedUtils.calculateBestExpectedContentLength(imageResponse, mediaContent);

            if (responseEntity == null) {
                LOGGER.error("No response entity was returned for {}", targetURI);
            } else {
                try {
                    String digestString = writeEntityToFile(responseEntity, outputFile);
                    long actualLength = outputFile.length();

                    if (actualLength == expectedContentLength) {
                        LOGGER.info("Downloaded - {} (SHA: {})", targetURI, digestString);

                        processedItem.setDownloadDate(new Date());
                        processedItem.setHash(digestString);
                        processedItem.setFilename(outputFile.toString());
                        downloadStatus = DownloadStatus.COMPLETED;
                    } else {
                        LOGGER.warn("Size mismatch for {}: expected {} bytes but wrote {} bytes",
                                new Object[] { targetURI, expectedContentLength, actualLength });
                    }
                } catch (ConnectionClosedException e) {
                    LOGGER.error("An Exception was thrown while attempting to read HTTP entity content", e);
                } catch (NoSuchAlgorithmException e) {
                    LOGGER.error("The SHA-1 hashing algorithm is not available on this JVM", e);
                } finally {
                    // Never leave a partial or wrong-sized file behind; this also runs when an
                    // IOException is on its way to the outer handler.
                    if (downloadStatus != DownloadStatus.COMPLETED && outputFile.exists()) {
                        if (outputFile.delete()) {
                            LOGGER.warn("Deleted partial/failed file: {}", outputFile);
                        } else {
                            LOGGER.warn("Unable to delete partial/failed file: {}", outputFile);
                        }
                    }
                }
            }
        } catch (IOException e) {
            LOGGER.error("An Exception was thrown while attempting to download image content", e);
        } catch (URISyntaxException e) {
            // Pass the offending input for the placeholder; the trailing exception is logged as
            // the throwable and would otherwise leave "{}" unfilled.
            LOGGER.error("The supplied URI, {}, violates syntax rules", e.getInput(), e);
        } finally {
            EntityUtils.consumeQuietly(responseEntity);
        }

        processedItem.setDownloadStatus(downloadStatus);
        itemDAO.save(processedItem);
    }

    /**
     * Streams the entity content into {@code outputFile} while computing its digest.
     *
     * @param entity     response entity whose content is written
     * @param outputFile file that receives the content
     * @return hex-encoded digest of the bytes written
     * @throws IOException              if reading the entity or writing the file fails
     * @throws NoSuchAlgorithmException if the {@code "SHA"} digest is unavailable
     */
    private static String writeEntityToFile(HttpEntity entity, File outputFile)
            throws IOException, NoSuchAlgorithmException {
        // Obtain the digest before opening the file so a missing algorithm neither creates an
        // empty file nor leaks an unclosed FileOutputStream.
        MessageDigest messageDigest = MessageDigest.getInstance("SHA");

        BufferedInputStream bis = null;
        DigestOutputStream fos = null;
        try {
            bis = new BufferedInputStream(entity.getContent());
            fos = new DigestOutputStream(new FileOutputStream(outputFile), messageDigest);

            byte[] buffer = new byte[8192];
            int bytesRead;
            while ((bytesRead = bis.read(buffer, 0, buffer.length)) != -1) {
                fos.write(buffer, 0, bytesRead);
            }
            fos.flush();

            return Hex.encodeHexString(fos.getMessageDigest().digest());
        } finally {
            IOUtils.closeQuietly(bis);
            IOUtils.closeQuietly(fos);
        }
    }

    /**
     * Picks a file in the output directory that does not exist yet.
     *
     * @param originalFileName preferred file name
     * @return a file named {@code originalFileName}, or {@code originalFileName_1},
     *         {@code originalFileName_2}, ... for the first suffix that is not taken
     */
    private File getOutputFile(String originalFileName) {
        File outputFile = new File(outputDirectory, originalFileName);
        int count = 1;
        while (outputFile.exists()) {
            outputFile = new File(outputDirectory, originalFileName + "_" + count++);
        }

        return outputFile;
    }

    /**
     * Finds the value of the first {@code Referer} header in the list.
     *
     * @param headerList headers to search
     * @return the header value, or {@code null} when no {@code Referer} header is present
     */
    private static String getRefererHeaderValue(List<Header> headerList) {
        for (Header header : headerList) {
            // HTTP header field names are case-insensitive.
            if (HttpHeaders.REFERER.equalsIgnoreCase(header.getName())) {
                return header.getValue();
            }
        }

        return null;
    }

}