org.tinymediamanager.scraper.util.CachedUrl.java Source code

Java tutorial

Introduction

Here is the source code for org.tinymediamanager.scraper.util.CachedUrl.java

Source

/*
 * Copyright 2012 - 2015 Manuel Laggner
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.tinymediamanager.scraper.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link Url} whose response body is stored on disk, so that recurring requests for the same address can be served from the local cache folder
 * instead of being fetched again.
 * <p>
 * Every cached address is represented by two files inside {@link #CACHE_DIR}, both named after the MD5 hash of the address: a {@code .cache} file
 * holding the downloaded content and a {@code .properties} file holding its meta data (the keys {@code url}, {@code file} and, if known,
 * {@code encoding}). Regular entries expire after 300 seconds; entries whose address ends with an image extension are kept 48 times longer.
 * 
 * @author Manuel Laggner
 */
public class CachedUrl extends Url {
    private static final Logger LOGGER = LoggerFactory.getLogger(CachedUrl.class);

    /** Folder (relative to the working directory) holding all cache and properties files. */
    public static final String CACHE_DIR = "cache/url";

    /** Life time of a regular cache entry, in seconds. */
    private static final int CACHE_EXPIRY = 300;
    /** Image entries live this many times longer than regular entries. */
    private static final int IMAGE_FACTOR = 48;
    /** Matches a whitespace free name ending with a known image extension (case insensitive). */
    private static final Pattern IMAGE_PATTERN = Pattern.compile("([^\\s]+(\\.(?i)(jpg|png|gif|bmp))$)");

    private static final String PROPERTIES_EXTENSION = ".properties";
    private static final String CACHE_EXTENSION = ".cache";
    private static final String FILE_SCHEME = "file:";

    /** Keys used inside the properties file of a cache entry. */
    private static final String KEY_URL = "url";
    private static final String KEY_FILE = "file";
    private static final String KEY_ENCODING = "encoding";

    /**
     * Accepts the properties files of cache entries: names ending with {@code .properties} (case insensitive, independent of the default locale)
     * which do not start with a dot.
     */
    private static final FilenameFilter PROPERTIES_FILTER = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.startsWith(".")) {
                return false;
            }
            int offset = name.length() - PROPERTIES_EXTENSION.length();
            // a negative offset (name shorter than the extension) simply yields false
            return name.regionMatches(true, offset, PROPERTIES_EXTENSION, 0, PROPERTIES_EXTENSION.length());
        }
    };

    private final String urlId;
    private final File propFile;
    private final Properties props;
    private File urlCacheDir = null;

    /**
     * Instantiates a new cached url. An existing cache entry for the url is reloaded; its content file is removed right away when it is expired or
     * empty. Nothing is downloaded here - that happens lazily in {@link #getUrl()}.
     * 
     * @param url
     *          the url
     * @throws IOException
     *           Signals that an I/O exception has occurred.
     */
    public CachedUrl(String url) throws IOException {
        super(url);

        urlId = getCachedFileName(url);
        propFile = new File(getCacheDir(), urlId + PROPERTIES_EXTENSION);
        props = new Properties();
        String defaultCacheFile = new File(getCacheDir(), urlId + CACHE_EXTENSION).getPath();

        if (propFile.exists()) {
            LOGGER.debug("Reloading existing cached url: {} with id: {}", propFile.getAbsolutePath(), urlId);
            PropertiesUtils.load(props, propFile);
            if (props.getProperty(KEY_FILE) == null) {
                // damaged/incomplete properties file: fall back to the default location instead of failing with a NullPointerException
                props.setProperty(KEY_FILE, defaultCacheFile);
            }
            File f = getCachedFile();
            if (f.exists() && (isExpired(f) || f.length() == 0)) {
                LOGGER.info("Removing Cached Url File: {}", f);
                deleteOrWarn(f);
            }
        } else {
            propFile.getParentFile().mkdirs();
            LOGGER.debug("Creating a new cached url for: {}", url);
            props.setProperty(KEY_URL, url);
            props.setProperty(KEY_FILE, defaultCacheFile);
        }

        // sanity check: the stored url has to be the one we were asked for, otherwise the cached content belongs to something else
        if (!url.equalsIgnoreCase(props.getProperty(KEY_URL))) {
            LOGGER.error("The Cached url does not match the one passed! " + props.getProperty(KEY_URL) + " != " + url
                    + "; Propfile Name: " + propFile);
            props.setProperty(KEY_URL, url);
            File f = getCachedFile();
            if (f.exists()) {
                LOGGER.info("Removing cached content for url: {}", url);
                deleteOrWarn(f);
            }
        }
    }

    /**
     * Gets the base name (without extension) of the cache files for the given url: the hex encoded MD5 hash of the url.
     * 
     * @param url
     *          the url
     * @return the cached file name or {@code null} if the url is {@code null}
     */
    private static String getCachedFileName(String url) {
        try {
            if (url == null) {
                return null;
            }
            // now uses a simple md5 hash, which should have a fairly low collision rate, especially for our limited use
            byte[] key = DigestUtils.md5(url);
            return new String(Hex.encodeHex(key));
        } catch (Exception e) {
            LOGGER.error("Failed to create cached filename for url: " + url, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Gets the cache dir, creating it on first access if it does not exist yet.
     * 
     * @return the cache dir
     */
    private File getCacheDir() {
        if (urlCacheDir == null) {
            urlCacheDir = new File(CACHE_DIR);
            if (!urlCacheDir.exists()) {
                urlCacheDir.mkdirs();
            }
        }
        return urlCacheDir;
    }

    /**
     * Checks if the given cache file of this entry is expired.
     * 
     * @param cachedFile
     *          the cached file
     * @return true, if is expired
     */
    private boolean isExpired(File cachedFile) {
        return isExpired(cachedFile, props);
    }

    /**
     * Checks if the given file is older than the given amount of seconds. A modification date in the future counts as expired too.
     * 
     * @param cachedFile
     *          the cached file
     * @param expirySecs
     *          the expiry secs
     * @return true, if is expired
     */
    private static boolean isExpired(File cachedFile, long expirySecs) {
        long now = System.currentTimeMillis();
        long lastModified = cachedFile.lastModified();
        long diff = (now - lastModified) / 1000;
        boolean expired = (diff > expirySecs) || (diff < 0); // lastModified in the future.. who does that? well there are users doing this.
        if (expired && LOGGER.isDebugEnabled()) {
            LOGGER.debug("CachedUrl.isExpired(): " + expired + "; File: " + cachedFile + "; LastModified: " + lastModified
                    + "; Current Time: " + now + "; Expiry: " + expirySecs + "s; Diff: " + diff + "s");
        }
        return expired;
    }

    /**
     * Checks if the cache entry described by the given properties holds an image. The decision is made on the original url, because the cache file
     * itself always carries the {@code .cache} extension and could never match an image extension; the file name is only consulted when no url is
     * stored.
     * 
     * @param props
     *          the props
     * @return true, if is image file
     */
    private static boolean isImageFile(Properties props) {
        String location = props.getProperty(KEY_URL);
        if (location == null) {
            location = props.getProperty(KEY_FILE);
        }
        return location != null && IMAGE_PATTERN.matcher(location).matches();
    }

    /**
     * Checks if the given cache file is expired, using the longer life time for image entries.
     * 
     * @param cachedFile
     *          the cached file
     * @param props
     *          the props
     * @return true, if is expired
     */
    private static boolean isExpired(File cachedFile, Properties props) {
        int expirySecs = CACHE_EXPIRY;
        if (isImageFile(props)) {
            expirySecs = expirySecs * IMAGE_FACTOR;
        }
        return isExpired(cachedFile, expirySecs);
    }

    /**
     * Gets the file holding the cached content of this url. The file does not necessarily exist.
     * 
     * @return the cached file
     */
    public File getCachedFile() {
        return getCachedFile(props);
    }

    /**
     * Removes the cached file. For example if an image download is broken
     */
    public void removeCachedFile() {
        File f = getCachedFile();
        if (f.exists()) {
            LOGGER.info("Removing Cached Url File: {}", f);
            deleteOrWarn(f);
        }
    }

    /**
     * Gets the cached file referenced by the {@code file} entry of the given properties.
     * 
     * @param props
     *          the props
     * @return the cached file
     * @throws NullPointerException
     *           if the properties do not contain a {@code file} entry
     */
    public static File getCachedFile(Properties props) {
        return new File(props.getProperty(KEY_FILE));
    }

    /**
     * Opens a stream on the location returned by {@link #getUrl()} (caching the content first if needed). The cached file is removed when anything
     * goes wrong, so that a broken download is not served again.
     */
    @Override
    public InputStream getInputStream() throws IOException, InterruptedException {
        try {
            return getUrl().openStream();
        } catch (IOException e) {
            removeCachedFile();
            throw e;
        } catch (InterruptedException e) {
            removeCachedFile();
            throw e;
        }
    }

    /**
     * Gets the location to read the content from: the local cache file (downloaded first if it is missing or empty), or the original url if the
     * cache file is still missing or empty afterwards. The cached file is removed when an exception occurs.
     */
    @Override
    public URL getUrl() throws IOException, InterruptedException {
        try {
            File f = getCachedFile();
            if (!f.exists() || f.length() == 0) {
                cache();
            } else {
                LOGGER.debug("Cached File exists: {} so we'll just use it.", f.getAbsolutePath());
            }
            // check if its still empty (maybe broken download)
            if (!f.exists() || f.length() == 0) {
                // return URI without caching
                return new URL(url);
            }
            return f.toURI().toURL();
        } catch (IOException e) {
            removeCachedFile();
            throw e;
        } catch (InterruptedException e) {
            removeCachedFile();
            throw e;
        }
    }

    /**
     * Downloads (or, for {@code file:} urls, copies) the content into the cache file and stores the properties file afterwards. If no response was
     * received an empty cache file is written instead. Both streams are closed in every case.
     * 
     * @throws IOException
     *           Signals that an I/O exception has occurred.
     * @throws InterruptedException
     *           if fetching the url has been interrupted
     */
    private void cache() throws IOException, InterruptedException {
        LOGGER.debug("Caching Url: {}", url);
        File f = getCachedFile();
        long sizeHttp = -1;
        long sizeCopy;
        InputStream is = null;
        FileOutputStream fos = null;
        try {
            if (!url.startsWith(FILE_SCHEME)) {
                Url u = new Url(url);
                u.addHeaders(headersRequest);
                is = u.getInputStream();
                sizeHttp = u.getContentLength();

                // also store encoding
                if (u.getCharset() != null) {
                    props.setProperty(KEY_ENCODING, u.getCharset().toString());
                }
            } else {
                // workaround for local files: strip the scheme only (and not every "file:" occurrence inside the path)
                is = new FileInputStream(new File(url.substring(FILE_SCHEME.length())));
            }

            if (is == null || isFault()) {
                LOGGER.debug("Url {}: did not receive a response; writing empty file", url);
                f.createNewFile();
                return;
            }

            fos = new FileOutputStream(f);
            sizeCopy = IOUtils.copy(is, fos);
            fos.flush();
            try {
                fos.getFD().sync(); // wait until file has been completely written
            } catch (Exception ignored) {
                // best effort only -> just do not crash the thread
            }
            // close explicitly, so that a failing close of the written file is still reported to the caller
            fos.close();
        } finally {
            // no-ops for null/already closed streams; guarantees nothing stays open on early return or exception
            IOUtils.closeQuietly(fos);
            IOUtils.closeQuietly(is);
        }

        if (sizeHttp > 0 && sizeHttp != sizeCopy) {
            LOGGER.warn("File not fully cached! {}", f.getAbsolutePath());
        }
        LOGGER.debug("Url {} Cached To: {}", url, f.getAbsolutePath());
        PropertiesUtils.store(props, getPropertyFile(), "Cached Url Properties");
    }

    /**
     * Gets the property file.
     * 
     * @return the property file
     */
    private File getPropertyFile() {
        return propFile;
    }

    /**
     * Lists the properties files of all cache entries.
     * 
     * @return the properties files; an empty array if the cache folder does not exist or cannot be listed
     */
    private static File[] listPropertyFiles() {
        File cacheDir = new File(CACHE_DIR);
        if (!cacheDir.exists()) {
            return new File[0];
        }
        // listFiles() returns null if the path is no directory or an I/O error occurs
        File[] propertyFiles = cacheDir.listFiles(PROPERTIES_FILTER);
        return propertyFiles != null ? propertyFiles : new File[0];
    }

    /**
     * Deletes the given file and logs a warning if that was not possible.
     * 
     * @param file
     *          the file to delete
     */
    private static void deleteOrWarn(File file) {
        if (!file.delete()) {
            LOGGER.warn("Failed to delete file: {}", file);
        }
    }

    /**
     * Clear expired cache files. An entry (content and properties file) is removed if its content file exists and is either expired or empty.
     */
    public static void cleanupCache() {
        for (File propFile : listPropertyFiles()) {
            Properties props = new Properties();
            try {
                PropertiesUtils.load(props, propFile);
                File f = getCachedFile(props);

                // cleanup expired cache file
                if (f.exists() && (isExpired(f, props) || f.length() == 0)) {
                    deleteOrWarn(f);
                    deleteOrWarn(propFile);
                }
            } catch (Exception e) {
                // one broken entry must not stop the cleanup of the remaining ones
                LOGGER.warn("Could not clean up cache entry " + propFile + ": " + e.getMessage());
            }
        }
    }

    /**
     * Clear the cache for one specific host. Every entry whose stored url contains the given text and whose content file exists is removed,
     * regardless of its age.
     * 
     * @param host
     *          the text to look for in the cached urls; nothing is removed if it is {@code null}
     */
    public static void cleanupCacheForSpecificHost(String host) {
        if (host == null) {
            return;
        }

        for (File propFile : listPropertyFiles()) {
            Properties props = new Properties();
            try {
                PropertiesUtils.load(props, propFile);
                String cachedUrl = props.getProperty(KEY_URL);
                if (cachedUrl != null && cachedUrl.contains(host)) {
                    File f = getCachedFile(props);
                    if (f.exists()) {
                        deleteOrWarn(f);
                        deleteOrWarn(propFile);
                    }
                }
            } catch (Exception e) {
                // one broken entry must not stop the cleanup of the remaining ones
                LOGGER.warn("Could not clean up cache entry " + propFile + ": " + e.getMessage());
            }
        }
    }

    /**
     * Remove a specific URL from the cache. This never throws; problems are only logged.
     * 
     * @param url
     *          the url to remove the cache entry for; nothing is removed if it is {@code null}
     */
    public static void removeCachedFileForUrl(String url) {
        if (url == null) {
            return;
        }

        try {
            File propFile = new File(CACHE_DIR, getCachedFileName(url) + PROPERTIES_EXTENSION);
            if (!propFile.exists()) {
                return;
            }

            Properties props = new Properties();
            PropertiesUtils.load(props, propFile);
            File f = getCachedFile(props);
            if (f.exists()) {
                deleteOrWarn(f);
                deleteOrWarn(propFile);
            }
        } catch (Exception e) {
            LOGGER.warn("Could not remove cached file for url: " + url, e);
        }
    }

    /**
     * Gets the charset which has been stored along with the cached content.
     * 
     * @return the stored charset, or the default charset if none has been stored or the stored name is not usable
     */
    @Override
    public Charset getCharset() {
        String encoding = props.getProperty(KEY_ENCODING);
        if (encoding != null) {
            try {
                // take the charset from the cached file
                return Charset.forName(encoding);
            } catch (IllegalArgumentException e) {
                // illegal or unsupported charset name -> use the default below
                LOGGER.debug("Unusable encoding in cache properties: {}", encoding);
            }
        }
        return Charset.defaultCharset();
    }
}