Java tutorial
/* * This file is part of Transitime.org * * Transitime.org is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License (GPL) as published by * the Free Software Foundation, either version 3 of the License, or * any later version. * * Transitime.org is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Transitime.org . If not, see <http://www.gnu.org/licenses/>. */ package org.transitime.utils; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import java.util.ArrayList; import java.util.List; import org.apache.http.HttpStatus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Copies a file from the specified URL and stores it locally. Useful * for things such as reading a GTFS zip file and storing it on local * machine for further processing. * * @author SkiBu Smith * */ public class HttpGetFile { // UserAgenct info sent to web server when file is requested private static final String USER_AGENT = "Transitime"; // In case can't figure out file name to use to store the // file from the URL private static final String DEFAULT_FILE_NAME = "DEFAULT_FILE"; private final String urlStr; private final String dirNameForResult; private final String fullFileNameForResult; private final List<String> headerKeys = new ArrayList<String>(); private final List<String> headerValues = new ArrayList<String>(); protected static final Logger logger = LoggerFactory.getLogger(HttpGetFile.class); /********************** Member Functions **************************/ /** * Constructor * * @param urlStr * URL of file to get * @param dirName * Directory where gotten file is to be written */ public HttpGetFile(String urlStr, String dirName) { this.urlStr = urlStr; // Make sure directory name ends with a "/" if (!dirName.endsWith("/")) dirName += "/"; this.dirNameForResult = dirName; fullFileNameForResult = dirNameForResult + getFileNameFromUrl(urlStr); } /** * Adds specified header key/value to the request. Useful if need to set * If-Modified-Since or such. * * @param key * @param value */ public void addRequestHeader(String key, String value) { headerKeys.add(key); headerValues.add(value); } /** * Simply a getter. Returns the full name where the file is stored on the * local file system. * * @return full file name */ public String getFullFileName() { return fullFileNameForResult; } /** * Gets the file name from the URL so it can be used as part of the file * name for storing the results. * * @param urlStr * @return the file name to use for storing the results */ private static String getFileNameFromUrl(String urlStr) { int lastSlashPos = urlStr.lastIndexOf('/'); if (lastSlashPos == -1) { logger.error("Couldn't determine file name so using {}", DEFAULT_FILE_NAME); return DEFAULT_FILE_NAME; } return urlStr.substring(lastSlashPos + 1); } /** * Actually gets and stores the file. The User-Agency property is always set * to USER_AGENT. * * @return The http response code such as HttpStatus.SC_OK * @throws IOException */ public int getFile() throws IOException { IntervalTimer timer = new IntervalTimer(); logger.debug("Getting URL={}", urlStr); URL url = new URL(urlStr); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestProperty("User-Agency", USER_AGENT); // Set request properties for (int i = 0; i < headerKeys.size(); ++i) { connection.setRequestProperty(headerKeys.get(i), headerValues.get(i)); } // Get and log response code int responseCode = connection.getResponseCode(); long expectedContentLength = connection.getContentLengthLong(); long remoteFileLastModified = connection.getLastModified(); logger.debug( "Response code for getting file {} is {} and file size " + "is {} bytes and remote file lastModified=\"{}\" or {} msec", urlStr, responseCode, expectedContentLength, Time.httpDate(remoteFileLastModified), remoteFileLastModified); // Open file for where results are to be written File file = new File(fullFileNameForResult); // If file could not be read in or is not newer that lastModified time // of the existing file on the server then don't need to continue // reading it in. if (responseCode != HttpStatus.SC_OK) { logger.debug("Response code was {} so not reading in file", responseCode); return responseCode; } // Sometimes a web server will return http status OK (200) even // when the remote file is older than the time set for If-Modified-Since // header. For this situation still don't want to read in the file // so simply return http status NO_MODIFIED (304). if (file.lastModified() > 0 && remoteFileLastModified < file.lastModified()) { logger.warn("Response code was {} but the local file was modified " + "after the remote file so it must be up to date. " + "Therefore remote file not read in.", responseCode); return HttpStatus.SC_NOT_MODIFIED; } logger.debug( "Actually reading data from URL {} . Local file " + "lastModified={} or {} msec and remoteFileLastModified={} " + "or {} msec.", urlStr, Time.httpDate(file.lastModified()), file.lastModified(), Time.httpDate(remoteFileLastModified), remoteFileLastModified); // Make sure output directory exists file.getParentFile().mkdirs(); // Open input stream for reading data InputStream in = connection.getInputStream(); // Open the stream FileOutputStream fos = new FileOutputStream(file); IntervalTimer loopTimer = new IntervalTimer(); long lengthSinceLoggingMsg = 0; // Copy contents to file byte[] buffer = new byte[4096]; int length; int totalLength = 0; while ((length = in.read(buffer)) > 0) { fos.write(buffer, 0, length); totalLength += length; lengthSinceLoggingMsg += length; // Every once in a while log progress. Don't want to // check timer every loop since that would be expensive. // So only check timer for every MB downloaded. if (lengthSinceLoggingMsg > 1024 * 1024) { lengthSinceLoggingMsg = 0; if (loopTimer.elapsedMsec() > 10 * Time.MS_PER_SEC) { loopTimer.resetTimer(); logger.debug("Read in {} bytes or {}% of file {}", totalLength, StringUtils.oneDigitFormat(100.0 * totalLength / expectedContentLength), urlStr); } } } // Close things up fos.close(); // Set the last modified time so that it is the same as on the // web server. file.setLastModified(connection.getLastModified()); if (totalLength == expectedContentLength) logger.debug("Successfully copied {} to file {}. Length was {} " + "bytes. Took {} msec.", urlStr, fullFileNameForResult, totalLength, timer.elapsedMsec()); else logger.error("When copying {} to file {} the expected length was " + "{} but only copied {} bytes", urlStr, fullFileNameForResult, expectedContentLength, totalLength); // Return the http response code such as 200 for OK or 304 for // Not Modified return connection.getResponseCode(); } }