edu.umn.cs.spatialHadoop.nasa.HTTPInputStream.java Source code

Java tutorial

Introduction

Here is the source code for edu.umn.cs.spatialHadoop.nasa.HTTPInputStream.java

Source

/***********************************************************************
* Copyright (c) 2015 by Regents of the University of Minnesota.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Apache License, Version 2.0 which 
* accompanies this distribution and is available at
* http://www.opensource.org/licenses/apache2.0.php.
*
*************************************************************************/
package edu.umn.cs.spatialHadoop.nasa;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;

/**
 * An {@link InputStream} over the contents of an HTTP {@link URL} that is also
 * {@link Seekable} and {@link PositionedReadable}, so that it can be wrapped in
 * an {@link FSDataInputStream} and used by {@link HTTPFileSystem}.
 *
 * <p>The HTTP connection is opened lazily by the first operation that needs
 * data. Seeking backwards (or seeking while no connection is open) only
 * records the new position and drops the current connection; the next read
 * reopens the URL with an HTTP {@code Range} request starting at that
 * position. Seeking forwards on an open stream skips the bytes in between.
 *
 * <p>Note that the positioned-read methods are implemented as a seek followed
 * by a read, so they move the current position of the stream.
 *
 * <p>This class performs no synchronization.
 *
 * @author Ahmed Eldawy
 *
 */
public class HTTPInputStream extends InputStream implements Seekable, PositionedReadable {
    public static final Log LOG = LogFactory.getLog(HTTPInputStream.class);

    /**Time to wait between two connection attempts, in milliseconds.*/
    private static final long RETRY_DELAY_MS = 1000;

    /**The underlying stream of the open connection, or null if not connected*/
    private InputStream in;

    /**Cached value of content length. Negative while unknown.*/
    private long length;

    /**Current position in the file*/
    private long pos;

    /**Value of {@link #pos} when {@link #mark(int)} was last called, or -1*/
    private long markPos;

    /**The underlying URL. Used to reposition the stream on seeks*/
    private final URL url;

    /**The underlying connection, or null if not connected*/
    private HttpURLConnection conn;

    /**
     * Creates a stream over the given URL. No connection is opened here.
     * @param url the HTTP URL to read from
     */
    public HTTPInputStream(URL url) {
        this.url = url;
        this.pos = 0;
        this.length = -1; // Initially invalidate content length.
        this.markPos = -1;
    }

    /**
     * Lazily opens a stream to the file, positioned at {@link #pos}, if no
     * stream is currently open. Connection attempts that fail with a
     * {@link java.net.SocketException} or {@link java.net.UnknownHostException}
     * are repeated up to {@code max(1, HTTPFileSystem.retries)} times.
     * @throws IOException if the connection cannot be established
     */
    private void lazyConnect() throws IOException {
        if (in != null)
            return;

        int retries = Math.max(1, HTTPFileSystem.retries);
        while (in == null && retries-- > 0) {
            // NOTE(review): a connection object whose request failed is
            // presumably not reusable, so every attempt starts from a new one.
            HttpURLConnection attempt = null;
            boolean opened = false;
            try {
                attempt = (HttpURLConnection) url.openConnection();
                if (pos != 0) {
                    // Open-ended byte range request: "bytes=<first>-".
                    attempt.setRequestProperty("Range", "bytes=" + pos + "-");
                }
                InputStream stream = attempt.getInputStream();
                if (pos != 0 && attempt.getResponseCode() != HttpURLConnection.HTTP_PARTIAL) {
                    // The server did not answer with partial content, so the
                    // body starts at offset zero. Discard the leading bytes to
                    // keep the stream consistent with 'pos'.
                    discardBytes(stream, pos);
                }
                this.conn = attempt;
                this.in = stream;
                opened = true;
            } catch (java.net.SocketException e) {
                retryOrRethrow(e, retries);
            } catch (java.net.UnknownHostException e) {
                retryOrRethrow(e, retries);
            } finally {
                if (!opened && attempt != null)
                    attempt.disconnect();
            }
        }
        if (in == null)
            throw new IOException("Could not open an HTTP connection");
    }

    /**
     * Handles a failed connection attempt: rethrows the error if no retries
     * are left, otherwise logs it and waits before the next attempt.
     * @param e the error raised by the failed attempt
     * @param retriesLeft number of attempts that may still be made
     * @throws IOException {@code e} itself if {@code retriesLeft} is zero, or
     *   an {@link java.io.InterruptedIOException} if the wait is interrupted
     */
    private void retryOrRethrow(IOException e, int retriesLeft) throws IOException {
        if (retriesLeft == 0)
            throw e;
        LOG.info("Error accessing file '" + url + "'. Trials left: " + retriesLeft, e);
        try {
            Thread.sleep(RETRY_DELAY_MS);
        } catch (InterruptedException ie) {
            // Keep the interrupt visible to the caller and stop retrying
            Thread.currentThread().interrupt();
            java.io.InterruptedIOException aborted = new java.io.InterruptedIOException(
                    "Interrupted while waiting to retry '" + url + "'");
            aborted.initCause(e);
            throw aborted;
        }
    }

    /**
     * Reads and discards up to {@code count} bytes from the given stream.
     * Stops early if the end of the stream is reached.
     * @param stream the stream to consume
     * @param count number of bytes to discard
     * @throws IOException if reading from the stream fails
     */
    private static void discardBytes(InputStream stream, long count) throws IOException {
        long remaining = count;
        while (remaining > 0) {
            long skipped = stream.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
            } else if (stream.read() < 0) {
                return; // End of stream
            } else {
                remaining--;
            }
        }
    }

    /**
     * Closes the current stream and connection, if any, and forgets them so
     * that the next read reconnects at {@link #pos}. Also invalidates any mark.
     * @throws IOException if closing the stream fails
     */
    private void closeUnderlying() throws IOException {
        InputStream stream = this.in;
        HttpURLConnection connection = this.conn;
        this.in = null;
        this.conn = null;
        this.markPos = -1;
        try {
            if (stream != null)
                stream.close();
        } finally {
            if (connection != null)
                connection.disconnect();
        }
    }

    /**
     * Reads one byte and advances the position if a byte was read.
     * @return the byte read (0-255), or -1 at the end of the stream
     */
    @Override
    public int read() throws IOException {
        lazyConnect();
        int value = in.read();
        if (value >= 0)
            pos++;
        return value;
    }

    /**
     * Equivalent to {@code read(b, 0, b.length)}.
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} bytes into {@code b} starting at {@code off}.
     * If the stream ends before the known content length is reached, the
     * connection is reopened at the current position and the read is
     * repeated, up to {@code HTTPFileSystem.retries} additional times.
     * @return the number of bytes read, or -1 at the end of the stream
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        int retries = HTTPFileSystem.retries;
        while (true) {
            lazyConnect();
            int diff = in.read(b, off, len);
            if (diff >= 0) {
                pos += diff;
                return diff;
            }
            // End of stream. Only retry when the length is known and says
            // that there should be more data after the current position.
            if (retries-- <= 0 || pos >= getContentLength())
                return diff;
            LOG.info("Stream of '" + url + "' ended early at position " + pos
                    + ". Trials left: " + retries);
            closeUnderlying(); // Forces lazyConnect() to reopen at 'pos'
        }
    }

    /**
     * Skips up to {@code n} bytes of the underlying stream, opening it first
     * if necessary, and advances the position by the number of bytes skipped.
     * @return the number of bytes actually skipped
     */
    @Override
    public long skip(long n) throws IOException {
        if (n <= 0)
            return 0;
        lazyConnect();
        long skipped = in.skip(n);
        if (skipped > 0)
            pos += skipped;
        return skipped;
    }

    /**
     * Describes this stream by its URL and current position.
     */
    @Override
    public String toString() {
        return "HTTPInputStream{url=" + url + ", pos=" + pos + "}";
    }

    /**
     * @return what the underlying stream reports, or 0 if no stream is open
     */
    @Override
    public int available() throws IOException {
        return in == null ? 0 : in.available();
    }

    /**
     * Closes the underlying stream and disconnects the connection, if open.
     * Calling this method more than once has no further effect.
     */
    @Override
    public void close() throws IOException {
        closeUnderlying();
    }

    /**
     * Marks the current position in the underlying stream, if one is open.
     * The mark is lost whenever the connection is closed or reopened.
     */
    @Override
    public void mark(int readlimit) {
        if (in != null) {
            in.mark(readlimit);
            markPos = pos;
        }
    }

    /**
     * Resets the underlying stream to the last mark and restores the position
     * that was current when the mark was set.
     * @throws IOException if no valid mark exists or the underlying stream
     *   cannot be reset
     */
    @Override
    public void reset() throws IOException {
        if (in == null || markPos < 0)
            throw new IOException("Mark not set or invalidated for '" + url + "'");
        in.reset();
        pos = markPos;
    }

    /**
     * @return true only if a stream is open and it supports mark/reset
     */
    @Override
    public boolean markSupported() {
        return in != null && in.markSupported();
    }

    /**
     * Seeks to {@code position} and reads up to {@code length} bytes.
     * This moves the current position of the stream.
     * @return the number of bytes read, or -1 at the end of the stream
     */
    @Override
    public int read(long position, byte[] buffer, int offset, int length) throws IOException {
        seek(position);
        return read(buffer, offset, length);
    }

    /**
     * Seeks to {@code position} and reads exactly {@code length} bytes into
     * {@code buffer} starting at {@code offset}.
     * This moves the current position of the stream.
     * @throws java.io.EOFException if the stream ends before {@code length}
     *   bytes were read
     */
    @Override
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
        seek(position);
        int done = 0;
        while (done < length) {
            int diff = read(buffer, offset + done, length - done);
            if (diff < 0) {
                throw new java.io.EOFException("End of '" + url + "' reached at position " + pos
                        + " after reading " + done + " of " + length + " bytes");
            }
            done += diff;
        }
    }

    /**
     * Equivalent to {@code readFully(position, buffer, 0, buffer.length)}.
     */
    @Override
    public void readFully(long position, byte[] buffer) throws IOException {
        readFully(position, buffer, 0, buffer.length);
    }

    /**
     * Moves the current position to {@code newPos}. A backward seek, or a seek
     * while no stream is open, drops the connection and defers repositioning
     * to the next read. A forward seek skips bytes of the open stream.
     * @throws java.io.EOFException if {@code newPos} is negative
     */
    @Override
    public void seek(long newPos) throws IOException {
        if (newPos < 0)
            throw new java.io.EOFException("Cannot seek to a negative position: " + newPos);

        if (newPos < pos || in == null) {
            // Reposition the stream and invalidate the underlying stream
            closeUnderlying();
            pos = newPos;
            return;
        }

        // Skip the difference; #skip increments 'pos' and may need several calls
        while (pos < newPos) {
            if (skip(newPos - pos) <= 0) {
                // No progress possible on this stream; reposition lazily
                closeUnderlying();
                pos = newPos;
            }
        }
    }

    @Override
    public long getPos() throws IOException {
        return pos;
    }

    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
        // Currently, we support only one URL
        // If we have mirrors, we can do a connection to a new mirror here
        return false;
    }

    /**
     * Returns the content length of the URL, asking the server on a separate
     * connection if it is not known yet. A successfully obtained value is
     * cached; an unknown length is reported as a negative value and queried
     * again on the next call.
     * @return the value of the Content-Length header, or -1 if unavailable
     * @throws IOException if the connection cannot be created
     */
    private long getContentLength() throws IOException {
        if (length < 0) {
            int retries = Math.max(1, HTTPFileSystem.retries);
            boolean answered = false;
            while (!answered && retries-- > 0) {
                HttpURLConnection localConn = null;
                try {
                    localConn = (HttpURLConnection) url.openConnection();
                    // Parse the header as a long; the int-returning
                    // getContentLength() cannot represent files over 2GB.
                    length = parseLength(localConn.getHeaderField("Content-Length"));
                    answered = true;
                } catch (java.net.SocketException e) {
                    retryOrRethrow(e, retries);
                } catch (java.net.UnknownHostException e) {
                    retryOrRethrow(e, retries);
                } finally {
                    if (localConn != null)
                        localConn.disconnect();
                }
            }
        }
        return length;
    }

    /**
     * Parses the value of a Content-Length header.
     * @param header the header value, possibly null
     * @return the parsed length, or -1 if the header is missing or malformed
     */
    private static long parseLength(String header) {
        if (header == null)
            return -1;
        try {
            return Long.parseLong(header.trim());
        } catch (NumberFormatException e) {
            return -1;
        }
    }

}