com.ettrema.zsync.Upload.java Source code

Introduction

Here is the source code for com.ettrema.zsync.Upload.java
Source

/*
 * Copyright (C) 2012 McEvoy Software Ltd
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package com.ettrema.zsync;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.SequenceInputStream;
import java.io.UnsupportedEncodingException;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;

import com.bradmcevoy.io.BufferingOutputStream;

/**
 * A container for the information transmitted in a ZSync PUT upload. The information currently consists of some
 * headers (file length, block size, etc...), an InputStream containing a list of RelocateRanges for relocating matching blocks, 
 * and an InputStream containing a sequence of data chunks (along with their ranges). The Upload class also contains methods for 
 * translating to/from a stream (getInputStream and parse, respectively).
 * 
 * @author Nick
 *
 */
public class Upload {

    /**
     * The character encoding used to convert Strings to bytes. The default is US-ASCII.
     * The methods involved in parsing assume one byte per character.
     */
    public final static String CHARSET = "US-ASCII";
    /**
     * The character marking the end of a line. The default is '\n'
     */
    public final static char LF = '\n';
    /**
     * A String that marks the beginning of a range of uploaded bytes. Currently unused.
     */
    public String DIV = "--DIVIDER";

    /** Header key carrying the zsync version of the sender. */
    public final static String VERSION = "zsync";

    /** Header key carrying the block size used in the upload. */
    public final static String BLOCKSIZE = "Blocksize";

    /** Header key carrying the length of the assembled file. */
    public final static String FILELENGTH = "Length";
    /**
     * The total number of bytes of new data to be transmitted. Currently Unused.
     */
    public final static String NEWDATA = "ContentLength";

    /** Header key carrying the SHA-1 checksum of the whole file. */
    public final static String SHA_1 = "SHA-1";

    /** Header key introducing the list of relocate ranges. */
    public final static String RELOCATE = "Relocate";

    /** Key naming a byte range; not referenced by this class. */
    public final static String RANGE = "Range";

    /** {@link #CHARSET} resolved once, so the parsing helpers do not look it up on every call. */
    private static final Charset ENCODING = Charset.forName(CHARSET);
    /** The single byte that encodes {@link #LF} in {@link #CHARSET}. */
    private static final byte NEWLINE = Character.toString(LF).getBytes(ENCODING)[0];
    /** The single byte that encodes ':' in {@link #CHARSET}. */
    private static final byte COLON = ":".getBytes(ENCODING)[0];
    /** Delimiters that end a header key. Never modified. */
    private static final byte[] KEY_DELIMITERS = { NEWLINE, COLON };
    /** Delimiters that end a header value. Never modified. */
    private static final byte[] VALUE_DELIMITERS = { NEWLINE };
    /** Maximum number of bytes searched for the delimiter of a header key or value. */
    private static final int MAX_HEADER_BYTES = 1024;
    /** Maximum number of bytes accepted for the Relocate line. */
    private static final int MAX_RELOCATE_BYTES = 1024 * 1024 * 64;

    private String version;
    private String sha1;
    private long blocksize;
    private long filelength;

    private InputStream relocStream;
    private InputStream dataStream;

    /**
     * Returns the list of headers in String format, in the proper format for upload. Each
     * header is written as "key: value" and is terminated by the LF character. The headers
     * are emitted in the order version, file length, block size, SHA-1.
     *
     * @return A String containing the headers
     */
    public String getParams() {

        StringBuilder sbr = new StringBuilder();

        sbr.append(paramString(VERSION, version));
        sbr.append(paramString(FILELENGTH, filelength));
        sbr.append(paramString(BLOCKSIZE, blocksize));
        sbr.append(paramString(SHA_1, sha1));

        return sbr.toString();
    }

    /**
     * Formats a single header line.
     *
     * @param key The header key
     * @param value The header value; its String conversion is used
     * @return The String "key: value" followed by LF
     */
    public static String paramString(String key, Object value) {

        return key + ": " + value + LF;
    }

    /**
     * Constructs an empty Upload object. Its fields need to be set individually.
     */
    public Upload() {
    }

    /**
     * Parses the InputStream into an Upload object.<p/>
     * 
     * The method initially parses the headers from the InputStream by reading the sequence of keys (the String preceding the first colon in each line) 
     * and values ( the String following the colon and terminated by the LF character ) and invoking {@link #parseParam} on each key value pair. 
     * If the key is RELOCATE, then the value is not read, but is copied into a BufferingOutputStream and stored in the relocStream field
     * (the terminating LF is consumed but not copied). Parsing of headers
     * continues until a "blank" line is reached, ie a line that is null or contains only whitespace, which indicates the beginning of the data section.
     * A reference to the remaining InputStream is then stored in the dataStream field.<p/>
     * 
     * Failures are reported as unchecked exceptions: an IOException is wrapped directly in a RuntimeException, and a
     * ParseException is re-created with its offset set to the number of bytes read so far (the original is kept as its cause)
     * and then wrapped in a RuntimeException.
     * 
     * @param in The InputStream containing the ZSync upload
     * @return A filled in Upload object
     */
    public static Upload parse(InputStream in) {

        Upload um = new Upload();
        int bytesRead = 0; //Enables a ParseException to specify the offset

        try {
            String key;
            //Parse headers until a null/all-whitespace line is encountered
            while (!StringUtils.isBlank((key = readKey(in, MAX_HEADER_BYTES)))) {

                /*
                 * Add one to bytesRead since the delimiter was read but omitted from the String. 
                 * The final value of bytesRead may end up off by one if the end of input is reached, since no 
                 * delimiter is read in that case.
                 */
                bytesRead += key.length() + 1;
                key = key.trim();

                if (key.equalsIgnoreCase(RELOCATE)) {
                    /*
                     * Copies the Relocate values to a BufferingOutputStream. The stream is closed
                     * in a finally block so that it is not left open when copyLine fails.
                     */
                    BufferingOutputStream relocOut = new BufferingOutputStream(16384);
                    try {
                        bytesRead += copyLine(in, MAX_RELOCATE_BYTES, relocOut);
                    } finally {
                        relocOut.close();
                    }

                    um.setRelocStream(relocOut.getInputStream());

                } else {
                    /*
                     * Key is not "Relocate", so parse header
                     */
                    String value = readValue(in, MAX_HEADER_BYTES);
                    bytesRead += value.length() + 1;
                    value = value.trim();

                    um.parseParam(key, value);
                }
            }

            /*
             * A blank line has been read, indicating the end of the headers, so the unread
             * portion of the InputStream is the byte range section. 
             */
            um.setDataStream(in);

        } catch (IOException e) {
            throw new RuntimeException("Couldn't parse upload, IOException.", e);

        } catch (ParseException e) {

            //Set the offset of the ParseException to bytesRead, keeping the original as the cause
            ParseException ex = new ParseException(e.getMessage(), bytesRead);
            ex.initCause(e);
            throw new RuntimeException(ex);
        }

        return um;
    }

    /**
     * Returns the next String terminated by one of the specified delimiters or the end of the InputStream.<p/>
     * 
     * This method simply reads from an InputStream one byte at a time, up to maxsearch bytes, until it reads a byte equal to one of the delimiters
     * or reaches the end of the stream. It uses the CHARSET encoding to translate the bytes read into a String, which it returns with delimiter excluded, 
     * or it throws a ParseException if maxSearch bytes are read without reaching a delimiter or the end of the stream.<p/>
     * 
     * A non-buffering method is used because a buffering reader would likely pull in part of the binary data
     * from the InputStream. An alternative is to use a BufferedReader with a given buffer size and use
     * mark and reset to get back binary data pulled into the buffer.
     * 
     * @param in The InputStream to read from
     * @param delimiters A list of byte values, each of which indicates the end of a token
     * @param maxsearch The maximum number of bytes to search for a delimiter
     * @return The String containing the CHARSET decoded String with delimiter excluded
     * @throws IOException
     * @throws ParseException If a delimiter byte is not found within maxsearch reads
     */
    public static String readToken(InputStream in, byte[] delimiters, int maxsearch)
            throws ParseException, IOException {

        if (maxsearch <= 0) {
            throw new RuntimeException("readToken: Invalid maxsearch " + maxsearch);
        }

        ByteBuffer bytes = ByteBuffer.allocate(maxsearch);
        int next;

        /*
         * The result of read() is kept as an int until after the end-of-stream test. Narrowing it to
         * a byte first would make every value from 0x80 to 0xFF negative and therefore
         * indistinguishable from the -1 end-of-stream marker.
         */
        read: while ((next = in.read()) != -1) {

            byte nextByte = (byte) next;
            for (byte delimiter : delimiters) {
                if (nextByte == delimiter) {
                    break read;
                }
            }
            if (!bytes.hasRemaining()) {
                throw new ParseException("Could not find delimiter within " + maxsearch + " bytes.", 0);
            }
            bytes.put(nextByte);
        }

        bytes.flip();
        return ENCODING.decode(bytes).toString();
    }

    /**
     * Helper method that reads the String preceding the first colon or newline in the InputStream.
     * 
     * @param in The InputStream to read from
     * @param maxsearch The maximum number of bytes allowed in the key
     * @return The CHARSET encoded String that was read
     * @throws ParseException If a colon, newline, or end of input is not reached within maxsearch reads
     * @throws IOException
     */
    private static String readKey(InputStream in, int maxsearch) throws ParseException, IOException {

        return readToken(in, KEY_DELIMITERS, maxsearch);
    }

    /**
     * Helper method that reads the String preceding the first newline in the InputStream.
     * 
     * @param in The InputStream to read from
     * @param maxsearch The maximum number of bytes allowed in the value
     * @return The CHARSET encoded String that was read
     * @throws ParseException If a newline or end of input is not reached within maxsearch reads
     * @throws IOException
     */
    public static String readValue(InputStream in, int maxsearch) throws ParseException, IOException {

        return readToken(in, VALUE_DELIMITERS, maxsearch);
    }

    /**
     * A helper method that reads from an InputStream and copies to an OutputStream until the LF character is read (The LF is not
     * copied to the OutputStream) or the end of the stream is reached. An exception is thrown if more than maxsearch bytes are
     * read without encountering LF. This is used by {@link #parse} to copy the relocate values into a BufferingOutputStream. 
     * 
     * @param in The InputStream to read from
     * @param maxsearch The maximum number of bytes to search for a newline
     * @param out The OutputStream to copy into
     * @return The number of bytes read from in, including the LF if one was read
     * @throws IOException
     * @throws ParseException If a newline is not found within maxsearch reads
     */
    private static int copyLine(InputStream in, int maxsearch, OutputStream out)
            throws IOException, ParseException {

        if (maxsearch <= 0) {
            throw new RuntimeException("copyLine: Invalid maxsearch " + maxsearch);
        }

        // Both values are ints: a byte-sized counter would wrap at 127 and a byte-sized
        // read result could not be told apart from the -1 end-of-stream marker.
        int bytesRead = 0;
        int next;

        while ((next = in.read()) != -1) {

            if (++bytesRead > maxsearch) {
                throw new ParseException("Could not find delimiter within " + maxsearch + " bytes.", 0);
            }
            if (next == NEWLINE) {
                break;
            }
            out.write(next);
        }

        return bytesRead;
    }

    /**
     * Parses a String header by setting the appropriate field in upload if the key is recognized 
     * and ignoring keys that are not recognized. Nothing is done if either the key or the value is blank.
     * 
     * @param key The key String with leading/trailing whitespace omitted
     * @param value The value String with leading/trailing whitespace omitted
     * @throws ParseException if the value of a recognized key cannot be properly parsed
     */
    private void parseParam(String key, String value) throws ParseException {

        if (StringUtils.isBlank(key) || StringUtils.isBlank(value)) {

            return;
        }
        try {
            if (key.equalsIgnoreCase(VERSION)) {
                this.setVersion(value);
            } else if (key.equalsIgnoreCase(FILELENGTH)) {
                this.setFilelength(Long.parseLong(value));
            } else if (key.equalsIgnoreCase(BLOCKSIZE)) {
                this.setBlocksize(Long.parseLong(value));
            } else if (key.equalsIgnoreCase(SHA_1)) {
                this.setSha1(value);
            }
        } catch (NumberFormatException ex) {

            ParseException pe = new ParseException("Cannot parse " + value + " into a long.", -1);
            pe.initCause(ex);
            throw pe;
        }
    }

    /**
     * Returns an InputStream containing a complete ZSync upload (Params, Relocate stream, and ByteRange stream), 
     * ready to be sent as the body of a PUT request. <p/>
     * 
     * The returned stream is the concatenation of the headers from {@link #getParams}, the text "Relocate: ",
     * the relocate stream, a single LF, and the data stream. Both the relocate stream and the data stream
     * should have been set before calling this method.<p/>
     * 
     * Note: In this implementation, any temporary file used to store the RelocateRanges will be automatically deleted when this stream
     * is closed, so a second invocation of this method on the same Upload object is likely to throw an exception.
     * Therefore, this method should be used only once per Upload object.
     * 
     * @return The complete ZSync upload
     * @throws UnsupportedEncodingException
     * @throws IOException
     */
    public InputStream getInputStream() throws UnsupportedEncodingException, IOException {

        List<InputStream> streamList = new ArrayList<InputStream>();

        /*
         * The getParams and getRelocStream must be terminated by a single LF character.
         */
        streamList.add(IOUtils.toInputStream(getParams(), CHARSET));
        streamList.add(IOUtils.toInputStream(RELOCATE + ": ", CHARSET));
        streamList.add(getRelocStream());
        /* Prepend the data portion with a blank line. */
        streamList.add(IOUtils.toInputStream(Character.toString(LF), CHARSET));
        streamList.add(getDataStream());

        return new SequenceInputStream(new IteratorEnum<InputStream>(streamList));
    }

    /**
     * Gets the zsync version of the upload sender (client)
     */
    public String getVersion() {
        return version;
    }

    /**
     * Sets the zsync version of the upload sender (client)
     */
    public void setVersion(String version) {
        this.version = version;
    }

    /**
     * Gets the checksum for the entire source file
     */
    public String getSha1() {
        return sha1;
    }

    /**
     * Sets the checksum for the entire source file, which allow the server to validate the new file
     * after assembling it.
     */
    public void setSha1(String sha1) {
        this.sha1 = sha1;
    }

    /**
     * Gets the blocksize used in the upload. 
     */
    public long getBlocksize() {
        return blocksize;
    }

    /**
     * Sets the blocksize used in the upload. The server needs this to translate block ranges into byte ranges
     */
    public void setBlocksize(long blocksize) {
        this.blocksize = blocksize;
    }

    /**
     * Gets the length of the (assembled) source file being uploaded
     */
    public long getFilelength() {
        return filelength;
    }

    /**
     * Sets the length of the (assembled) source file being uploaded
     */
    public void setFilelength(long filelength) {
        this.filelength = filelength;
    }

    /**
     *    
     * Gets the list of RelocateRanges, which tells the server which blocks of the previous
     * file to keep, and where to place them in the new file. The current format is a comma 
     * separated list terminated by LF.
     *
     */
    public InputStream getRelocStream() {
        return relocStream;
    }

    /**
     *    
     * Sets the list of RelocateRanges, which tells the server which blocks of the previous
     * file to keep, and where to place them in the new file. The current format is a comma 
     * separated list terminated by LF.
     *
     * @param relocStream The stream containing the relocate list
     */
    public void setRelocStream(InputStream relocStream) {
        this.relocStream = relocStream;
    }

    /**
     * Gets the list of uploaded data chunks ( byte Ranges and their associated data ). 
     */
    public InputStream getDataStream() {
        return dataStream;
    }

    /**
     * Sets the list of data chunks to be uploaded ( byte Ranges and their associated data ).  The stream
     * should contain no leading whitespace.
     * 
     * @param dataStream The stream containing the data chunks
     */
    public void setDataStream(InputStream dataStream) {
        this.dataStream = dataStream;
    }

    /**
     * An <code>Enumeration</code> wrapper for an Iterator. This is needed in order to construct
     * a <code>SequenceInputStream</code> (used to concatenate upload sections), which takes an <code>Enumeration</code> argument.
     * 
     * @author Nick
     *
     * @param <T> The type of object being enumerated
     */
    public static class IteratorEnum<T> implements Enumeration<T> {

        /** The iterator that supplies the enumerated elements. */
        Iterator<T> iter;

        /**
         * Creates an enumeration over the elements of the given list, in the list's iteration order.
         * 
         * @param list The list whose elements are enumerated
         */
        public IteratorEnum(List<T> list) {

            this.iter = list.iterator();
        }

        @Override
        public boolean hasMoreElements() {

            return iter.hasNext();
        }

        @Override
        public T nextElement() {

            return iter.next();
        }
    }

    /**
     * An object representing a (Key, Value) pair of Strings. Currently unused.
     * 
     * @author Nick
     *
     */
    public static class KeyValue {

        public String KEY;
        public String VALUE;

        /**
         * Creates a pair from the given key and value, stored as given.
         * 
         * @param key The key String
         * @param value The value String
         */
        public KeyValue(String key, String value) {

            this.KEY = key;
            this.VALUE = value;
        }

        /**
         * Parses a String of the form "foo: bar" into a KeyValue object whose KEY is the
         * String preceding the first colon and VALUE is the String following the first colon
         * ( leading and trailing whitespaces are removed from KEY and VALUE ). A ParseException is
         * thrown if the input String does not contain a colon.
         * 
         * @param kv A String of the form "foo: bar"
         * @return A KeyValue object with a KEY of "foo" and a VALUE of "bar"
         * @throws ParseException If no colon is found in <b>kv</b>
         */
        public static KeyValue parseKV(String kv) throws ParseException {

            int colonIndex = kv.indexOf(':');
            if (colonIndex == -1) {

                throw new ParseException("No colon found in \"" + kv + "\"", colonIndex);
            }

            String key = kv.substring(0, colonIndex).trim();
            String value = kv.substring(colonIndex + 1).trim();

            return new KeyValue(key, value);
        }
    }

}