edu.lternet.pasta.dml.database.TextComplexFormatDataReader.java Source code

Java tutorial

Introduction

Here is the source code for edu.lternet.pasta.dml.database.TextComplexFormatDataReader.java

Source

/**
 *    '$RCSfile: TextComplexFormatDataReader.java,v $'
 *
 *     '$Author: costa $'
 *       '$Date: 2006-11-06 21:18:34 $'
 *   '$Revision: 1.2 $'
 *
 *  For Details: http://kepler.ecoinformatics.org
 *
 * Copyright (c) 2003 The Regents of the University of California.
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 *
 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
 * OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */
package edu.lternet.pasta.dml.database;

import java.io.InputStream;
import java.util.Vector;

//import org.apache.commons.logging.Log;
//import org.apache.commons.logging.LogFactory;
//import org.ecoinformatics.util.DelimitedReader;
import edu.lternet.pasta.dml.parser.Attribute;
import edu.lternet.pasta.dml.parser.Entity;
import edu.lternet.pasta.dml.parser.TextComplexDataFormat;
import edu.lternet.pasta.dml.parser.TextDelimitedDataFormat;
import edu.lternet.pasta.dml.parser.TextWidthFixedDataFormat;

/**
 * Reads a text data input stream and splits it into row vectors according to
 * the array of {@link TextComplexDataFormat} objects found in the entity
 * metadata. Every attribute is described either by a
 * {@link TextWidthFixedDataFormat} (fixed number of characters, optionally
 * starting at a given column) or by a {@link TextDelimitedDataFormat}
 * (terminated by a field delimiter).
 *
 * <p>The public entry point is {@link #getOneRowDataVector()}. Each call
 * consumes one row from the stream; once the end of the stream is reached an
 * empty vector is returned, so callers can loop until an empty vector is hit.
 *
 * <p>Each byte returned by {@link InputStream#read()} is treated as one
 * character.
 *
 * @author tao
 */
public class TextComplexFormatDataReader extends TextDataReader {
    /*
     * Class fields
     */

    /** Value used to pad a row that has fewer fields than attributes. */
    public static final String DEFAULTVALUE = "";

    /*
     * Instance fields
     */
    private InputStream dataStream = null;
    private Entity entity = null;
    private boolean stripHeader = true;
    private int numberOfAttributes = 0;
    private TextComplexDataFormat[] formats = null;
    private String physicalLineDelimiter = null;
    private int numberOfHeaderLines = 0;
    private int physicalLineDelimiterLength = 0;
    // number of header lines already skipped; kept across calls so that the
    // header is stripped only once per reader
    private int headLineNumberCount = 0;

    /*
     * Constructors
     */

    /**
     * Constructor with the default stripHeader value - true.
     * 
     * @param dataStream  the data input stream
     * @param entity      the entity metadata describing the data stream
     * @throws Exception  if an argument is null or the entity metadata is
     *                    incomplete or inconsistent
     */
    public TextComplexFormatDataReader(InputStream dataStream, Entity entity) throws Exception {
        this(dataStream, entity, true);
    }

    /**
     * Constructor with an assigned stripHeader value.
     * 
     * @param dataStream  the data input stream
     * @param entity      the entity metadata describing the data stream
     * @param stripHeader whether header lines are skipped when reading the
     *                    input stream
     * @throws Exception  if dataStream or entity is null, or the entity
     *                    metadata is incomplete or inconsistent
     */
    public TextComplexFormatDataReader(InputStream dataStream, Entity entity, boolean stripHeader)
            throws Exception {
        if (dataStream == null || entity == null) {
            throw new Exception("Data inputstream or entity metadata is null");
        }
        this.dataStream = dataStream;
        this.entity = entity;
        this.stripHeader = stripHeader;
        getParameterFromEntity();
    }

    /*
     * Derives the parameters used by the reader from the entity metadata:
     * number of attributes, number of header lines, physical line delimiter
     * and the per-attribute format array.
     */
    private void getParameterFromEntity() throws Exception {
        Attribute[] attributeList = entity.getAttributes();
        if (attributeList == null) {
            throw new Exception("Attribute in entity metadata is null");
        }
        numberOfAttributes = attributeList.length;

        numberOfHeaderLines = entity.getNumHeaderLines();
        if (numberOfHeaderLines == -1) {
            // -1 is treated as "no header lines"
            numberOfHeaderLines = 0;
        }

        // The physical line delimiter comes from the physicalLineDelimiter
        // element; if it is absent, fall back to the record delimiter.
        physicalLineDelimiter = entity.getPhysicalLineDelimiter();
        if (physicalLineDelimiter == null) {
            physicalLineDelimiter = entity.getRecordDelimiter();
        }
        physicalLineDelimiter = DelimitedReader.unescapeDelimiter(physicalLineDelimiter);
        if (physicalLineDelimiter == null) {
            // fail with a clear message instead of a NullPointerException below
            throw new Exception("Physical line delimiter is null in metadata entity");
        }
        physicalLineDelimiterLength = physicalLineDelimiter.length();

        formats = entity.getDataFormatArray();
        if (formats == null) {
            throw new Exception("Complext format is null in metadata entity");
        }
        if (formats.length != numberOfAttributes) {
            throw new Exception("Complex formats should have same number as attribute number");
        }
    }

    /**
     * Reads one row from the input stream and returns it as a vector of
     * trimmed String field values, one per attribute. A row that ends early
     * (end of stream) but already holds at least one value is padded with
     * {@link #DEFAULTVALUE} up to the number of attributes. After the end of
     * the stream is reached an empty vector is returned, so this method can be
     * called in a loop until an empty vector is hit.
     * 
     * @return Vector of String field values; empty at end of stream
     * @throws Exception if a format entry is null, a delimited format has no
     *                   field delimiter, or reading the stream fails
     */
    public Vector getOneRowDataVector() throws Exception {
        Vector<String> oneRowDataVector = new Vector<String>();
        if (dataStream == null) {
            return oneRowDataVector;
        }

        // sliding window over the most recent characters, used to detect the
        // physical line delimiter
        StringBuilder lineDelimiterWindow = new StringBuilder();
        StringBuilder fieldValueBuffer = new StringBuilder();
        int columnCount = 1; // 1-based column of the current character in its physical line
        int attributeCount = 0; // index of the attribute currently being read
        boolean startNewAttribute = true;
        boolean isWidthFix = true;
        int width = -1;
        int widthCount = 0;
        boolean startWidthCount = false;
        int startColumnNumberFromFormat = -1;
        String fieldDelimiter = null;

        int singleCharacter = dataStream.read();
        while (singleCharacter != -1) {
            char character = (char) singleCharacter;

            if (stripHeader && numberOfHeaderLines > 0 && headLineNumberCount < numberOfHeaderLines) {
                // still inside the header: discard characters, count line ends
                if (endsPhysicalLine(lineDelimiterWindow, character)) {
                    headLineNumberCount++;
                }
            } else {
                // handle data after the header has been stripped
                fieldValueBuffer.append(character);

                // load the format information when a new attribute starts
                if (startNewAttribute) {
                    startNewAttribute = false;
                    TextComplexDataFormat format = formats[attributeCount];
                    if (format == null) {
                        throw new Exception("The text format is null for an attribute");
                    } else if (format instanceof TextWidthFixedDataFormat) {
                        TextWidthFixedDataFormat widthFormat = (TextWidthFixedDataFormat) format;
                        width = widthFormat.getFieldWidth();
                        startColumnNumberFromFormat = widthFormat.getFieldStartColumn();
                        isWidthFix = true;
                        startWidthCount = false;
                    } else if (format instanceof TextDelimitedDataFormat) {
                        TextDelimitedDataFormat delimitedFormat = (TextDelimitedDataFormat) format;
                        fieldDelimiter = delimitedFormat.getFieldDelimiter();
                        if (fieldDelimiter == null) {
                            throw new Exception("The field delimiter is null for a delimited attribute");
                        }
                        isWidthFix = false;
                    }
                }

                if (isWidthFix) {
                    if (startColumnNumberFromFormat != -1 && startColumnNumberFromFormat == columnCount) {
                        // The metadata names a start column and we just reached
                        // it: the value begins with this character. The width
                        // counter restarts together with the buffer so that a
                        // start column reached again after a line break still
                        // yields a full-width value.
                        fieldValueBuffer.setLength(0);
                        fieldValueBuffer.append(character);
                        widthCount = 0;
                        startWidthCount = true;
                    } else if (startColumnNumberFromFormat == -1) {
                        // no start column given: the value begins immediately
                        startWidthCount = true;
                    }
                    if (startWidthCount) {
                        widthCount++;
                    }
                    // the value is complete once the counted width reaches the
                    // width declared by the format
                    if (widthCount == width) {
                        oneRowDataVector.add(fieldValueBuffer.toString().trim());
                        widthCount = 0;
                        startWidthCount = false;
                        fieldValueBuffer.setLength(0);
                        startNewAttribute = true;
                        attributeCount++;
                    }
                } else {
                    // delimited data: the value ends where the field delimiter ends
                    String buffered = fieldValueBuffer.toString();
                    if (buffered.endsWith(fieldDelimiter)) {
                        String value = buffered.substring(0, buffered.length() - fieldDelimiter.length());
                        oneRowDataVector.add(value.trim());
                        fieldValueBuffer.setLength(0);
                        startNewAttribute = true;
                        attributeCount++;
                    }
                }

                columnCount++;

                // column numbering restarts after every physical line delimiter
                if (endsPhysicalLine(lineDelimiterWindow, character)) {
                    columnCount = 1;
                }

                // a complete row has been collected
                if (attributeCount == numberOfAttributes) {
                    break;
                }
            }

            singleCharacter = dataStream.read();
        }

        // The stream may end without a trailing delimiter after the last
        // delimited field. Keep that pending value instead of silently
        // dropping it. Whitespace-only leftovers (e.g. a trailing blank line)
        // are ignored so that the end of data still yields an empty vector.
        if (!isWidthFix && attributeCount < numberOfAttributes && fieldValueBuffer.length() > 0) {
            String pendingValue = fieldValueBuffer.toString().trim();
            if (pendingValue.length() > 0) {
                oneRowDataVector.add(pendingValue);
            }
        }

        // If the row vector is not empty but shorter than the number of
        // attributes, pad it with the default value.
        if (!oneRowDataVector.isEmpty()) {
            for (int i = oneRowDataVector.size(); i < numberOfAttributes; i++) {
                oneRowDataVector.add(DEFAULTVALUE);
            }
        }

        return oneRowDataVector;
    }

    /*
     * Feeds one character into the sliding window and reports whether the
     * window now holds the physical line delimiter. The window keeps only the
     * last physicalLineDelimiterLength characters, so a multi-character
     * delimiter is recognized at any offset. After a match the window is
     * cleared so the same characters are not matched twice. An empty delimiter
     * never matches.
     */
    private boolean endsPhysicalLine(StringBuilder window, char character) {
        if (physicalLineDelimiterLength == 0) {
            return false;
        }
        window.append(character);
        if (window.length() > physicalLineDelimiterLength) {
            window.deleteCharAt(0);
        }
        if (window.length() == physicalLineDelimiterLength
                && window.toString().equals(physicalLineDelimiter)) {
            window.setLength(0);
            return true;
        }
        return false;
    }

    /**
     * @return Returns the dataStream field.
     */
    public InputStream getDataStream() {
        return dataStream;
    }

    /**
     * Sets the dataStream field to a given input stream. Only the field is
     * replaced; the count of header lines already skipped is not reset.
     * 
     * @param dataStream The InputStream value to set.
     */
    public void setDataStream(InputStream dataStream) {
        this.dataStream = dataStream;
    }

    /**
     * Gets the value of the entity field.
     * 
     * @return Returns the entity field.
     */
    public Entity getEntity() {
        return entity;
    }

    /**
     * Sets the value of the entity field to the specified Entity object. Only
     * the field is replaced; the parameters derived from the entity at
     * construction time (formats, delimiters, header line count) are not
     * recomputed.
     * 
     * @param entity  The Entity value to set.
     */
    public void setEntity(Entity entity) {
        this.entity = entity;
    }

}