org.ecoinformatics.seek.datasource.eml.eml2.Eml200Parser.java Source code

Java tutorial

Introduction

Here is the source code for org.ecoinformatics.seek.datasource.eml.eml2.Eml200Parser.java

Source

/*
 * Copyright (c) 2003-2010 The Regents of the University of California.
 * All rights reserved.
 *
 * '$Author: crawl $'
 * '$Date: 2012-11-26 14:19:36 -0800 (Mon, 26 Nov 2012) $' 
 * '$Revision: 31113 $'
 * 
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the above
 * copyright notice and the following two paragraphs appear in all copies
 * of this software.
 *
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
 * ENHANCEMENTS, OR MODIFICATIONS.
 *
 */

package org.ecoinformatics.seek.datasource.eml.eml2;

import java.io.InputStream;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xpath.CachedXPathAPI;
import org.kepler.metadata.ParserInterface;
import org.kepler.objectmanager.data.DataType;
import org.kepler.objectmanager.data.DataTypeResolver;
import org.kepler.objectmanager.data.DateTimeDomain;
import org.kepler.objectmanager.data.Domain;
import org.kepler.objectmanager.data.EnumeratedDomain;
import org.kepler.objectmanager.data.NumericDomain;
import org.kepler.objectmanager.data.db.Attribute;
import org.kepler.objectmanager.data.db.AttributeList;
import org.kepler.objectmanager.data.db.Entity;
import org.kepler.objectmanager.data.text.TextComplexDataFormat;
import org.kepler.objectmanager.data.text.TextDelimitedDataFormat;
import org.kepler.objectmanager.data.text.TextDomain;
import org.kepler.objectmanager.data.text.TextWidthFixedDataFormat;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * This plugin parses EML 2.0.0 metadata files
 */
public class Eml200Parser implements ParserInterface {

    //private static String NAMESPACE = "eml://ecoinformatics.org/eml-2.0.0";
    // Namespace URI of the document's root element; assigned in parseDocument.
    private String nameSpace = null;
    // Entity lookup table exposed through getEntityHash(); getRecordCount() and
    // getAttributeCount() look entities up here by the id they are given.
    private Hashtable<String, Entity> entityHash = new Hashtable<String, Entity>();
    private Vector<Entity> entityList = new Vector<Entity>();//this one will preserve the order of the entity.
    // private Hashtable fileHash = new Hashtable();
    // Value reported by getEntityCount().
    private int numEntities = 0;
    // Last numberOfRecords value read by processEntities; starts at -1.
    private int numRecords = -1;
    // Entity that processAttributeList checks for simple-delimited format and
    // attaches the attribute list to.
    private Entity entityObject = null;
    private DataTypeResolver dtr = DataTypeResolver.instanceOf();
    // Running counter incremented once per entity and once per attribute; its
    // string form is used as the id of each Attribute that is created.
    private int elementId = 0;
    // private boolean hasImageEntity = false;
    // Number of complex text formats found for an entity; processAttributeList
    // compares it against the attribute count for non-simple-delimited entities.
    private int complexFormatsNumber = 0;
    // attributeList id -> parsed AttributeList, used to resolve <references>.
    private Hashtable<String, AttributeList> attributeListHash = new Hashtable<String, AttributeList>();
    // Set to true as soon as any attribute declares a missingValueCode/code.
    private boolean hasMissingValue = false;

    // Class-wide logger, created in the static initializer below.
    private static Log log;
    // Debug-enabled flag sampled once when the class is initialized; it guards
    // debug messages that build strings.
    private static boolean isDebugging;

    static {
        log = LogFactory.getLog("org.ecoinformatics.seek.datasource.eml.eml2.Eml200Parser");
        isDebugging = log.isDebugEnabled();
    }

    // constants
    // XPath expressions selecting each kind of entity element under dataset.
    public static final String TABLEENTITY = "//dataset/dataTable";
    public static final String SPATIALRASTERENTITY = "//dataset/spatialRaster";
    public static final String SPATIALVECTORENTITY = "//dataset/spatialVector";
    public static final String STOREDPROCEDUREENTITY = "//dataset/storedProcedure";
    public static final String VIEWENTITY = "//dataset/view";
    public static final String OTHERENTITY = "//dataset/otherEntity";
    public static final String EML = "eml";
    public static final String PACKAGEID = "packageId";
    // Value of the url "function" attribute that marks a distribution url as
    // not downloadable (compared in processEntities).
    private static final String INFORMATION = "information";

    /**
     * returns a hashtable with the id of the entity as the key and the data
     * file id to which the entity refers as the value. This way, if you want to
     * know what data file goes with an entity, you can do a get on this hash
     * for the id of the entity. note that the entity id is the xml entity id
     * from the generated input step, not the id of the entity file itself.
     */
    /*
     * public Hashtable getDataFilesHash() { return fileHash; }
     */

    /**
     * Parses the EML package read from a SAX {@link InputSource} and hands
     * the resulting DOM document to {@code parseDocument}.
     *
     * @param source
     *            the input source supplying the EML XML
     * @throws Exception
     *             if the parser cannot be configured, the XML cannot be
     *             parsed, or the entities cannot be processed
     */
    public void parse(InputSource source) throws Exception {
        Document doc = createHardenedDocumentBuilder().parse(source);
        parseDocument(doc);
    }

    /**
     * Parses the EML package read from an {@link InputStream} and hands the
     * resulting DOM document to {@code parseDocument}.
     *
     * @param is
     *            the stream supplying the EML XML
     * @throws Exception
     *             if the parser cannot be configured, the XML cannot be
     *             parsed, or the entities cannot be processed
     */
    public void parse(InputStream is) throws Exception {
        Document doc = createHardenedDocumentBuilder().parse(is);
        parseDocument(doc);
    }

    /**
     * Creates the namespace-aware DOM builder shared by both parse overloads.
     * Resolution of external general entities, external parameter entities
     * and external DTDs is switched off so that a metadata document cannot
     * make the parser read local files or open network connections (XXE).
     * DOCTYPE declarations themselves are still accepted.
     *
     * @return a configured document builder
     * @throws javax.xml.parsers.ParserConfigurationException
     *             if a builder cannot be created
     */
    private DocumentBuilder createHardenedDocumentBuilder()
            throws javax.xml.parsers.ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        String[] externalAccessFeatures = {
                "http://xml.org/sax/features/external-general-entities",
                "http://xml.org/sax/features/external-parameter-entities",
                "http://apache.org/xml/features/nonvalidating/load-external-dtd" };
        for (int f = 0; f < externalAccessFeatures.length; f++) {
            String feature = externalAccessFeatures[f];
            try {
                factory.setFeature(feature, false);
            } catch (javax.xml.parsers.ParserConfigurationException e) {
                // A parser that does not know the feature must not make
                // parsing impossible; report it so the gap is visible.
                log.warn("XML parser does not support feature " + feature
                        + "; external access could not be disabled", e);
            }
        }
        return factory.newDocumentBuilder();
    }

    /*
     * Parses the EML document: records the namespace of the root element,
     * selects the dataTable, spatialRaster, spatialVector, otherEntity and
     * view entities, and processes them in that order. storedProcedure
     * entities are not selected here.
     *
     * Failures are rethrown as Exception with the original exception attached
     * as the cause, so the underlying stack trace is not lost.
     */
    private void parseDocument(Document doc) throws Exception {
        NodeList entities;
        NodeList spatialRasterEntities;
        NodeList spatialVectorEntities;
        NodeList otherEntities;
        NodeList viewEntities;
        Element root = doc.getDocumentElement();
        nameSpace = root.getNamespaceURI();
        CachedXPathAPI xpathapi = new CachedXPathAPI();
        try {
            entities = xpathapi.selectNodeList(doc, TABLEENTITY);
            spatialRasterEntities = xpathapi.selectNodeList(doc, SPATIALRASTERENTITY);
            spatialVectorEntities = xpathapi.selectNodeList(doc, SPATIALVECTORENTITY);
            otherEntities = xpathapi.selectNodeList(doc, OTHERENTITY);
            viewEntities = xpathapi.selectNodeList(doc, VIEWENTITY);
        } catch (Exception e) {
            throw new Exception("Error extracting entities from eml2.0.0 package.", e);
        }

        try {
            log.debug("Processing entities");
            processEntities(xpathapi, entities, TABLEENTITY);
            // TODO: current we still treat them as TableEntity java object,
            // in future we need add new SpatialRasterEntity and SpatialVector
            // object for them
            processEntities(xpathapi, spatialRasterEntities, SPATIALRASTERENTITY);
            processEntities(xpathapi, spatialVectorEntities, SPATIALVECTORENTITY);
            processEntities(xpathapi, otherEntities, OTHERENTITY);
            processEntities(xpathapi, viewEntities, VIEWENTITY);
            log.debug("Done processing entities");
        } catch (Exception e) {
            // Report through the class logger rather than printing to stderr.
            log.error("Error processing entities", e);
            throw new Exception("Error processing entities: " + e.getMessage(), e);
        }
    }

    /**
     * Gives access to the entity lookup table built by this parser.
     * The returned object is the parser's own table, not a copy.
     *
     * @return the hashtable mapping a string key to its {@code Entity}
     */
    public Hashtable<String, Entity> getEntityHash() {
        return this.entityHash;
    }

    /**
     * Gives access to the entities held by this parser as a list.
     * The returned object is the parser's own list, not a copy.
     *
     * @return the list of entities
     */
    public List<Entity> getEntities() {
        return this.entityList;
    }

    /**
     * Reports the namespace URI taken from the root element of the parsed
     * document.
     *
     * @return the namespace URI, or null when none has been recorded
     */
    public String getNameSpace() {
        return this.nameSpace;
    }

    /**
     * Returns the number of records of the entity registered under the given
     * id.
     *
     * @param entityId
     *            the id of the entity object to get the record count for
     * @return the entity's record count, or -1 when {@code entityId} is null
     *         or no entity is registered under it (-1 is the same "unknown"
     *         value this parser starts its own record counter with)
     */
    public int getRecordCount(String entityId) {
        // Hashtable.get rejects a null key with a NullPointerException.
        Entity entity = (entityId == null) ? null : entityHash.get(entityId);
        if (entity == null) {
            return -1;
        }
        return entity.getNumRecords();
    }

    /**
     * Reports the entity counter maintained by this parser.
     *
     * @return the current value of the entity counter
     */
    public int getEntityCount() {
        return this.numEntities;
    }

    /**
     * Returns the number of attributes of the entity registered under the
     * given id.
     *
     * @param entityId
     *            the id of the entity object that you want the attribute count
     *            for
     * @return the number of attributes, or 0 when {@code entityId} is null,
     *         no entity is registered under it, or the entity reports no
     *         attribute array
     */
    public int getAttributeCount(String entityId) {
        // Hashtable.get rejects a null key with a NullPointerException.
        Entity entity = (entityId == null) ? null : entityHash.get(entityId);
        if (entity == null) {
            return 0;
        }
        Attribute[] attArray = entity.getAttributes();
        return (attArray == null) ? 0 : attArray.length;
    }

    /**
     * Tells whether a missing value declaration has been seen while parsing
     * attributes.
     *
     * @return true if at least one missing value code was recorded
     */
    public boolean hasMissingValue() {
        return this.hasMissingValue;
    }

    /**
     * Method to get the boolean hasImageEntity. If the eml document has
     * SpatialRaster or SpatialVector entity, this variable should be true;
     * 
     * @return boolean
     */
    /*
     * public boolean getHasImageEntity() { return this.hasImageEntity;
     * 
     * }
     */

    /*
     * Processes the attributeList element of an entity.
     *
     * The first node of attList is either a list of attribute children, which
     * are parsed and (when the attributeList carries an id) registered in
     * attributeListHash, or a references child naming the id of an
     * attributeList registered earlier. The resulting list is set on the
     * entityObject field.
     *
     * NOTE(review): the entObj parameter is not read; the method works on the
     * entityObject field instead - presumably callers pass that same object,
     * confirm before relying on the parameter.
     *
     * Throws Exception when attList is empty, when neither attribute nor
     * references children exist, when a reference is empty or unknown, or
     * when a non-simple-delimited entity's complex format count is zero or
     * differs from its attribute count.
     */
    private void processAttributeList(CachedXPathAPI xpathapi, NodeList attList, Entity entObj) throws Exception {
        if (attList == null || attList.getLength() == 0) {
            throw new Exception("No attributeList element was supplied for the entity");
        }
        Node attListNode = attList.item(0);

        // get attributeList element's attribute - id
        String idString = null;
        NamedNodeMap idAttribute = attListNode.getAttributes();
        if (idAttribute != null) {
            Node id = idAttribute.getNamedItem("id");
            if (id != null) {
                idString = id.getNodeValue();
                if (isDebugging) {
                    log.debug("The id value for the attributelist is " + idString);
                }
            }
        }

        NodeList attNodeList = xpathapi.selectNodeList(attListNode, "attribute");
        NodeList referenceNodeList = xpathapi.selectNodeList(attListNode, "references");
        AttributeList attributeList;
        if (attNodeList != null && attNodeList.getLength() > 0) {
            attributeList = new AttributeList();
            processAttributes(xpathapi, attNodeList, attributeList);
            if (idString != null) {
                // remember the list so later <references> elements can reuse it
                attributeListHash.put(idString, attributeList);
            }
        } else if (referenceNodeList != null && referenceNodeList.getLength() > 0) {
            // get the references id
            Node referenceNode = referenceNodeList.item(0);
            if (isDebugging) {
                log.debug("The reference node's name is " + referenceNode.getNodeName());
            }
            Node referenceText = referenceNode.getFirstChild();
            String referenceId = (referenceText == null) ? null : referenceText.getNodeValue();
            if (isDebugging) {
                log.debug("the reference id is " + referenceId);
            }
            if (referenceId != null) {
                // surrounding whitespace is not part of the id
                referenceId = referenceId.trim();
            }
            if (referenceId == null || referenceId.length() == 0) {
                throw new Exception("The references element of attributeList is empty");
            }
            attributeList = attributeListHash.get(referenceId);
            if (attributeList == null) {
                // Fail here with a clear message instead of a later
                // NullPointerException or an entity without attributes.
                throw new Exception("attributeList references id '" + referenceId
                        + "' but no attributeList with that id has been parsed");
            }
        } else {
            log.debug("The children name of attribute list couldn't be understood");
            throw new Exception(" couldn't be a child of attributeList");
        }

        if (!entityObject.isSimpleDelimited()) {
            int length = attributeList.getAttributes().size();
            // a complex format needs exactly one format entry per attribute
            if (length != complexFormatsNumber || complexFormatsNumber == 0) {
                throw new Exception("Complex format elements should have some number as attribute number");
            }
        }

        entityObject.setAttributeList(attributeList);
    }

    /**
     * Builds one {@code Attribute} for every node in {@code atts} and appends
     * it to {@code attributeListObj}.
     * <p>
     * For each attribute node the name (with '.' replaced by '_'), label,
     * definition, unit, domain and missing value codes are read from its
     * children. The domain comes from the measurementScale child:
     * interval/ratio give a numeric domain, nominal/ordinal a text or
     * enumerated domain, and dateTime a date-time domain. The attribute's
     * type name is taken from the domain's data type. A name already present
     * in the list is made unique by appending "_" and a counter.
     * <p>
     * Empty name/label/definition/unit elements leave the corresponding value
     * as the empty string instead of failing with a NullPointerException.
     *
     * @param xpathapi
     *            the XPath helper used for lookups below each attribute
     * @param atts
     *            the attribute nodes to convert
     * @param attributeListObj
     *            the list receiving the created attributes
     * @throws Exception
     *             if an XPath lookup fails, a dateTime scale has no format
     *             string, or an attribute has no recognizable domain
     */
    private void processAttributes(CachedXPathAPI xpathapi, NodeList atts, AttributeList attributeListObj)
            throws Exception {

        for (int i = 0; i < atts.getLength(); i++) {
            Node att = atts.item(i);
            NodeList attChildren = att.getChildNodes();

            String attName = "";
            String attLabel = "";
            String attDefinition = "";
            String attUnit = "";
            String attUnitType = "";
            // never populated here; handed to Attribute as the empty string
            String attMeasurementScale = "";
            Domain domain = null;
            Vector<String> missingCodeVector = new Vector<String>();

            elementId++;

            for (int j = 0; j < attChildren.getLength(); j++) {
                Node child = attChildren.item(j);
                String childName = child.getNodeName();
                if (childName.equals("attributeName")) {
                    String text = readFirstChildText(child);
                    if (text != null) {
                        attName = text.trim().replace('.', '_');
                    }
                } else if (childName.equals("attributeLabel")) {
                    String text = readFirstChildText(child);
                    if (text != null) {
                        attLabel = text.trim();
                    }
                } else if (childName.equals("attributeDefinition")) {
                    String text = readFirstChildText(child);
                    if (text != null) {
                        attDefinition = text.trim();
                    }
                } else if (childName.equals("measurementScale")) {
                    NodeList msNodeList = child.getChildNodes();
                    for (int k = 0; k < msNodeList.getLength(); k++) {
                        Node n = msNodeList.item(k);
                        String name = n.getNodeName();
                        if (name.equals("interval") || name.equals("ratio")) {
                            // unit is tricky because it can be custom or standard
                            Node sUnit = xpathapi.selectSingleNode(n, "unit/standardUnit");
                            Node cUnit = xpathapi.selectSingleNode(n, "unit/customUnit");
                            if (sUnit != null) {
                                String unitText = readFirstChildText(sUnit);
                                if (unitText != null) {
                                    attUnit = unitText;
                                }
                                attUnitType = Attribute.STANDARDUNIT;
                            } else if (cUnit != null) {
                                String unitText = readFirstChildText(cUnit);
                                if (unitText != null) {
                                    attUnit = unitText;
                                }
                                attUnitType = Attribute.CUSTOMUNIT;
                            } else {
                                log.warn("xpath didn't work");
                            }
                            domain = buildNumericDomain(xpathapi, n);
                        } else if (name.equals("nominal") || name.equals("ordinal")) {
                            Domain nonNumeric = buildNonNumericDomain(xpathapi, n);
                            if (nonNumeric != null) {
                                domain = nonNumeric;
                            }
                        } else if (name.equalsIgnoreCase("datetime")) {
                            domain = buildDateTimeDomain(xpathapi, n);
                        }
                    }
                } else if (childName.equals("missingValueCode")) {
                    log.debug("in missilng valueCode");
                    NodeList missingNodeList = child.getChildNodes();
                    for (int k = 0; k < missingNodeList.getLength(); k++) {
                        Node n = missingNodeList.item(k);
                        if (n.getNodeName().equals("code")) {
                            String missingCode = readFirstChildText(n);
                            if (n.getFirstChild() != null) {
                                if (isDebugging) {
                                    log.debug("the missing code is " + missingCode);
                                }
                                missingCodeVector.add(missingCode);
                                hasMissingValue = true;
                            }
                        }
                    }
                }
            }

            if (domain == null) {
                // Without a domain there is no data type to resolve; say so
                // instead of failing with a bare NullPointerException.
                throw new Exception("Attribute '" + attName
                        + "' has no recognizable measurementScale domain, so its data type cannot be resolved");
            }
            DataType dataType = domain.getDataType();
            String resolvedType = dataType.getName();
            if (isDebugging) {
                log.debug("The final type is " + resolvedType);
            }

            // check for duplicates of this name
            int duplicateNameCounter = 1;
            while (attributeListObj.containsNamedAttribute(attName)) {
                attName += "_" + duplicateNameCounter;
                duplicateNameCounter++;
            }

            Attribute attObj = new Attribute(Integer.toString(elementId), attName, attLabel, attDefinition, attUnit,
                    attUnitType, resolvedType, attMeasurementScale, domain);

            // add missing code into attribute
            for (String missingCodeValue : missingCodeVector) {
                if (isDebugging) {
                    log.debug("the mssing value code " + missingCodeValue + " was added to attribute");
                }
                attObj.addMissingValueCode(missingCodeValue);
            }

            attributeListObj.add(attObj);
        }
    }

    /*
     * Returns the node value of the first child of the given node, or null
     * when the node is null or has no children.
     */
    private static String readFirstChildText(Node node) {
        if (node == null) {
            return null;
        }
        Node first = node.getFirstChild();
        return (first == null) ? null : first.getNodeValue();
    }

    /*
     * Builds the numeric domain of an interval/ratio scale node from its
     * numericDomain child: the number type plus, when both are present, the
     * minimum and maximum bounds. A missing numericDomain yields a domain
     * with a null number type and no bounds.
     */
    private Domain buildNumericDomain(CachedXPathAPI xpathapi, Node scaleNode) throws Exception {
        String numberType = null;
        String min = "";
        String max = "";
        Node dNode = xpathapi.selectSingleNode(scaleNode, "numericDomain");
        NodeList numberKids = (dNode == null) ? null : dNode.getChildNodes();
        int kidCount = (numberKids == null) ? 0 : numberKids.getLength();
        for (int index = 0; index < kidCount; index++) {
            Node kid = numberKids.item(index);
            String dName = kid.getNodeName();
            if (dName.equals("numberType")) {
                numberType = readFirstChildText(kid);
                if (isDebugging) {
                    log.debug("The number type is " + numberType);
                }
            } else if (dName.equals("boundsGroup")) {
                // NOTE(review): this branch only fires for a child literally
                // named "boundsGroup", yet the lookup below selects "bounds"
                // children of numericDomain - presumably the check was meant
                // to be on "bounds". Confirm against the EML schema before
                // changing it, since enabling it alters the min/max handed to
                // the domain.
                NodeList boundsList = xpathapi.selectNodeList(dNode, "./bounds");
                // A dedicated index is used here: sharing the attribute
                // loop's index would corrupt the iteration over attributes.
                for (int b = 0; b < boundsList.getLength(); b++) {
                    try {
                        Node bounds = boundsList.item(b);
                        // minimum and maximum are read independently so that
                        // a missing one does not hide the other
                        String minText = readFirstChildText(xpathapi.selectSingleNode(bounds, "./minimum"));
                        if (minText != null) {
                            min = minText;
                        }
                        String maxText = readFirstChildText(xpathapi.selectSingleNode(bounds, "./maximum"));
                        if (maxText != null) {
                            max = maxText;
                        }
                    } catch (Exception e) {
                        log.debug("Error in handle bound ", e);
                    }
                }
            }
        }
        Double minNum = null;
        Double maxNum = null;
        if (!min.trim().equals("") && !max.trim().equals("")) {
            minNum = Double.valueOf(min);
            maxNum = Double.valueOf(max);
        }
        return new NumericDomain(numberType, minNum, maxNum);
    }

    /*
     * Builds the domain of a nominal/ordinal scale node from the textDomain
     * or enumeratedDomain children of its nonNumericDomain element. When
     * several are present the last one wins; when none is present (or there
     * is no nonNumericDomain) null is returned.
     */
    private Domain buildNonNumericDomain(CachedXPathAPI xpathapi, Node scaleNode) throws Exception {
        Node nonNumeric = xpathapi.selectSingleNode(scaleNode, "nonNumericDomain");
        if (nonNumeric == null) {
            return null;
        }
        Domain result = null;
        NodeList list = nonNumeric.getChildNodes();
        for (int m = 0; m < list.getLength(); m++) {
            Node dNode = list.item(m);
            String dName = dNode.getNodeName();
            if (dName.equals("textDomain")) {
                TextDomain textDomain = new TextDomain();
                String definition = readFirstChildText(xpathapi.selectSingleNode(dNode, "./definition"));
                if (isDebugging) {
                    log.debug("The definition value is " + definition);
                }
                if (definition != null) {
                    textDomain.setDefinition(definition);
                }
                NodeList nl = xpathapi.selectNodeList(dNode, "./pattern");
                String[] patternList = new String[nl.getLength()];
                for (int l = 0; l < nl.getLength(); l++) {
                    patternList[l] = readFirstChildText(nl.item(l));
                }
                if (patternList.length > 0) {
                    textDomain.setPattern(patternList);
                }
                result = textDomain;
            } else if (dName.equals("enumeratedDomain")) {
                EnumeratedDomain enumerDomain = new EnumeratedDomain();
                // kept as a raw Vector because setInfo's parameter type is
                // not visible from this method
                Vector info = new Vector();
                NodeList nl = xpathapi.selectNodeList(dNode, "./codeDefinition");
                for (int l = 0; l < nl.getLength(); l++) {
                    // NOTE(review): this stores the value of the FIRST child
                    // of codeDefinition, whatever that child is - verify that
                    // this is the intended source of the code value.
                    info.add(readFirstChildText(nl.item(l)));
                }
                enumerDomain.setInfo(info);
                result = enumerDomain;
            }
        }
        return result;
    }

    /*
     * Builds the date-time domain of a dateTime scale node from its
     * formatString child. A missing or empty formatString is reported with an
     * Exception.
     */
    private Domain buildDateTimeDomain(CachedXPathAPI xpathapi, Node scaleNode) throws Exception {
        DateTimeDomain date = new DateTimeDomain();
        String formatString = readFirstChildText(xpathapi.selectSingleNode(scaleNode, "./formatString"));
        if (formatString == null) {
            throw new Exception("The dateTime measurementScale has no formatString");
        }
        if (isDebugging) {
            log.debug("The format string in date time is " + formatString);
        }
        date.setFormatString(formatString);
        return date;
    }

    /**
     * pulls the entity information out of the xml and stores it in a hashtable
     */
    private void processEntities(CachedXPathAPI xpathapi, NodeList entities, String xpath)
            throws SAXException, javax.xml.transform.TransformerException, Exception {
        // make sure that entities is not null
        if (entities == null) {
            return;
        }
        int entityNodeListLength = entities.getLength();

        String entityName = "";
        String entityDescription = "";
        String entityOrientation = "";
        String entityCaseSensitive = "";
        String entityNumberOfRecords = "-1";
        String physicalFile = "";
        String numHeaderLines = "0";
        int numFooterLines = 0;
        String fieldDelimiter = null;
        String recordDelimiter = "";
        String compressionMethod = "";
        String encodingMethod = "";
        boolean isImageEntity = false;
        boolean isGZipDataFile = false;
        boolean isZipDataFile = false;
        boolean isTarDataFile = false;
        boolean isSimpleDelimited = true;
        boolean isCollapseDelimiter = false;
        TextComplexDataFormat[] formatArray = null;
        int entityCounter = 0;

        for (int i = 0; i < entityNodeListLength; i++) {

            if (xpath != null && (xpath.equals(SPATIALRASTERENTITY) || xpath.equals(SPATIALVECTORENTITY))) {
                isImageEntity = true;
            }
            // go through the entities and put the information into the hash.
            elementId++;
            Node entity = entities.item(i);
            NodeList entityChildren = entity.getChildNodes();
            for (int j = 0; j < entityChildren.getLength(); j++) {
                Node child = entityChildren.item(j);
                String childName = child.getNodeName();

                if (childName.equals("entityName")) {
                    entityName = child.getFirstChild().getNodeValue();
                } else if (childName.equals("entityDescription")) {
                    entityDescription = child.getFirstChild().getNodeValue();
                } else if (childName.equals("caseSensitive")) {
                    entityCaseSensitive = child.getFirstChild().getNodeValue();
                } else if (childName.equals("numberOfRecords")) {
                    entityNumberOfRecords = child.getFirstChild().getNodeValue();
                    if (entityNumberOfRecords != null) {
                        entityNumberOfRecords = entityNumberOfRecords.trim();
                        numRecords = (new Integer(entityNumberOfRecords)).intValue();
                    }
                }

            }

            NodeList orientationNodeList = xpathapi.selectNodeList(entity,
                    "physical/dataFormat/textFormat/attributeOrientation");
            if (orientationNodeList != null && orientationNodeList.getLength() > 0) {
                entityOrientation = orientationNodeList.item(0).getFirstChild().getNodeValue();

            }

            NodeList headerLinesNL = xpathapi.selectNodeList(entity,
                    "physical/dataFormat/textFormat/numHeaderLines");
            if ((headerLinesNL != null) && (headerLinesNL.getLength() > 0)) {
                Node headerLinesNode = headerLinesNL.item(0);
                if (headerLinesNode != null) {
                    numHeaderLines = headerLinesNode.getFirstChild().getNodeValue();
                }
            }

            NodeList footerLinesNL = xpathapi.selectNodeList(entity,
                    "physical/dataFormat/textFormat/numFooterLines");
            if ((footerLinesNL != null) && (footerLinesNL.getLength() > 0)) {
                Node footerLinesNode = footerLinesNL.item(0);
                if (footerLinesNode != null) {
                    String footerLineStr = footerLinesNode.getFirstChild().getNodeValue();
                    numFooterLines = (new Integer(footerLineStr.trim())).intValue();
                }
            }

            // Here is the simple delimited data file
            NodeList delimiterNL = xpathapi.selectNodeList(entity,
                    "physical/dataFormat/textFormat/simpleDelimited/fieldDelimiter");
            if (delimiterNL != null && delimiterNL.getLength() > 0) {
                fieldDelimiter = delimiterNL.item(0).getFirstChild().getNodeValue();
            }

            // Here is the simple delimited data file
            NodeList collapseDelimiterNL = xpathapi.selectNodeList(entity,
                    "physical/dataFormat/textFormat/simpleDelimited/collapseDelimiters");
            if (collapseDelimiterNL != null && collapseDelimiterNL.getLength() > 0) {
                String collapseDelimiter = collapseDelimiterNL.item(0).getFirstChild().getNodeValue();
                if (collapseDelimiter.equalsIgnoreCase("yes")) {
                    isCollapseDelimiter = true;
                }

            }

            // for complex format data file
            NodeList complexFormatNL = xpathapi.selectNodeList(entity, "physical/dataFormat/textFormat/complex");
            if (complexFormatNL != null && complexFormatNL.getLength() > 0) {
                log.debug("in handle complex text data format");
                isSimpleDelimited = false;
                Node complexFormatNode = complexFormatNL.item(0);
                NodeList complexFormatChildren = complexFormatNode.getChildNodes();
                int childrenLength = complexFormatChildren.getLength();
                Vector formatVector = new Vector();
                for (int k = 0; k < childrenLength; k++) {
                    Node node = complexFormatChildren.item(k);
                    if (node != null && node.getNodeName().equals("textFixed")) {
                        TextWidthFixedDataFormat textFixedFormat = handleTextFixedDataFormatNode(node);
                        if (textFixedFormat != null) {
                            formatVector.add(textFixedFormat);
                            // complexFormatsNumber++;
                        }

                    } else if (node != null && node.getNodeName().equals("textDelimited")) {
                        TextDelimitedDataFormat delimitedFormat = handleComplexDelimitedDataFormatNode(node);
                        if (delimitedFormat != null) {
                            formatVector.add(delimitedFormat);
                            // complexFormatsNumber++;
                        }
                    }
                }
                // transfer vector to array
                complexFormatsNumber = formatVector.size();
                formatArray = new TextComplexDataFormat[complexFormatsNumber];
                for (int j = 0; j < complexFormatsNumber; j++) {
                    formatArray[j] = (TextComplexDataFormat) formatVector.elementAt(j);
                }

            }

            NodeList recDelimiterNL = xpathapi.selectNodeList(entity,
                    "physical/dataFormat/textFormat/recordDelimiter");
            if ((recDelimiterNL != null) && (recDelimiterNL.getLength() > 0)) {
                recordDelimiter = recDelimiterNL.item(0).getFirstChild().getNodeValue();
            } else {
                recordDelimiter = "\r\n";
            }
            // get the distribution information
            NodeList distributionNL = xpathapi.selectNodeList(entity, "physical/distribution/online/url");
            if (distributionNL != null && distributionNL.getLength() > 0) {
                physicalFile = distributionNL.item(0).getFirstChild().getNodeValue();
                if (isDebugging) {
                    log.debug("The url is " + physicalFile);
                }
            }
            // if this url is donwloadable, if the value is "information", it is
            // not downloadable
            // otherwise, it is downloadable
            Boolean isDownloadable = true;
            NodeList distributionURLNL = xpathapi.selectNodeList(entity,
                    "physical/distribution/online/url/@function");
            if (distributionURLNL != null && distributionURLNL.getLength() > 0) {
                String function = distributionURLNL.item(0).getNodeValue();
                log.debug("The function value is ============ " + function);
                if (function != null && function.equals(INFORMATION)) {
                    isDownloadable = false;
                }
            }

            // get the compressionMethod information
            NodeList compressionNL = xpathapi.selectNodeList(entity, "physical/compressionMethod");
            if (compressionNL != null && compressionNL.getLength() > 0) {
                compressionMethod = compressionNL.item(0).getFirstChild().getNodeValue();
                if (isDebugging) {
                    log.debug("Compression method is " + compressionMethod);
                }
                if (compressionMethod != null && compressionMethod.equals(Entity.GZIP)) {
                    isGZipDataFile = true;
                } else if (compressionMethod != null && compressionMethod.equals(Entity.ZIP)) {
                    isZipDataFile = true;
                }
            }

            // get encoding method info (mainly for tar file)
            NodeList encodingNL = xpathapi.selectNodeList(entity, "physical/encodingMethod");
            if (encodingNL != null && encodingNL.getLength() > 0) {
                encodingMethod = encodingNL.item(0).getFirstChild().getNodeValue();
                if (isDebugging) {
                    log.debug("encoding method is " + encodingMethod);
                }
                if (encodingMethod != null && encodingMethod.equals(Entity.TAR)) {
                    isTarDataFile = true;
                }
            }

            if (entityOrientation.trim().equals("column")) {
                entityOrientation = Entity.COLUMNMAJOR;
            } else {
                entityOrientation = Entity.ROWMAJOR;
            }

            if (entityCaseSensitive.equals("yes")) {
                entityCaseSensitive = "true";
            } else {
                entityCaseSensitive = "false";
            }

            entityObject = new Entity(Integer.toString(elementId), entityName.trim(), entityDescription.trim(),
                    new Boolean(entityCaseSensitive), entityOrientation,
                    new Integer(entityNumberOfRecords).intValue());
            entityObject.setNumHeaderLines((new Integer(numHeaderLines)).intValue());
            entityObject.setNumFooterLines(numFooterLines);
            entityObject.setSimpleDelimited(isSimpleDelimited);
            // for simple delimited data file
            if (fieldDelimiter != null) {
                entityObject.setDelimiter(fieldDelimiter);
            }
            entityObject.setCollaplseDelimiter(isCollapseDelimiter);

            entityObject.setRecordDelimiter(recordDelimiter);
            entityObject.setURL(physicalFile);
            entityObject.setCompressionMethod(compressionMethod);
            entityObject.setIsImageEntity(isImageEntity);
            entityObject.setHasGZipDataFile(isGZipDataFile);
            entityObject.setHasZipDataFile(isZipDataFile);
            entityObject.setHasTarDataFile(isTarDataFile);
            entityObject.setDownloadable(isDownloadable);

            try {
                NodeList attNL = xpathapi.selectNodeList(entity, "attributeList");
                processAttributeList(xpathapi, attNL, entityObject);
                entityObject.setDataFormatArray(formatArray);

            } catch (Exception e) {
                log.warn("Error parsing attributes: " + e.getMessage() + " So this entity " + entityObject.getName()
                        + " may not have attribute list");
            }
            if (entityObject.isDownloadable()) {
                entityHash.put(Integer.toString(elementId), entityObject);
                entityList.add(entityObject);
                entityCounter++;
            }

            // fileHash.put(elementId, physicalFile);

        }
        numEntities = numEntities + entityCounter;

    }

    /**
     * Digests a fixed-width text format node and returns the corresponding
     * TextWidthFixedDataFormat object.
     * <p>
     * The children of the node are scanned in document order. A fieldWidth
     * child creates the format object; fieldStartColumn and lineNumber
     * children are applied only once a format object exists (i.e. when they
     * follow a fieldWidth child) and are ignored otherwise.
     *
     * @param node
     *            the node whose children describe the fixed-width format; may
     *            be null
     * @return the populated format object, or null if node is null or it has
     *         no fieldWidth child
     * @throws Exception
     *             if a recognized child element has no text content or its
     *             text is not a valid integer (both surface as
     *             IllegalArgumentException)
     */
    private TextWidthFixedDataFormat handleTextFixedDataFormatNode(Node node) throws Exception {
        if (node == null) {
            return null;
        }
        TextWidthFixedDataFormat format = null;
        NodeList children = node.getChildNodes();
        int length = children.getLength();
        for (int i = 0; i < length; i++) {
            Node kid = children.item(i);
            String elementName = kid.getNodeName();
            if ("fieldWidth".equals(elementName)) {
                int fieldWidth = parseFixedFormatInt(kid);
                if (isDebugging) {
                    log.debug("The field width for fix width in eml is " + fieldWidth);
                }
                format = new TextWidthFixedDataFormat(fieldWidth);
            } else if (format != null && "fieldStartColumn".equals(elementName)) {
                int startColumn = parseFixedFormatInt(kid);
                if (isDebugging) {
                    log.debug("The start column is " + startColumn);
                }
                format.setFieldStartColumn(startColumn);
            } else if (format != null && "lineNumber".equals(elementName)) {
                int lineNumber = parseFixedFormatInt(kid);
                if (isDebugging) {
                    log.debug("The line number is " + lineNumber);
                }
                format.setLineNumber(lineNumber);
            }
        }
        return format;
    }

    /*
     * Reads the value of the first child of a fixed-width format element and
     * parses it as an integer. Surrounding whitespace is ignored so that
     * pretty-printed documents are accepted. A missing value is reported with
     * the element name instead of a bare NullPointerException.
     */
    private int parseFixedFormatInt(Node element) {
        Node text = element.getFirstChild();
        String value = (text == null) ? null : text.getNodeValue();
        if (value == null) {
            throw new IllegalArgumentException(
                    "Element " + element.getNodeName() + " has no text content");
        }
        return Integer.parseInt(value.trim());
    }

    /**
     * Digests a delimited text format node and returns the corresponding
     * TextDelimitedDataFormat object.
     * <p>
     * The children of the node are scanned in document order. A
     * fieldDelimiter child creates the format object; lineNumber,
     * collapseDelimiter and quoteCharacter children are applied only once a
     * format object exists (i.e. when they follow a fieldDelimiter child) and
     * are ignored otherwise. All collected quoteCharacter values are handed
     * to the format as one array after the scan.
     *
     * @param node
     *            the node whose children describe the delimited format; may
     *            be null
     * @return the populated format object, or null if node is null or it has
     *         no fieldDelimiter child
     * @throws Exception
     *             if a recognized child element has no text content or the
     *             lineNumber text is not a valid integer (both surface as
     *             IllegalArgumentException)
     */
    private TextDelimitedDataFormat handleComplexDelimitedDataFormatNode(Node node) throws Exception {
        if (node == null) {
            return null;
        }
        TextDelimitedDataFormat format = null;
        NodeList children = node.getChildNodes();
        int length = children.getLength();
        Vector<String> quoteList = new Vector<String>();
        for (int i = 0; i < length; i++) {
            Node kid = children.item(i);
            String elementName = kid.getNodeName();
            if ("fieldDelimiter".equals(elementName)) {
                // not trimmed: the delimiter itself may be whitespace
                String fieldDelimiter = readDelimitedFormatText(kid);
                if (isDebugging) {
                    log.debug("The field delimiter for complex format in eml is " + fieldDelimiter);
                }
                format = new TextDelimitedDataFormat(fieldDelimiter);
            } else if (format != null && "lineNumber".equals(elementName)) {
                // trimmed so that whitespace-padded integers are accepted
                int lineNumber = Integer.parseInt(readDelimitedFormatText(kid).trim());
                if (isDebugging) {
                    log.debug("The line number is " + lineNumber);
                }
                format.setLineNumber(lineNumber);
            } else if (format != null && "collapseDelimiter".equals(elementName)) {
                String collapse = readDelimitedFormatText(kid);
                if (isDebugging) {
                    log.debug("The collapse delimiter " + collapse);
                }
                format.setCollapseDelimiter(collapse);
            } else if (format != null && "quoteCharacter".equals(elementName)) {
                quoteList.add(readDelimitedFormatText(kid));
            }
        }
        // set up quoteList
        if (format != null) {
            format.setQuoteCharater(quoteList.toArray(new String[quoteList.size()]));
        }
        return format;
    }

    /*
     * Returns the value of the first child of a delimited format element,
     * unmodified. A missing value is reported with the element name instead
     * of a bare NullPointerException.
     */
    private String readDelimitedFormatText(Node element) {
        Node text = element.getFirstChild();
        String value = (text == null) ? null : text.getNodeValue();
        if (value == null) {
            throw new IllegalArgumentException(
                    "Element " + element.getNodeName() + " has no text content");
        }
        return value;
    }
}