Java tutorial
/** * '$RCSfile: GenericDataPackageParser.java,v $' * * '$Author: leinfelder $' * '$Date: 2007-10-18 00:45:08 $' * '$Revision: 1.1 $' * * For Details: http://kepler.ecoinformatics.org * * Copyright (c) 2003 The Regents of the University of California. * All rights reserved. * * Permission is hereby granted, without written agreement and without * license or royalty fees, to use, copy, modify, and distribute this * software and its documentation for any purpose, provided that the * above copyright notice and the following two paragraphs appear in * all copies of this software. * * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN * IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY * OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, * UPDATES, ENHANCEMENTS, OR MODIFICATIONS. */ package org.ecoinformatics.datamanager.parser.generic; import java.io.InputStream; import java.util.ArrayList; import java.util.Hashtable; import java.util.Vector; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; //import org.apache.commons.logging.Log; //import org.apache.commons.logging.LogFactory; import org.apache.xpath.CachedXPathAPI; //import org.kepler.objectmanager.data.DataType; //import org.kepler.objectmanager.data.DataTypeResolver; import org.ecoinformatics.datamanager.parser.DataPackage; import org.ecoinformatics.datamanager.parser.DateTimeDomain; import org.ecoinformatics.datamanager.parser.Domain; import org.ecoinformatics.datamanager.parser.EnumeratedDomain; import org.ecoinformatics.datamanager.parser.NumericDomain; import org.ecoinformatics.datamanager.parser.Attribute; import org.ecoinformatics.datamanager.parser.AttributeList; import org.ecoinformatics.datamanager.parser.Entity; import org.ecoinformatics.datamanager.parser.StorageType; import org.ecoinformatics.datamanager.parser.TextComplexDataFormat; import org.ecoinformatics.datamanager.parser.TextDelimitedDataFormat; import org.ecoinformatics.datamanager.parser.TextDomain; import org.ecoinformatics.datamanager.parser.TextWidthFixedDataFormat; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; /** * This is plugin Parser which parses EML 2.0.0 metadata files to * get the metadata information which decribes data file. * * Note that the term "generic" is misleading in that a generic document * needs to have an EML-compliant dataset element somewhere within it. * This class simply allows more general forms of schema to be parsed. * * @author tao * @author leinfelder (refactored to this form from orginal EML200Parser) */ public class GenericDataPackageParser implements DataPackageParserInterface { /* * Class fields */ // private static Log log; private static boolean isDebugging; private static final String ID = "id"; /*static { log = LogFactory.getLog( "org.ecoinformatics.seek.datasource.eml.eml2.Eml200Parser" ); isDebugging = log.isDebugEnabled(); }*/ /* * Instance fields */ // previously these were constants, now member variables with defaults protected String packageIdPath = null; protected String tableEntityPath = null; protected String spatialRasterEntityPath = null; protected String spatialVectorEntityPath = null; protected String storedProcedureEntityPath = null; protected String viewEntityPath = null; protected String otherEntityPath = null; //private Hashtable entityHash = new Hashtable(); //private Hashtable fileHash = new Hashtable(); private int numEntities = 0; //private int numRecords = -1; private Entity entityObject = null; //private DataTypeResolver dtr = DataTypeResolver.instanceOf(); private int elementId = 0; //private boolean hasImageEntity = false; private int numberOfComplexFormats = 0; // Associates attributeList id values with attributeList objects private Hashtable<String, AttributeList> attributeListIdHash = new Hashtable<String, AttributeList>(); //private boolean hasMissingValue = false; private DataPackage emlDataPackage = null; /** * Default constructor - no custom xpath parameters */ public GenericDataPackageParser() { //sets the default path values for documents this.initDefaultXPaths(); } /** * Constructor that accepts only the packageIdPath. * Allows packageId to be located anywhere in schema, * but assumes default (EML) placement of dataset * @param packageIdPath path expression specifying where to look for packageId */ public GenericDataPackageParser(String packageIdPath) { //sets the default path values for documents this.initDefaultXPaths(); //set the param this.packageIdPath = packageIdPath; } /** * Constructor that accepts xpath input strings * for many more datapackage element locations * @param packageIdPath * @param tableEntityPath * @param spatialRasterEntityPath * @param spatialVectorEntityPath * @param storedProcedureEntityPath * @param viewEntityPath * @param otherEntityPath */ public GenericDataPackageParser(String packageIdPath, String tableEntityPath, String spatialRasterEntityPath, String spatialVectorEntityPath, String storedProcedureEntityPath, String viewEntityPath, String otherEntityPath) { //set default so that caller can pass nulls for some params this.initDefaultXPaths(); //set the paths that are provided (not null) if (packageIdPath != null) { this.packageIdPath = packageIdPath; } if (tableEntityPath != null) { this.tableEntityPath = tableEntityPath; } if (spatialRasterEntityPath != null) { this.spatialRasterEntityPath = spatialRasterEntityPath; } if (spatialVectorEntityPath != null) { this.spatialVectorEntityPath = spatialVectorEntityPath; } if (storedProcedureEntityPath != null) { this.storedProcedureEntityPath = storedProcedureEntityPath; } if (viewEntityPath != null) { this.viewEntityPath = viewEntityPath; } if (otherEntityPath != null) { this.otherEntityPath = otherEntityPath; } } /** * sets the default xpath strings for locating datapackage elements * note that root element can be anything with a packageId attribute */ private void initDefaultXPaths() { //sets the default path values for documents packageIdPath = "//*/@packageId"; tableEntityPath = "//dataset/dataTable"; spatialRasterEntityPath = "//dataset/spatialRaster"; spatialVectorEntityPath = "//dataset/spatialVector"; storedProcedureEntityPath = "//dataset/storedProcedure"; viewEntityPath = "//dataset/view"; otherEntityPath = "//dataset/otherEntity"; } /** * Returns a hashtable of with the id of the entity as the key and the data * file id to which the entity refers as the value. This way, if you want to * know what data file goes with an entity, you can do a get on this hash * for the id of the entity. Note that the entity id is the XML entity id * from the generated input step, not the id of the entity file itself. * * @return fileHash, a HashTable of entity ids mapped to data file ids */ /*public Hashtable getDataFilesHash() { return fileHash; }*/ /* (non-Javadoc) * @see org.ecoinformatics.datamanager.parser.generic.GenericDatasetParserInterface#parse(org.xml.sax.InputSource) */ public void parse(InputSource source) throws Exception { DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); Document doc = builder.parse(source); parseDocument(doc); } /* (non-Javadoc) * @see org.ecoinformatics.datamanager.parser.generic.GenericDatasetParserInterface#parse(java.io.InputStream) */ public void parse(InputStream is) throws Exception { DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); Document doc = builder.parse(is); parseDocument(doc); } /** * Parses the EML document. Now except dataTable, spatialRaster and * spatialVector entities are added. * * @param doc the Document object to be parsed */ private void parseDocument(Document doc) throws Exception { NodeList dataTableEntities; NodeList spatialRasterEntities; NodeList spatialVectorEntities; NodeList otherEntities; NodeList viewEntities; CachedXPathAPI xpathapi = new CachedXPathAPI(); String packageId = null; try { // process packageid Node packageIdNode = xpathapi.selectSingleNode(doc, packageIdPath); if (packageIdNode != null) { //System.out.println("in packageIdNode is not null"); packageId = packageIdNode.getNodeValue(); } emlDataPackage = new DataPackage(packageId); // now dataTable, spatialRaster and spatialVector are handled dataTableEntities = xpathapi.selectNodeList(doc, tableEntityPath); spatialRasterEntities = xpathapi.selectNodeList(doc, spatialRasterEntityPath); spatialVectorEntities = xpathapi.selectNodeList(doc, spatialVectorEntityPath); otherEntities = xpathapi.selectNodeList(doc, otherEntityPath); viewEntities = xpathapi.selectNodeList(doc, viewEntityPath); } catch (Exception e) { throw new Exception("Error extracting entities from eml2.0.0 package.", e); } try { //log.debug("Processing entities"); processEntities(xpathapi, dataTableEntities, tableEntityPath, packageId); //TODO: current we still treat them as TableEntity java object, //in future we need add new SpatialRasterEntity and SpatialVector // object for them processEntities(xpathapi, spatialRasterEntities, spatialRasterEntityPath, packageId); processEntities(xpathapi, spatialVectorEntities, spatialVectorEntityPath, packageId); processEntities(xpathapi, otherEntities, otherEntityPath, packageId); processEntities(xpathapi, viewEntities, viewEntityPath, packageId); //log.debug("Done processing entities"); } catch (Exception e) { throw new Exception("Error processing entities: " + e.getMessage(), e); } } /** * Returns a hashtable of entity names hashed to the entity description * metadata that goes with each entity. */ /*public Hashtable getEntityHash() { return entityHash; }*/ /* (non-Javadoc) * @see org.ecoinformatics.datamanager.parser.generic.GenericDatasetParserInterface#getDataPackage() */ public DataPackage getDataPackage() { return emlDataPackage; } /** * Gets the number of records in this dataItem. * * @param entityId the id of the entity object to get the record count for * @return the number of records in the entity object */ /*public int getRecordCount(String entityId) { return ((Entity) entityHash.get(entityId)).getNumRecords(); }*/ /** * Gets the total number of entities in the data item collection that was * passed to this class when the object was created. * * @return the number of entities in the data item collection */ /*public int getEntityCount() { return numEntities; }*/ /** * Gets the number of attributes in the given entity. * * @param entityId the id of the entity object that you want the attribute * count for * @return the number of attributes in the entity */ /*public int getAttributeCount(String entityId) { Attribute[] attArray = ((Entity) entityHash.get(entityId)) .getAttributes(); return attArray.length; }*/ /** * Boolean to determine whether the entity has a missing value declaration. * * @return value of hasMissingValue, a boolean */ /*public boolean hasMissingValue() { return hasMissingValue; }*/ /** * Method to get the boolean hasImageEntity. If the eml document has * SpatialRaster or SpatialVector entity, this variable should be true. * * @return boolean, the value of the hasImageEntity field */ /*public boolean getHasImageEntity() { return this.hasImageEntity; }*/ /** * Processes the attributeList element. * * @param xpathapi XPath API * @param attributeListNodeList a NodeList * @param xpath the XPath path string to the data entity * @param entObj the entity object whose attribute list is processed */ private void processAttributeList(CachedXPathAPI xpathapi, NodeList attributeListNodeList, String xpath, Entity entObj) throws Exception { AttributeList attributeList = new AttributeList(); Node attributeListNode = attributeListNodeList.item(0); /* * It is allowable in EML to omit the attributeList for an * 'otherEntity' data entity. */ if (attributeListNode == null) { if (xpath != null && xpath.equals(otherEntityPath)) { System.err.println("No attributeList was specified for otherEntity '" + entObj.getName() + "'. This is allowable in EML."); return; } else { throw new Exception("No attributeList was specified for entity '" + entObj.getName() + "'."); } } // Get attributeList element's id attribute NamedNodeMap attributeListNodeAttributes = attributeListNode.getAttributes(); String idString = null; if (attributeListNodeAttributes != null) { Node idNode = attributeListNodeAttributes.getNamedItem(ID); if (idNode != null) { idString = idNode.getNodeValue(); attributeList.setId(idString); if (isDebugging) { //log.debug("The id value for the attributelist is " + idString); } } } NodeList attributeNodeList = xpathapi.selectNodeList(attributeListNode, "attribute"); NodeList referencesNodeList = xpathapi.selectNodeList(attributeListNode, "references"); if (attributeNodeList != null && attributeNodeList.getLength() > 0) { processAttributes(xpathapi, attributeNodeList, attributeList); if (idString != null) { attributeListIdHash.put(idString, attributeList); } } else if (referencesNodeList != null && referencesNodeList.getLength() > 0) { // get the references id Node referencesNode = referencesNodeList.item(0); if (isDebugging) { //log.debug("The reference node's name is "+ // referenceNode.getNodeName()); } String referencesId = referencesNode.getFirstChild().getNodeValue(); if (isDebugging) { //log.debug("the reference id is "+ referenceId); } attributeList = (AttributeList) attributeListIdHash.get(referencesId); } else { //log.debug( // "The children name of attribute list couldn't be understood"); throw new Exception(" couldn't be a child of attributeList"); } if (!entityObject.isSimpleDelimited()) { int numberOfAttributes = attributeList.getAttributes().length; if (numberOfAttributes != numberOfComplexFormats || ((numberOfAttributes == numberOfComplexFormats) && (numberOfComplexFormats == 0))) { throw new Exception("Complex format elements should have " + "same number as attribute number"); } else { //entityObject.setDataFormatArray(formatArray); } } entityObject.setAttributeList(attributeList); } /** * Processes the attributes in an attribute list. Called by * processAttributeList(). * * @param xpathapi the XPath API * @param attributesNodeList a node list * @param attributeList an AttributeList object */ private void processAttributes(CachedXPathAPI xpathapi, NodeList attributesNodeList, AttributeList attributeList) throws Exception { int attributesNodeListLength = attributesNodeList.getLength(); // Process each attribute for (int i = 0; i < attributesNodeListLength; i++) { Node attributeNode = attributesNodeList.item(i); NodeList attributeNodeChildren = attributeNode.getChildNodes(); //NamedNodeMap attAttributes = att.getAttributes(); String attName = ""; String attLabel = ""; String attDefinition = ""; String attUnit = ""; String attUnitType = ""; String attMeasurementScale = ""; String attPrecision = ""; Domain domain = null; String id = null; Vector missingValueCodeVector = new Vector(); double numberPrecision = 0; ArrayList<StorageType> storageTypeArray = new ArrayList<StorageType>(); // get attribute id NamedNodeMap attributeNodeAttributesMap = attributeNode.getAttributes(); if (attributeNodeAttributesMap != null) { Node idNode = attributeNodeAttributesMap.getNamedItem(ID); if (idNode != null) { id = idNode.getNodeValue(); } } elementId++; for (int j = 0; j < attributeNodeChildren.getLength(); j++) { Node childNode = attributeNodeChildren.item(j); String childNodeName = childNode.getNodeName(); String childNodeValue = childNode.getFirstChild() == null ? null : childNode.getFirstChild().getNodeValue(); childNodeValue = childNodeValue == null ? childNodeValue : childNodeValue.trim(); if (childNodeName.equals("attributeName")) { if (childNodeValue != null) { attName = childNodeValue.replace('.', '_'); } } else if (childNodeName.equals("attributeLabel")) { attLabel = childNodeValue; } else if (childNodeName.equals("attributeDefinition")) { attDefinition = childNodeValue; } // Process storageType elements else if (childNodeName.equals("storageType")) { String storageTypeTextValue = childNodeValue; NamedNodeMap storageTypeAttributesMap = childNode.getAttributes(); StorageType storageType; String typeSystem = ""; Node typeSystemNode = null; // Determine whether the typeSystem attribute was specified if (storageTypeAttributesMap != null) { typeSystemNode = storageTypeAttributesMap.getNamedItem(typeSystem); if (typeSystemNode != null) { typeSystem = typeSystemNode.getNodeValue(); } } // Use the appropriate StorageType constructor depending on // whether the 'typeSystem' attribute was specified if (!typeSystem.equals("")) { storageType = new StorageType(storageTypeTextValue, typeSystem); } else { storageType = new StorageType(storageTypeTextValue); } storageTypeArray.add(storageType); } else if (childNodeName.equals("measurementScale")) { //unit is tricky because it can be custom or standard //Vector info = new Vector(); //int domainType = Domain.DOM_NONE; NodeList measurementScaleChildNodes = childNode.getChildNodes(); for (int k = 0; k < measurementScaleChildNodes.getLength(); k++) { Node measurementScaleChildNode = measurementScaleChildNodes.item(k); String measurementScaleChildNodeName = measurementScaleChildNode.getNodeName(); if (measurementScaleChildNodeName.equals("interval") || measurementScaleChildNodeName.equals("ratio")) { String numberType = null; String min = "", max = ""; Node standardUnitNode = xpathapi.selectSingleNode(measurementScaleChildNode, "unit/standardUnit"); Node customUnitNode = xpathapi.selectSingleNode(measurementScaleChildNode, "unit/customUnit"); if (standardUnitNode != null) { attUnit = standardUnitNode.getFirstChild().getNodeValue(); attUnitType = Attribute.STANDARDUNIT; } else if (customUnitNode != null) { attUnit = customUnitNode.getFirstChild().getNodeValue(); attUnitType = Attribute.CUSTOMUNIT; } else { System.err.println("Unable to determine attribute unit."); } Node precisionNode = xpathapi.selectSingleNode(measurementScaleChildNode, "precision"); if (precisionNode != null) { // precision is optional in EML201 so if it is // not provided, the attPrecision will be the // empty string attPrecision = precisionNode.getFirstChild().getNodeValue(); numberPrecision = (new Double(attPrecision)).doubleValue(); } Node numericDomainNode = xpathapi.selectSingleNode(measurementScaleChildNode, "numericDomain"); NodeList numericDomainChildNodes = numericDomainNode.getChildNodes(); for (int index = 0; index < numericDomainChildNodes.getLength(); index++) { String numericDomainChildNodeName = numericDomainChildNodes.item(index) .getNodeName(); if (numericDomainChildNodeName.equals("numberType")) { // Got number type numberType = numericDomainChildNodes.item(index).getFirstChild().getNodeValue(); if (isDebugging) { //log.debug("The number type is "+ numberType); } } else if (numericDomainChildNodeName.equals("boundsGroup")) { // Got bounds group NodeList boundsNodeList = xpathapi.selectNodeList(numericDomainNode, "./bounds"); for (i = 0; i < boundsNodeList.getLength(); i++) { NodeList aNodeList; Node boundsNode; //String exclMin = null, exclMax = null; try { aNodeList = xpathapi.selectNodeList(boundsNodeList.item(i), "./minimum"); boundsNode = aNodeList.item(0); min = boundsNode.getFirstChild().getNodeValue(); /*exclMin = bound.getAttributes() .getNamedItem("exclusive") .getNodeValue();*/ aNodeList = xpathapi.selectNodeList(boundsNodeList.item(0), "./maximum"); boundsNode = aNodeList.item(0); max = boundsNode.getFirstChild().getNodeValue(); /*exclMax = bound.getAttributes() .getNamedItem("exclusive") .getNodeValue();*/ } catch (Exception e) { //log.debug("Error in handle bound ", e); } } } } Double minNum = null; Double maxNum = null; if (!min.trim().equals("")) { minNum = new Double(min); } if (!max.trim().equals("")) { maxNum = new Double(max); } NumericDomain numericDomain = new NumericDomain(numberType, minNum, maxNum); numericDomain.setPrecision(numberPrecision); domain = numericDomain; } else if (measurementScaleChildNodeName.equals("nominal") || measurementScaleChildNodeName.equals("ordinal")) { NodeList nonNumericDomainChildNodes = xpathapi .selectSingleNode(measurementScaleChildNode, "nonNumericDomain") .getChildNodes(); for (int m = 0; m < nonNumericDomainChildNodes.getLength(); m++) { Node nonNumericDomainChildNode = nonNumericDomainChildNodes.item(m); String nonNumericDomainChildNodeName = nonNumericDomainChildNode.getNodeName(); if (nonNumericDomainChildNodeName.equals("textDomain")) { TextDomain textDomain = new TextDomain(); NodeList definitionNodeList = xpathapi.selectNodeList(nonNumericDomainChildNode, "./definition"); Node defintionNode = definitionNodeList.item(0); String definition = defintionNode.getFirstChild() == null ? null : defintionNode.getFirstChild().getNodeValue(); if (isDebugging) { //log.debug( // "The definition value is "+definition); } textDomain.setDefinition(definition); NodeList patternNodeList = xpathapi.selectNodeList(nonNumericDomainChildNode, "./pattern"); String[] patternList = new String[patternNodeList.getLength()]; for (int l = 0; l < patternNodeList.getLength(); l++) { patternList[l] = patternNodeList.item(l).getFirstChild().getNodeValue(); } if (patternList.length > 0) { textDomain.setPattern(patternList); } domain = textDomain; } else if (nonNumericDomainChildNodeName.equals("enumeratedDomain")) { EnumeratedDomain enumeratedDomain = new EnumeratedDomain(); Vector info = new Vector(); NodeList codeDefinitionNodeList = xpathapi .selectNodeList(nonNumericDomainChildNode, "./codeDefinition"); for (int l = 0; l < codeDefinitionNodeList.getLength(); l++) { info.add(codeDefinitionNodeList.item(l).getFirstChild().getNodeValue()); } enumeratedDomain.setInfo(info); domain = enumeratedDomain; } } } else if (measurementScaleChildNodeName.equalsIgnoreCase("datetime")) { DateTimeDomain date = new DateTimeDomain(); String formatString = (xpathapi.selectSingleNode(measurementScaleChildNode, "./formatString")).getFirstChild().getNodeValue(); if (isDebugging) { //log.debug( // "The format string in date time is " // + formatString); } date.setFormatString(formatString); domain = date; } } } else if (childNodeName.equals("missingValueCode")) { //log.debug("in missingValueCode"); NodeList missingValueCodeChildNodes = childNode.getChildNodes(); for (int k = 0; k < missingValueCodeChildNodes.getLength(); k++) { Node missingValueCodeChildNode = missingValueCodeChildNodes.item(k); String missingValueCodeChildNodeName = missingValueCodeChildNode.getNodeName(); if (missingValueCodeChildNodeName.equals("code")) { Node missingValueCodeTextNode = missingValueCodeChildNode.getFirstChild(); if (missingValueCodeTextNode != null) { String missingValueCode = missingValueCodeTextNode.getNodeValue(); if (isDebugging) { //log.debug("the missing code is "+missingCode); } missingValueCodeVector.add(missingValueCode); //hasMissingValue = true; } } } } } /****************************************************** * need to use domain type to replace data type ******************************************************/ /*String resolvedType = null; //DataType dataType = domain.getDataType(); //resolvedType = dataType.getName(); if(isDebugging) { //log.debug("The final type is " + resolvedType); }*/ Attribute attObj = new Attribute(id, attName, attLabel, attDefinition, attUnit, attUnitType, attMeasurementScale, domain); // Add storageType elements to the Attribute object // if any were parsed in the EML for (StorageType storageType : storageTypeArray) { attObj.addStorageType(storageType); } // Add missing value code into attribute for (int k = 0; k < missingValueCodeVector.size(); k++) { String missingValueCode = (String) missingValueCodeVector.elementAt(k); if (isDebugging) { //log.debug("the mssing value code " + missingCodeValue + // " was added to attribute"); } attObj.addMissingValueCode(missingValueCode); } attributeList.add(attObj); } } /** * Pulls the entity information out of the XML and stores it in a hash table. */ private void processEntities(CachedXPathAPI xpathapi, NodeList entitiesNodeList, String xpath, String packageId) throws SAXException, javax.xml.transform.TransformerException, Exception { // Make sure that entities is not null if (entitiesNodeList == null) { return; } int entityNodeListLength = entitiesNodeList.getLength(); numEntities = numEntities + entityNodeListLength; String entityName = ""; String entityDescription = ""; String entityOrientation = ""; String entityCaseSensitive = ""; String entityNumberOfRecords = "-1"; String onlineUrl = ""; String format = null; String numHeaderLines = "0"; int numFooterLines = 0; String fieldDelimiter = null; String recordDelimiter = ""; String compressionMethod = ""; String encodingMethod = ""; String quoteCharacter = null; String literalCharacter = null; boolean isImageEntity = false; boolean isOtherEntity = false; boolean isGZipDataFile = false; boolean isZipDataFile = false; boolean isTarDataFile = false; boolean isSimpleDelimited = true; boolean isCollapseDelimiters = false; TextComplexDataFormat[] formatArray = null; for (int i = 0; i < entityNodeListLength; i++) { if (xpath != null) { if (xpath.equals(spatialRasterEntityPath) || xpath.equals(spatialVectorEntityPath)) { isImageEntity = true; } else if (xpath.equals(otherEntityPath)) { isOtherEntity = true; } } //go through the entities and put the information into the hash. elementId++; Node entityNode = entitiesNodeList.item(i); String id = null; NamedNodeMap entityNodeAttributes = entityNode.getAttributes(); if (entityNodeAttributes != null) { Node idNode = entityNodeAttributes.getNamedItem(ID); if (idNode != null) { id = idNode.getNodeValue(); } } NodeList entityNodeChildren = entityNode.getChildNodes(); for (int j = 0; j < entityNodeChildren.getLength(); j++) { Node childNode = entityNodeChildren.item(j); String childName = childNode.getNodeName(); String childValue = childNode.getFirstChild() == null ? null : childNode.getFirstChild().getNodeValue(); if (childName.equals("entityName")) { entityName = childValue; } else if (childName.equals("entityDescription")) { entityDescription = childValue; } else if (childName.equals("caseSensitive")) { entityCaseSensitive = childValue; } else if (childName.equals("numberOfRecords")) { entityNumberOfRecords = childValue; /*numRecords = (new Integer(entityNumberOfRecords)) .intValue();*/ } } NodeList attributeOrientationNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/attributeOrientation"); if (attributeOrientationNodeList != null && attributeOrientationNodeList.getLength() > 0) { entityOrientation = attributeOrientationNodeList.item(0).getFirstChild().getNodeValue(); } NodeList numHeaderLinesNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/numHeaderLines"); if ((numHeaderLinesNodeList != null) && (numHeaderLinesNodeList.getLength() > 0)) { Node numHeaderLinesNode = numHeaderLinesNodeList.item(0); if (numHeaderLinesNode != null) { numHeaderLines = numHeaderLinesNode.getFirstChild().getNodeValue(); } } NodeList numFooterLinesNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/numFooterLines"); if ((numFooterLinesNodeList != null) && (numFooterLinesNodeList.getLength() > 0)) { Node numFooterLinesNode = numFooterLinesNodeList.item(0); if (numFooterLinesNode != null) { String numFooterLinesStr = numFooterLinesNode.getFirstChild().getNodeValue(); numFooterLines = (new Integer(numFooterLinesStr.trim())).intValue(); } } // Here is the simple delimited data file NodeList fieldDelimiterNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/simpleDelimited/fieldDelimiter"); if (fieldDelimiterNodeList != null && fieldDelimiterNodeList.getLength() > 0) { fieldDelimiter = fieldDelimiterNodeList.item(0).getFirstChild().getNodeValue(); } NodeList collapseDelimitersNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/simpleDelimited/collapseDelimiters"); if (collapseDelimitersNodeList != null && collapseDelimitersNodeList.getLength() > 0) { String collapseDelimiters = collapseDelimitersNodeList.item(0).getFirstChild().getNodeValue(); if (collapseDelimiters.equalsIgnoreCase("yes")) { isCollapseDelimiters = true; } } NodeList quoteCharacterNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/simpleDelimited/quoteCharacter"); if (quoteCharacterNodeList != null && quoteCharacterNodeList.getLength() > 0) { quoteCharacter = quoteCharacterNodeList.item(0).getFirstChild().getNodeValue(); } NodeList literalCharacterNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/simpleDelimited/literalCharacter"); if (literalCharacterNodeList != null && literalCharacterNodeList.getLength() > 0) { literalCharacter = literalCharacterNodeList.item(0).getFirstChild().getNodeValue(); } // For complex format data file NodeList complexNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/complex"); if (complexNodeList != null && complexNodeList.getLength() > 0) { //log.debug("in handle complex text data format"); isSimpleDelimited = false; Node complexNode = complexNodeList.item(0); NodeList complexChildNodes = complexNode.getChildNodes(); int complexChildNodesLength = complexChildNodes.getLength(); Vector formatVector = new Vector(); for (int k = 0; k < complexChildNodesLength; k++) { Node complexChildNode = complexChildNodes.item(k); if (complexChildNode != null && complexChildNode.getNodeName().equals("textFixed")) { TextWidthFixedDataFormat textWidthFixedDataFormat = handleTextFixedDataFormatNode( complexChildNode); if (textWidthFixedDataFormat != null) { formatVector.add(textWidthFixedDataFormat); //complexFormatsNumber++; } } else if (complexChildNode != null && complexChildNode.getNodeName().equals("textDelimited")) { TextDelimitedDataFormat textDelimitedDataFormat = handleComplexDelimitedDataFormatNode( complexChildNode); if (textDelimitedDataFormat != null) { formatVector.add(textDelimitedDataFormat); //complexFormatsNumber++; } } } // Transfer vector to array numberOfComplexFormats = formatVector.size(); formatArray = new TextComplexDataFormat[numberOfComplexFormats]; for (int j = 0; j < numberOfComplexFormats; j++) { formatArray[j] = (TextComplexDataFormat) formatVector.elementAt(j); } } NodeList recordDelimiterNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat/recordDelimiter"); if ((recordDelimiterNodeList != null) && (recordDelimiterNodeList.getLength() > 0)) { recordDelimiter = recordDelimiterNodeList.item(0).getFirstChild().getNodeValue(); } else { recordDelimiter = "\\r\\n"; } // Get the distribution information NodeList urlNodeList = xpathapi.selectNodeList(entityNode, "physical/distribution/online/url"); if (urlNodeList != null && urlNodeList.getLength() > 0) { onlineUrl = urlNodeList.item(0).getFirstChild().getNodeValue(); if (isDebugging) { //log.debug("The url is "+ onlineUrl); } } /** * Determine file format (mime) * Note: this could be better fleshed out in cases where the delimiter is known * * physical/dataFormat/textFormat * physical/dataFormat/binaryRasterFormat * physical/dataFormat/externallyDefinedFormat/formatName */ NodeList formatNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/externallyDefinedFormat/formatName"); if (formatNodeList != null && formatNodeList.getLength() > 0) { format = formatNodeList.item(0).getFirstChild().getNodeValue(); } else { // try binary raster formatNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/binaryRasterFormat"); if (formatNodeList != null && formatNodeList.getLength() > 0) { format = "application/octet-stream"; } else { formatNodeList = xpathapi.selectNodeList(entityNode, "physical/dataFormat/textFormat"); if (formatNodeList != null && formatNodeList.getLength() > 0) { format = "text/plain"; } if (isSimpleDelimited) { format = "text/csv"; } } } // Get the compressionMethod information NodeList compressionMethodNodeList = xpathapi.selectNodeList(entityNode, "physical/compressionMethod"); if (compressionMethodNodeList != null && compressionMethodNodeList.getLength() > 0) { compressionMethod = compressionMethodNodeList.item(0).getFirstChild().getNodeValue(); if (isDebugging) { //log.debug("Compression method is "+compressionMethod); } if (compressionMethod != null && compressionMethod.equals(Entity.GZIP)) { isGZipDataFile = true; } else if (compressionMethod != null && compressionMethod.equals(Entity.ZIP)) { isZipDataFile = true; } } // Get encoding method info (mainly for tar file) NodeList encodingMethodNodeList = xpathapi.selectNodeList(entityNode, "physical/encodingMethod"); if (encodingMethodNodeList != null && encodingMethodNodeList.getLength() > 0) { encodingMethod = encodingMethodNodeList.item(0).getFirstChild().getNodeValue(); if (isDebugging) { //log.debug("encoding method is "+encodingMethod); } if (encodingMethod != null && encodingMethod.equals(Entity.TAR)) { isTarDataFile = true; } } if (entityOrientation.trim().equals("column")) { entityOrientation = Entity.COLUMNMAJOR; } else { entityOrientation = Entity.ROWMAJOR; } if (entityCaseSensitive.equals("yes")) { entityCaseSensitive = "true"; } else { entityCaseSensitive = "false"; } entityObject = new Entity(id, entityName == null ? null : entityName.trim(), entityDescription == null ? null : entityDescription.trim(), new Boolean(entityCaseSensitive), entityOrientation, new Integer(entityNumberOfRecords).intValue()); entityObject.setNumHeaderLines((new Integer(numHeaderLines)).intValue()); entityObject.setNumFooterLines(numFooterLines); entityObject.setSimpleDelimited(isSimpleDelimited); // For simple delimited data file if (fieldDelimiter != null) { entityObject.setDelimiter(fieldDelimiter); } if (quoteCharacter != null) { entityObject.setQuoteCharacter(quoteCharacter); } if (literalCharacter != null) { entityObject.setLiteralCharacter(literalCharacter); } entityObject.setCollapseDelimiters(isCollapseDelimiters); entityObject.setRecordDelimiter(recordDelimiter); entityObject.setURL(onlineUrl); entityObject.setDataFormat(format); entityObject.setCompressionMethod(compressionMethod); entityObject.setIsImageEntity(isImageEntity); entityObject.setIsOtherEntity(isOtherEntity); entityObject.setHasGZipDataFile(isGZipDataFile); entityObject.setHasZipDataFile(isZipDataFile); entityObject.setHasTarDataFile(isTarDataFile); entityObject.setPackageId(packageId); try { NodeList attributeListNodeList = xpathapi.selectNodeList(entityNode, "attributeList"); processAttributeList(xpathapi, attributeListNodeList, xpath, entityObject); entityObject.setDataFormatArray(formatArray); } catch (Exception e) { throw new Exception("Error parsing attributes: " + e.getMessage(), e); } //entityHash.put(Integer.toString(elementId), entityObject); emlDataPackage.add(entityObject); //fileHash.put(elementId, onlineUrl); } // end for loop } /** * This method will digest a text fixed data format node and return * a TextFixedDataFormat object. * * @param node the Node object to be processed */ private TextWidthFixedDataFormat handleTextFixedDataFormatNode(Node node) throws Exception { TextWidthFixedDataFormat textWidthFixedDataFormat = null; if (node == null) { return textWidthFixedDataFormat; } NodeList childNodes = node.getChildNodes(); int length = childNodes.getLength(); for (int i = 0; i < length; i++) { Node childNode = childNodes.item(i); String elementName = childNode.getNodeName(); if (elementName != null && elementName.equals("fieldWidth")) { String fieldWidthStr = childNode.getFirstChild().getNodeValue(); int fieldWidth = (new Integer(fieldWidthStr)).intValue(); if (isDebugging) { //log.debug("The filed width for fix width in eml is " // + fieldWidth); } textWidthFixedDataFormat = new TextWidthFixedDataFormat(fieldWidth); } else if (elementName != null && elementName.equals("fieldStartColumn") && textWidthFixedDataFormat != null) { String startColumnStr = childNode.getFirstChild().getNodeValue(); int startColumn = (new Integer(startColumnStr)).intValue(); if (isDebugging) { //log.debug("The start column is " + startColumn); } textWidthFixedDataFormat.setFieldStartColumn(startColumn); } else if (elementName != null && elementName.equals("lineNumber") && textWidthFixedDataFormat != null) { String lineNumberStr = childNode.getFirstChild().getNodeValue(); int lineNumber = (new Integer(lineNumberStr)).intValue(); if (isDebugging) { //log.debug("The start column is " + lineNumber); } textWidthFixedDataFormat.setLineNumber(lineNumber); } } return textWidthFixedDataFormat; } /* * This method will digest a complex delimited data format node * and return a TextDelimitedDataFormat object. */ private TextDelimitedDataFormat handleComplexDelimitedDataFormatNode(Node node) throws Exception { TextDelimitedDataFormat textDelimitedDataFormat = null; if (node == null) { return textDelimitedDataFormat; } NodeList childNodes = node.getChildNodes(); int length = childNodes.getLength(); Vector quoteList = new Vector(); for (int i = 0; i < length; i++) { Node childNode = childNodes.item(i); String elementName = childNode.getNodeName(); if (elementName != null && elementName.equals("fieldDelimiter")) { String fieldDelimiter = childNode.getFirstChild().getNodeValue(); if (isDebugging) { //log.debug("The field delimiter for complex format in eml is " + // fieldDelimiter); } textDelimitedDataFormat = new TextDelimitedDataFormat(fieldDelimiter); } else if (elementName != null && elementName.equals("lineNumber") && textDelimitedDataFormat != null) { String lineNumberStr = childNode.getFirstChild().getNodeValue(); int lineNumber = (new Integer(lineNumberStr)).intValue(); if (isDebugging) { //log.debug("The line number is " + lineNumber); } textDelimitedDataFormat.setLineNumber(lineNumber); } else if (elementName != null && elementName.equals("collapseDelimiters") && textDelimitedDataFormat != null) { String collapseDelimiters = childNode.getFirstChild().getNodeValue(); if (isDebugging) { //log.debug("The collapse delimiter: " + collapse); } textDelimitedDataFormat.setCollapseDelimiters(collapseDelimiters); } else if (elementName != null && elementName.equals("quoteCharacter") && textDelimitedDataFormat != null) { String quoteCharacter = childNode.getFirstChild().getNodeValue(); quoteList.add(quoteCharacter); } } // end for loop // set up quoteList if (textDelimitedDataFormat != null) { int size = quoteList.size(); String[] quoteCharacterArray = new String[size]; for (int i = 0; i < size; i++) { quoteCharacterArray[i] = (String) quoteList.elementAt(i); } textDelimitedDataFormat.setQuoteCharacterArray(quoteCharacterArray); } return textDelimitedDataFormat; } }