de.iai.ilcd.xml.read.DataSetReader.java Source code

Introduction

Here is the source code for de.iai.ilcd.xml.read.DataSetReader.java
Source

/*******************************************************************************
 * Copyright (c) 2011 Karlsruhe Institute of Technology (KIT) - Institute for
 * Applied Computer Science (IAI).
 * 
 * This file is part of soda4LCA - the Service-Oriented Life Cycle Data Store.
 * 
 * soda4LCA is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * soda4LCA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with soda4LCA. If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/

package de.iai.ilcd.xml.read;

import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.jxpath.JXPathContext;
import org.apache.commons.jxpath.xml.JDOMParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.fzk.iai.ilcd.service.model.common.IMultiLangString;
import de.iai.ilcd.model.common.Classification;
import de.iai.ilcd.model.common.DataSet;
import de.iai.ilcd.model.common.DataSetType;
import de.iai.ilcd.model.common.DataSetVersion;
import de.iai.ilcd.model.common.Uuid;
import de.iai.ilcd.model.common.XmlFile;
import de.iai.ilcd.model.common.exception.FormatException;
import de.schlichtherle.io.File;
import de.schlichtherle.io.FileInputStream;

/**
 * 
 * @author clemens.duepmeier
 */
public abstract class DataSetReader {

    private static final Logger logger = LoggerFactory.getLogger(DataSetReader.class);

    protected DataSetParsingHelper parserHelper = null;

    protected CommonConstructsReader commonReader = null;

    protected String dataSetFileName = null;

    PrintWriter out = null;

    public DataSet readFromFile(String fileName, PrintWriter out)
            throws FileNotFoundException, IOException, UnsupportedEncodingException {

        File file = new File(fileName);
        return readDataSetFromFile(file, out);

    }

    public String getDataSetFileName() {
        return dataSetFileName;
    }

    public DataSet readDataSetFromFile(File file, PrintWriter out)
            throws FileNotFoundException, IOException, UnsupportedEncodingException {

        dataSetFileName = file.getAbsolutePath();
        this.out = out;

        DataSet dataSet = null;

        FileInputStream inputStream = null;
        try {
            inputStream = new FileInputStream(file);
        } catch (Exception e) {
            out.println("Warning: Cannot find file " + file.getName());
            throw new FileNotFoundException("File " + file.getName() + " cannot be found");
        }
        dataSet = readDataSetFromStream(inputStream, out);
        inputStream.close();

        return dataSet;
    }

    // TODO: this method does not work with Source data sets, because source
    // data sets need to find referenced digital files
    // and this assumes that the files can be found via the path of the source
    // data set. But this path is not given with
    // only the InputStream
    public DataSet readDataSetFromStream(InputStream inStream, PrintWriter out)
            throws IOException, UnsupportedEncodingException {
        byte[] contents = null;
        try {
            contents = IOUtils.toByteArray(inStream);
        } catch (IOException e) {
            logger.error("cannot read the whole input stream into byte array");
            throw e;
        }
        DataSet dataSet = this.parseStream(new ByteArrayInputStream(contents));

        XmlFile xmlFile = new XmlFile();
        String xml;
        try {
            xml = new String(contents, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            logger.error("cannot parse stream input into an UTF-8 string");
            logger.error("Exception is: ", e);
            throw e;
        }
        // if BOM is in front of ByteArray, we may have some unknown characters in front of <xml because
        // UTF-8 conversion in Standard Java has problems with BOM, let's just filter these nasty characters
        if (!xml.startsWith("<"))
            xml = xml.trim().replaceFirst("^([\\W]+)<", "<"); // remove junk at front of dataset
        xmlFile.setContent(xml);
        dataSet.setXmlFile(xmlFile);

        return dataSet;
    }

    private DataSet parseStream(InputStream inputStream) {

        JDOMParser parser = new JDOMParser();
        parser.setValidating(false);
        Object doc = parser.parseXML(inputStream);
        JXPathContext context = JXPathContext.newContext(doc);
        context.setLenient(true);
        context.registerNamespace("common", "http://lca.jrc.it/ILCD/Common");

        parserHelper = new DataSetParsingHelper(context);
        commonReader = new CommonConstructsReader(parserHelper);

        return parse(context, out);
    }

    protected abstract DataSet parse(JXPathContext context, PrintWriter out);

    public void readCommonFields(DataSet dataset, DataSetType dataSetType, JXPathContext context) {

        IMultiLangString name = parserHelper.getIMultiLanguageString("//common:name");
        Uuid uuid = commonReader.getUuid();
        IMultiLangString generalComment = parserHelper.getIMultiLanguageString("//common:generalComment");
        String permanentUri = parserHelper.getStringValue("//common:permanentDataSetURI");
        String versionString = parserHelper.getStringValue("//common:dataSetVersion");

        dataset.setName(name);
        dataset.setUuid(uuid);
        dataset.setDescription(generalComment);
        dataset.setPermanentUri(permanentUri);
        DataSetVersion version = new DataSetVersion();
        try {
            if (versionString != null)
                version = DataSetVersion.parse(versionString);
            else {
                if (out != null)
                    out.println("Warning: This data set has no version number; version will be set to 00.00.000");
            }
        } catch (FormatException ex) {
            if (out != null) {
                out.println("Warning: The version number has an invalid format. See below for details.");
                out.println("Exception output: " + ex.getMessage());
            }
        }
        dataset.setVersion(version);

        // get the list of classifications and add them
        List<Classification> classifications = commonReader.getClassifications(dataSetType);
        for (Classification classification : classifications) {
            // set default classification
            if (classification.getName().equalsIgnoreCase("ilcd")) {
                dataset.setClassification(classification);
            }
        }
        if (dataset.getClassification().getClasses().isEmpty() && classifications.size() > 0) {
            dataset.setClassification(classifications.get(0)); // we need a default classification system, even
            // if it's not the ILCD classification
        }
    }
}