eionet.gdem.conversion.odf.OdsReader.java Source code

Java tutorial

Introduction

Here is the source code for eionet.gdem.conversion.odf.OdsReader.java

Source

/**
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is " GDEM project".
 *
 * The Initial Developer of the Original Code is TietoEnator.
 * The Original Code code was developed for the European
 * Environment Agency (EEA) under the IDA/EINRC framework contract.
 *
 * Copyright (C) 2000-2004 by European Environment Agency.  All
 * Rights Reserved.
 *
 * Original Code: Enriko Käsper (TietoEnator)
 * Created on 27.04.2006
 */
package eionet.gdem.conversion.odf;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.lang3.StringUtils;

import com.catcode.odf.ODFMetaFileAnalyzer;
import com.catcode.odf.OpenDocumentMetadata;

import eionet.gdem.GDEMException;
import eionet.gdem.conversion.datadict.DDElement;
import eionet.gdem.conversion.datadict.DD_XMLInstance;
import eionet.gdem.conversion.excel.reader.DDXmlElement;
import eionet.gdem.conversion.spreadsheet.DDXMLConverter;
import eionet.gdem.conversion.spreadsheet.SourceReaderIF;
import eionet.gdem.conversion.spreadsheet.SourceReaderLogger;
import eionet.gdem.conversion.spreadsheet.SourceReaderLogger.ReaderTypeEnum;
import eionet.gdem.dto.ConversionResultDto;
import eionet.gdem.utils.Utils;

/**
 * The class is responsible for reading OpenDocument Spreadsheets.
 *
 * @author Enriko Käsper
 */

public class OdsReader implements SourceReaderIF {

    /** Object containing OpenDocument file attributes. Populated by initReader. */
    private OpenDocumentMetadata metadata = null;

    /** Object containing OpenDocument file sheets' data. Populated by initReader. */
    private OpenDocumentSpreadsheet spreadsheet = null;

    /** OpenDocument spreadsheet attribute name containing DD dataset schema url. */
    public static final String SCHEMA_ATTR_NAME = "schema-url";

    /** OpenDocument spreadsheet attribute name containing DD table schema urls. */
    public static final String TBL_SCHEMAS_ATTR_NAME = "table-schema-urls";

    /** Separator between the per-table entries in the TBL_SCHEMAS_ATTR_NAME attribute value. */
    private static final String TBL_SEPARATOR = ";";

    /** Separator between the properties of a single table entry in the TBL_SCHEMAS_ATTR_NAME attribute value. */
    private static final String TBL_PROPERTIES_SEPARATOR = ",";

    /** Prefix of the table entry property that carries the table (sheet) name. */
    public static final String TABLE_NAME = "tableName=";

    /** Prefix of the table entry property that carries the table schema URL. */
    public static final String TABLE_SCHEMA_URL = "tableSchemaURL=";

    /** Logger writing conversion log messages; created in startReader. */
    private SourceReaderLogger readerLogger;

    /** Names of the sheets found in the ODS file; refreshed in startReader. */
    List<String> odsSheetNames = new ArrayList<String>();
    /** Ods file size as reported by File.length(); set in initReader. */
    private long inputFileLength = 0;

    /**
     * Returns the DD dataset schema URL stored in the user-defined metadata of the document.
     *
     * @return value of the SCHEMA_ATTR_NAME user-defined attribute, or null when the attribute is not present.
     */
    @Override
    public String getXMLSchema() {
        // Hashtable cannot hold null values, so a null lookup result means the attribute is absent.
        Hashtable<?, ?> userDefined = metadata.getUserDefined();
        return (String) userDefined.get(SCHEMA_ATTR_NAME);
    }

    /**
     * Initializes OdsReader: parses the sheet content and the metadata of the ODS file into Java objects
     * and remembers the length of the file.
     *
     * @see eionet.gdem.conversion.spreadsheet.SourceReaderIF#initReader(java.io.File)
     * @param inFile source ODS file.
     * @throws GDEMException if the file is missing or an I/O error occurs while opening or reading it.
     */
    @Override
    public void initReader(File inFile) throws GDEMException {
        if (inFile == null) {
            throw new GDEMException("Input file is missing");
        }
        try {
            // The analyzers are expected to close the stream after parsing, but that is not guaranteed when
            // parsing fails. Closing again in finally is harmless (closing a closed FileInputStream is a no-op)
            // and prevents a file handle leak on the error path.
            FileInputStream contentStream = new FileInputStream(inFile);
            try {
                ODFSpreadsheetAnalyzer odfSpreadsheetAnalyzer = new ODFSpreadsheetAnalyzer();
                spreadsheet = odfSpreadsheetAnalyzer.analyzeZip(contentStream);
            } finally {
                contentStream.close();
            }

            // The file is read a second time because each analyzer consumes its own stream.
            FileInputStream metaStream = new FileInputStream(inFile);
            try {
                ODFMetaFileAnalyzer odfMetaAnalyzer = new ODFMetaFileAnalyzer();
                metadata = odfMetaAnalyzer.analyzeZip(metaStream);
            } finally {
                metaStream.close();
            }
        } catch (IOException e) {
            throw new GDEMException("Unable to open ODS file. ", e);
        }
        inputFileLength = inFile.length();
    }

    /**
     * Creates the conversion logger for ODS input, logs the start of the workbook and reports the number and
     * the names of the sheets found in the spreadsheet.
     *
     * @param resultObject conversion result object handed over to the logger.
     */
    @Override
    public void startReader(ConversionResultDto resultObject) {
        readerLogger = new SourceReaderLogger(resultObject, ReaderTypeEnum.ODS);
        readerLogger.logStartWorkbook();

        odsSheetNames = getSheetNames();
        int sheetCount = spreadsheet.getTables().size();
        String joinedSheetNames = StringUtils.join(odsSheetNames, ", ");
        readerLogger.logNumberOfSheets(sheetCount, joinedSheetNames);
    }

    /**
     * Logs the end of the workbook together with the length of the processed input file.
     */
    @Override
    public void closeReader() {
        final long processedFileLength = inputFileLength;
        readerLogger.logEndWorkbook(processedFileLength);
    }

    /**
     * Writes the content of the spreadsheet into the given DD XML instance.
     * <p>
     * For every table defined in the instance the sheet with the same local name (and its optional meta sheet)
     * is looked up, the sheet columns are mapped to the table elements by header name and every non-empty data
     * row is written as one row of the instance. A sheet without data (getTableData returns null) is logged and
     * skipped. A sheet reported as empty by the spreadsheet produces exactly one row with empty element values.
     *
     * @see eionet.gdem.conversion.spreadsheet.SourceReaderIF#writeContentToInstance(DD_XMLInstance)
     * @param instance DD XML instance receiving the tables, rows and elements.
     * @throws Exception propagated from the writer methods of the instance.
     */
    @Override
    public void writeContentToInstance(DD_XMLInstance instance) throws Exception {
        List<DDXmlElement> tables = instance.getTables();
        if (tables == null || spreadsheet == null) {
            readerLogger.logNoDefinitionsForTables();
            return;
        }

        for (DDXmlElement table : tables) {
            String tblLocalName = table.getLocalName();
            String tblName = table.getName();
            String tblAttrs = table.getAttributes();

            readerLogger.logStartSheet(tblLocalName);
            readerLogger.logSheetSchema(instance.getInstanceUrl(), tblLocalName);
            if (!odsSheetNames.contains(tblLocalName)) {
                readerLogger.logSheetNotFound(tblLocalName);
            }

            List<List<String>> listTableData = spreadsheet.getTableData(tblLocalName);
            List<List<String>> listMetaTableData = getMetaTableData(tblLocalName);

            if (listTableData == null) {
                readerLogger.logEmptySheet(tblLocalName);
                continue;
            }
            List<DDXmlElement> elements = instance.getTblElements(tblName);
            List<String> headerRow = spreadsheet.getTableHeader(tblLocalName);
            List<String> headerRowMetaTable = getMetaTableHeader(tblLocalName);
            setColumnMappings(headerRow, elements, true);

            // Meta sheet columns are mapped after the main sheet ones, so a name present in both sheets
            // ends up mapped to the meta sheet.
            if (listMetaTableData != null) {
                setColumnMappings(headerRowMetaTable, elements, false);
            }
            logColumnMappings(tblLocalName, headerRow, headerRowMetaTable, elements);

            instance.writeTableStart(tblName, tblAttrs);
            instance.setCurRow(tblName);

            Map<String, DDElement> elemDefs = instance.getElemDefs(tblName);

            boolean emptySheet = spreadsheet.isEmptySheet(tblLocalName);
            // An empty sheet still produces exactly one (empty) row; otherwise every data row is visited.
            int rowLimit = emptySheet ? 1 : listTableData.size();
            int countRows = 0;

            for (int j = 0; j < rowLimit; j++) {
                // An empty sheet may have no data rows at all, so the lookup must be guarded.
                List<String> listRow = (j < listTableData.size()) ? listTableData.get(j) : null;
                List<String> listMetaRow = (listMetaTableData != null && listMetaTableData.size() > j)
                        ? listMetaTableData.get(j)
                        : null;

                // don't convert empty rows.
                if (!emptySheet && Utils.isEmptyList(listRow)) {
                    continue;
                }
                countRows++;
                writeRow(instance, elements, elemDefs, listRow, listMetaRow, emptySheet);
            }
            instance.writeTableEnd(tblName);
            readerLogger.logNumberOfRows(countRows, tblLocalName);
            readerLogger.logEndSheet(tblLocalName);
        }

    }

    /**
     * Writes one row of the instance: one XML element per table element, or several when the element
     * definition allows multiple values separated by a delimiter.
     *
     * @param instance DD XML instance receiving the row.
     * @param elements table elements with their column mappings.
     * @param elemDefs element definitions keyed by element local name, may be null.
     * @param listRow cell values of the main sheet row, may be null.
     * @param listMetaRow cell values of the meta sheet row, may be null.
     * @param emptySheet true if the sheet is empty and only empty values have to be written.
     * @throws Exception propagated from the writer methods of the instance.
     */
    private void writeRow(DD_XMLInstance instance, List<DDXmlElement> elements, Map<String, DDElement> elemDefs,
            List<String> listRow, List<String> listMetaRow, boolean emptySheet) throws Exception {

        instance.writeRowStart();
        for (DDXmlElement elem : elements) {
            String elemName = elem.getName();
            String elemLocalName = elem.getLocalName();
            String elemAttributes = elem.getAttributes();
            int colIndex = elem.getColIndex();
            boolean isMainTable = elem.isMainTable();

            boolean hasMultipleValues = false;
            String delim = null;

            // get element definition info
            DDElement elemDef = (elemDefs != null) ? elemDefs.get(elemLocalName) : null;
            if (elemDef != null) {
                delim = elemDef.getDelimiter();
                hasMultipleValues = elemDef.isHasMultipleValues();
            }

            String data = "";
            if (colIndex > -1 && !emptySheet) {
                data = isMainTable ? getListStringValue(listRow, colIndex)
                        : getListStringValue(listMetaRow, colIndex);
            }
            if (hasMultipleValues && !Utils.isNullStr(delim)) {
                // NOTE(review): String.split treats the delimiter as a regular expression - confirm that
                // delimiters such as "|" or "." cannot occur, or quote the delimiter.
                String[] values = data.split(delim);
                for (String value : values) {
                    instance.writeElement(elemName, elemAttributes, value.trim());
                }
            } else {
                instance.writeElement(elemName, elemAttributes, data);
            }
        }
        instance.writeRowEnd();
    }

    /**
     * Returns the name of the first table (sheet) of the spreadsheet.
     *
     * @return name of the table at index 0, or null when no spreadsheet has been loaded.
     */
    @Override
    public String getFirstSheetName() {
        return (spreadsheet == null) ? null : spreadsheet.getTableName(0);
    }

    /**
     * Returns a copy of the list of sheet (table) names of the loaded ODS spreadsheet.
     *
     * @return new list holding the sheet names in the order reported by the spreadsheet.
     */
    private List<String> getSheetNames() {
        return new ArrayList<String>(spreadsheet.getTables());
    }

    /**
     * Parses the TBL_SCHEMAS_ATTR_NAME user-defined attribute of the document into a map of sheet names and
     * table schema URLs.
     * <p>
     * The attribute value is a list of table entries separated by TBL_SEPARATOR; each entry is a list of
     * properties separated by TBL_PROPERTIES_SEPARATOR, of which the ones prefixed with TABLE_NAME and
     * TABLE_SCHEMA_URL are used. Entries lacking either property, and entries naming a table that does not
     * exist in the loaded spreadsheet, are skipped. When a table is listed more than once the first entry wins.
     *
     * @return map with sheet name as key and table schema URL as value, in the order the tables are listed in
     *         the attribute; empty when the attribute is missing or blank.
     */
    @Override
    public Map<String, String> getSheetSchemas() {
        // LinkedHashMap keeps the order in which the tables are listed in the metadata.
        Map<String, String> resultMap = new LinkedHashMap<String, String>();
        Hashtable<?, ?> userMetadata = metadata.getUserDefined();

        if (!userMetadata.containsKey(TBL_SCHEMAS_ATTR_NAME)) {
            return resultMap;
        }
        String ret = (String) userMetadata.get(TBL_SCHEMAS_ATTR_NAME);
        if (Utils.isNullStr(ret)) {
            return resultMap;
        }

        StringTokenizer stTbl = new StringTokenizer(ret, TBL_SEPARATOR);
        while (stTbl.hasMoreTokens()) {
            String tbl = stTbl.nextToken();
            StringTokenizer stTblProps = new StringTokenizer(tbl, TBL_PROPERTIES_SEPARATOR);
            // both the table name and the schema URL property are required
            if (stTblProps.countTokens() < 2) {
                continue;
            }

            String tblName = null;
            String tblSchema = null;

            while (stTblProps.hasMoreTokens()) {
                String token = stTblProps.nextToken();
                if (token.startsWith(TABLE_NAME)) {
                    tblName = token.substring(TABLE_NAME.length());
                } else if (token.startsWith(TABLE_SCHEMA_URL)) {
                    tblSchema = token.substring(TABLE_SCHEMA_URL.length());
                }
            }
            if (Utils.isNullStr(tblName) || Utils.isNullStr(tblSchema)) {
                continue;
            }

            // check if table exists
            if (spreadsheet != null && !spreadsheet.tableExists(tblName)) {
                continue;
            }
            if (!resultMap.containsKey(tblName)) {
                resultMap.put(tblName, tblSchema);
            }
        }
        return resultMap;
    }

    /**
     * Tells whether the given sheet is empty.
     *
     * @param sheetName name of the sheet to check.
     * @return true when no spreadsheet has been loaded, otherwise the answer of the spreadsheet itself.
     */
    @Override
    public boolean isEmptySheet(String sheetName) {
        return spreadsheet == null || spreadsheet.isEmptySheet(sheetName);
    }

    /**
     * DD can generate an additional meta sheet with GIS elements for one DD table. In XML the main sheet and
     * its meta sheet should be handled as one table, so the meta sheet is read in parallel with the main sheet.
     * The meta sheet is looked up by the main sheet name followed by the ODS meta sheet suffix.
     *
     * @param mainSheetName Name of DD main table.
     * @return Matrix of values retrieved from the meta table, as returned by the spreadsheet.
     */
    private List<List<String>> getMetaTableData(String mainSheetName) {
        String metaSheetName = mainSheetName + DDXMLConverter.META_SHEET_NAME_ODS;
        return spreadsheet.getTableData(metaSheetName);
    }

    /**
     * Returns the column names of the meta sheet belonging to the given main sheet. The meta sheet is looked
     * up by the main sheet name followed by the ODS meta sheet suffix.
     *
     * @param mainSheetName Name of DD main table.
     * @return List of meta table column names, as returned by the spreadsheet.
     */
    private List<String> getMetaTableHeader(String mainSheetName) {
        String metaSheetName = mainSheetName + DDXMLConverter.META_SHEET_NAME_ODS;
        return spreadsheet.getTableHeader(metaSheetName);
    }

    /**
     * Maps every element to the sheet column whose header equals the element local name, so that the
     * conversion still works when the user has changed the ordering of the columns. Elements without a
     * matching header are left untouched.
     *
     * @param listHeaderRow List of column names.
     * @param elements List of DD XML elements.
     * @param isMainTable true if DD main table, false if GIS table.
     */
    private void setColumnMappings(List<String> listHeaderRow, List<DDXmlElement> elements, boolean isMainTable) {
        for (DDXmlElement element : elements) {
            int headerIndex = listHeaderRow.indexOf(element.getLocalName());
            if (headerIndex < 0) {
                // no column with this name in the sheet
                continue;
            }
            element.setColIndex(headerIndex);
            element.setMainTable(isMainTable);
        }
    }

    /**
     * Goes through all columns and logs the number of columns as well as the missing and redundant columns
     * into the conversion log. Elements without a mapped column are reported as missing; columns of the main
     * and of the meta sheet whose (case-insensitive) name matches no element are reported as redundant.
     *
     * @param sheetName Name of the main sheet.
     * @param row List of column names of the main sheet.
     * @param metaRow List of column names of the meta sheet, null if there is no meta sheet.
     * @param elements List of DD XML elements.
     */
    private void logColumnMappings(String sheetName, List<String> row, List<String> metaRow,
            List<DDXmlElement> elements) {

        // The meta sheet is read using the ODS suffix, so it is logged under the same name.
        String metaSheetName = sheetName + DDXMLConverter.META_SHEET_NAME_ODS;

        readerLogger.logNumberOfColumns(row.size(), sheetName);
        if (metaRow != null) {
            readerLogger.logNumberOfColumns(metaRow.size(), metaSheetName);
        }

        List<String> missingColumns = new ArrayList<String>();
        List<String> elemNames = new ArrayList<String>();
        for (DDXmlElement element : elements) {
            if (element.getColIndex() < 0) {
                missingColumns.add(element.getLocalName());
            }
            elemNames.add(element.getLocalName().toLowerCase());
        }
        if (!missingColumns.isEmpty()) {
            readerLogger.logMissingColumns(StringUtils.join(missingColumns, ", "), sheetName);
        }
        List<String> extraColumns = getExtraColumns(row, elemNames);
        if (!extraColumns.isEmpty()) {
            readerLogger.logExtraColumns(StringUtils.join(extraColumns, ", "), sheetName);
        }

        if (metaRow != null) {
            List<String> extraMetaColumns = getExtraColumns(metaRow, elemNames);
            if (!extraMetaColumns.isEmpty()) {
                // report the redundant columns of the meta sheet, not those of the main sheet
                readerLogger.logExtraColumns(StringUtils.join(extraMetaColumns, ", "), metaSheetName);
            }
        }

    }

    /**
     * Collects the columns that do not correspond to any XML element. Column names are trimmed before the
     * check, blank names are ignored and the comparison is done on the lower-cased column name.
     *
     * @param row list of column names
     * @param elemNames list of lower-cased XML element names
     * @return List of trimmed names of the redundant columns.
     */
    private List<String> getExtraColumns(List<String> row, List<String> elemNames) {
        List<String> redundant = new ArrayList<String>();
        for (String header : row) {
            String trimmed = (header == null) ? "" : header.trim();
            if (Utils.isNullStr(trimmed)) {
                continue;
            }
            if (elemNames.contains(trimmed.toLowerCase())) {
                continue;
            }
            redundant.add(trimmed);
        }
        return redundant;
    }

    /**
     * Get the trimmed cell value from the list of cell values.
     *
     * @param list list of cell values, may be null.
     * @param colIdx zero-based column index.
     * @return trimmed string value from the list of cell values; empty string if the list is null, the index
     *         is null or outside of the list, or the cell value is null.
     */
    private String getListStringValue(List<String> list, Integer colIdx) {

        if (list == null || colIdx == null) {
            return "";
        }
        // Valid indexes are 0 .. size - 1; an index equal to size would make List.get throw.
        if (colIdx < 0 || colIdx >= list.size()) {
            return "";
        }
        String data = list.get(colIdx);
        if (data == null) {
            return "";
        }

        return data.trim();
    }

    /**
     * Gives access to the parsed spreadsheet.
     *
     * @return OpenDocumentSpreadsheet object held by this reader, null if none has been loaded.
     */
    protected OpenDocumentSpreadsheet getSpreadsheet() {
        return spreadsheet;
    }
}