com.opendoorlogistics.core.tables.io.XmlParserLoader.java Source code

Java tutorial

Introduction

Here is the source code for com.opendoorlogistics.core.tables.io.XmlParserLoader.java

Source

/*******************************************************************************
 * Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com)
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License 3.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 * 
 ******************************************************************************/
package com.opendoorlogistics.core.tables.io;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

import com.opendoorlogistics.api.ExecutionReport;
import com.opendoorlogistics.api.ODLApi;
import com.opendoorlogistics.api.components.ProcessingApi;
import com.opendoorlogistics.api.tables.ODLColumnType;
import com.opendoorlogistics.api.tables.ODLDatastoreAlterable;
import com.opendoorlogistics.api.tables.ODLTable;
import com.opendoorlogistics.api.tables.ODLTableAlterable;
import com.opendoorlogistics.api.tables.ODLTableDefinition;
import com.opendoorlogistics.api.tables.ODLTableDefinitionAlterable;
import com.opendoorlogistics.api.tables.ODLTableReadOnly;
import com.opendoorlogistics.core.scripts.execution.ExecutionReportImpl;
import com.opendoorlogistics.core.tables.ODLFactory;
import com.opendoorlogistics.core.tables.io.PoiIO.SchemaSheetInformation;
import com.opendoorlogistics.core.tables.io.SchemaIO.SchemaColumnDefinition;
import com.opendoorlogistics.core.tables.utils.DatastoreCopier;
import com.opendoorlogistics.core.utils.UpdateTimer;
import com.opendoorlogistics.core.utils.strings.Strings;

/**
 * Based on example at http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
 * Also http://blogs.msdn.com/b/brian_jones/archive/2007/05/29/simple-spreadsheetml-file-part-3-formatting.aspx
 * @author Phil
 *
 */
public class XmlParserLoader {
    private final UpdateTimer timer = new UpdateTimer(250);
    private final ExecutionReport report;
    private final File file;
    private final ODLDatastoreAlterable<ODLTableAlterable> ds;
    private final ProcessingApi processingApi;
    private String baseMessage;

    private XmlParserLoader(File file, ODLDatastoreAlterable<ODLTableAlterable> ds, ProcessingApi processingApi,
            ExecutionReport report) {
        this.file = file;
        this.ds = ds;
        this.report = report;
        this.processingApi = processingApi;
    }

    private void throwIfUserQuit() {
        if (processingApi != null && processingApi.isCancelled()) {
            throw new RuntimeException("User cancelled Excel loading.");
        }
    }

    private XMLReader createSheetParser(ContentHandler handler) {
        try {
            XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");

            parser.setContentHandler(handler);
            return parser;

        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        //        ContentHandler handler2 =
        //                 new XSSFSheetXMLHandler(styles, sst, new SheetContentsHandler(){
        //
        //                  @Override
        //                  public void startRow(int rowNum) {
        //                     // TODO Auto-generated method stub
        //                     
        //                  }
        //
        //                  @Override
        //                  public void endRow() {
        //                     // TODO Auto-generated method stub
        //                     
        //                  }
        //
        //                  @Override
        //                  public void cell(String cellReference, String formattedValue) {
        //                     System.out.println(cellReference + " " + formattedValue);
        //                  }
        //
        //                  @Override
        //                  public void headerFooter(String text, boolean isHeader, String tagName) {
        //                     // TODO Auto-generated method stub
        //                     
        //                  }}
        //                 , true);

    }

    //SchemaIO schema#

    private class ReadSchemaSheet implements SheetContentsHandler {
        private TreeMap<Integer, TreeMap<Integer, String>> rowsMap = new TreeMap<>();
        private TreeMap<Integer, String> currentRow;

        @Override
        public void startRow(int rowNum) {
            currentRow = new TreeMap<>();
            rowsMap.put(rowNum, currentRow);
        }

        @Override
        public void endRow(int row) {
            // TODO Auto-generated method stub

        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            CellReference ref = new CellReference(cellReference);
            currentRow.put((int) ref.getCol(), formattedValue);
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // TODO Auto-generated method stub

        }

        SchemaSheetInformation finish(ExecutionReport report) {
            // remove any totally empty rows
            Iterator<Map.Entry<Integer, TreeMap<Integer, String>>> it = rowsMap.entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry<Integer, TreeMap<Integer, String>> row = it.next();
                boolean remove = false;
                TreeMap<Integer, String> r = row.getValue();
                if (r.size() == 0) {
                    remove = true;
                }

                if (!remove) {
                    remove = true;
                    for (String v : r.values()) {
                        if (!Strings.isEmpty(v)) {
                            remove = false;
                            break;
                        }
                    }
                }

                if (remove) {
                    it.remove();
                }
            }

            class ConsecutiveRows extends ArrayList<Map.Entry<Integer, TreeMap<Integer, String>>> {
                ODLTableReadOnly read(String tableName) {
                    // create table definition
                    ReadTableDefinition dfnHandler = new ReadTableDefinition(tableName, null, false);
                    process(dfnHandler);
                    ODLTableDefinition dfn = dfnHandler.createTableDefinition();

                    // read values
                    ODLDatastoreAlterable<ODLTableAlterable> dsDfn = ODLFactory.createAlterable();
                    ODLTableAlterable table = dsDfn.createTable(tableName, -1);
                    DatastoreCopier.copyTableDefinition(dfn, table);
                    process(new ReadIntoTableHandler(table, dfnHandler.getHeaderRow() + 1, Integer.MAX_VALUE));

                    return table;
                }

                void process(SheetContentsHandler handler) {
                    for (Map.Entry<Integer, TreeMap<Integer, String>> row : this) {
                        handler.startRow(row.getKey());
                        for (Map.Entry<Integer, String> colVal : row.getValue().entrySet()) {
                            CellReference ref = new CellReference(row.getKey(), colVal.getKey());
                            handler.cell(ref.formatAsString(), colVal.getValue(), null);
                        }
                        handler.endRow(row.getKey());
                    }
                }
            }

            // split into tables of consecutive rows
            ArrayList<ConsecutiveRows> split = new ArrayList<>();
            it = rowsMap.entrySet().iterator();
            ConsecutiveRows current = null;
            while (it.hasNext()) {
                Map.Entry<Integer, TreeMap<Integer, String>> row = it.next();

                if (current == null
                        || (current.size() > 0 && current.get(current.size() - 1).getKey() != row.getKey() - 1)) {
                    current = new ConsecutiveRows();
                    split.add(current);
                }

                current.add(row);

            }

            // process each one
            ArrayList<ODLTableReadOnly> tables = new ArrayList<>();
            for (ConsecutiveRows cr : split) {
                tables.add(cr.read("SchemaTable"));
            }

            return PoiIO.readSchemaFromODLTables(tables, report);
        }
    }

    private static class ColInfo {
        SchemaColumnDefinition dfn;
        ColumnTypeEstimator estimator;
        String name;
    }

    /**
     * Handler which estimates the column types or gets them from the schema
     * @author Phil
     *
     */
    private class ReadTableDefinition implements SheetContentsHandler {
        private final String sheetName;
        private final SchemaIO schemaIO;
        private int headerState = -1;
        private final ArrayList<ColInfo> colInfos = new ArrayList<>();
        private final boolean useEstimators;
        private int headerRow;

        ReadTableDefinition(String sheetName, SchemaIO schema, boolean useEstimators) {
            this.sheetName = sheetName;
            this.schemaIO = schema;
            this.useEstimators = useEstimators;
        }

        @Override
        public void startRow(int rowNum) {
            throwIfUserQuit();

            if (headerState == -1) {
                // now on header row
                headerState = 0;
                headerRow = rowNum;
            }

            if (timer.isUpdate()) {
                postStatus("row " + (rowNum + 1));
            }
        }

        int getHeaderRow() {
            return headerRow;
        }

        @Override
        public void endRow(int row) {
            if (headerState == 0) {
                // if finished header row, fill in any missing column infos
                for (int i = 0; i < colInfos.size(); i++) {
                    if (colInfos.get(i) == null) {
                        colInfos.set(i, new ColInfo());
                        if (useEstimators) {
                            colInfos.get(i).estimator = new ColumnTypeEstimator();
                        }
                    }
                }
            }

            // and flag that we're now on the data
            headerState = 1;
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            CellReference ref = new CellReference(cellReference);
            int col = ref.getCol();
            if (headerState == 0) {
                // init colinfo for this column
                while (col >= colInfos.size()) {
                    colInfos.add(new ColInfo());
                }

                ColInfo info = colInfos.get(col);
                if (schemaIO != null && Strings.isEmpty(formattedValue) == false) {
                    info.dfn = schemaIO.findDefinition(sheetName, formattedValue);
                }

                if (info.dfn == null && useEstimators) {
                    info.estimator = new ColumnTypeEstimator();
                }

                info.name = formattedValue;
            } else if (headerState == 1) {
                if (!Strings.isEmpty(formattedValue)) {
                    if (col < colInfos.size()) {
                        ColInfo info = colInfos.get(col);
                        if (info.estimator != null) {
                            info.estimator.processValue(formattedValue);
                        }
                    }
                }
            }
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // TODO Auto-generated method stub

        }

        ODLTableDefinition createTableDefinition() {
            ODLDatastoreAlterable<ODLTableAlterable> dsDfn = ODLFactory.createAlterable();
            ODLTableDefinitionAlterable ret = dsDfn.createTable(sheetName, -1);
            for (int i = 0; i < colInfos.size(); i++) {
                ColInfo info = colInfos.get(i);
                String name = PoiIO.getValidNewColumnName(info.name, ret);

                if (info.dfn != null) {
                    PoiIO.addColumnFromDfn(info.dfn, name, i, ret);
                } else {
                    ret.addColumn(i, name, useEstimators ? info.estimator.getEstimatedType() : ODLColumnType.STRING,
                            0);
                }
            }

            return ret;
        }
    }

    /**
     * Handler to read into predefined table
     * @author Phil
     *
     */
    private class ReadIntoTableHandler implements SheetContentsHandler {
        private final ODLTable table;
        private final int minRowInclusive;
        private final int maxRowExclusive;
        private int currentOutputRow = -1;

        private ReadIntoTableHandler(ODLTable table, int minRowInclusive, int maxRowExclusive) {
            this.table = table;
            this.minRowInclusive = minRowInclusive;
            this.maxRowExclusive = maxRowExclusive;
        }

        @Override
        public void startRow(int rowNum) {
            throwIfUserQuit();

            if (rowNum >= minRowInclusive && rowNum < maxRowExclusive) {
                currentOutputRow = table.createEmptyRow(-1);
            } else {
                currentOutputRow = -1;
            }

            if (timer.isUpdate()) {
                postStatus("row " + (rowNum + 1));
            }
        }

        @Override
        public void endRow(int row) {

        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            if (currentOutputRow != -1) {
                CellReference ref = new CellReference(cellReference);
                int col = ref.getCol();
                if (col < table.getColumnCount()) {
                    table.setValueAt(formattedValue, currentOutputRow, col);
                }
            }
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // TODO Auto-generated method stub

        }

    }

    private SchemaSheetInformation importSchema(XSSFReader r, StylesTable styles, ReadOnlySharedStringsTable sst) {
        try {
            XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) r.getSheetsData();
            while (it.hasNext()) {
                try (InputStream sheet = it.next()) {
                    String name = it.getSheetName();
                    if (Strings.equalsStd(PoiIO.SCHEMA_SHEET_NAME, name)) {
                        InputSource sheetSource = new InputSource(sheet);
                        ReadSchemaSheet readSchemaSheet = new ReadSchemaSheet();
                        parseSheet(styles, sst, sheetSource, readSchemaSheet);
                        return readSchemaSheet.finish(report);
                    }
                }
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        return null;
    }

    private void parseSheet(StylesTable styles, ReadOnlySharedStringsTable sst, InputSource sheetSource,
            SheetContentsHandler handler) {
        try {
            createSheetParser(new XSSFSheetXMLHandler(styles, sst, handler, false)).parse(sheetSource);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    private void postStatus(String s) {
        if (processingApi != null && baseMessage != null) {
            processingApi.postStatusMessage(baseMessage + (s != null ? " - " + s : ""));
        }
    }

    private void doImport() {
        if (!file.exists()) {
            throw new RuntimeException("Excel file does not exist: " + file.getAbsolutePath());
        }

        OPCPackage pkg = null;
        try {
            pkg = OPCPackage.open(file);
            importOPCPackage(pkg);

            // revert for read-only closing
            pkg.revert();

            if (processingApi != null) {
                processingApi.postStatusMessage("Finished loading, now opening file...");
            }
        } catch (Exception e) {
            if (pkg != null) {
                // revert for read-only closing
                pkg.revert();
            }
            report.setFailed(e);
            throw new RuntimeException(e);
        }

    }

    private void doImport(InputStream stream) {

        try (OPCPackage pkg = OPCPackage.open(stream)) {
            importOPCPackage(pkg);

        } catch (Exception e) {
            report.setFailed(e);
            throw new RuntimeException(e);
        }

    }

    /**
     * @param pkg
     * @throws IOException
     * @throws OpenXML4JException
     * @throws InvalidFormatException
     * @throws SAXException
     */
    private void importOPCPackage(OPCPackage pkg)
            throws IOException, OpenXML4JException, InvalidFormatException, SAXException {
        XSSFReader r = new XSSFReader(pkg);
        StylesTable styles = r.getStylesTable();
        ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg);

        SchemaSheetInformation schema = importSchema(r, styles, sst);

        // read table definitions first
        XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) r.getSheetsData();
        ArrayList<Integer> tableIndices = new ArrayList<>();
        ArrayList<Integer> headerRows = new ArrayList<>();
        while (it.hasNext()) {
            try (InputStream sheet = it.next()) {
                String name = it.getSheetName();

                if (!Strings.equalsStd(PoiIO.SCHEMA_SHEET_NAME, name)) {
                    baseMessage = "Loading Excel, analysing sheet " + name;
                    postStatus(null);

                    InputSource sheetSource = new InputSource(sheet);
                    ReadTableDefinition rtd = new ReadTableDefinition(name, schema != null ? schema.schema : null,
                            true);
                    parseSheet(styles, sst, sheetSource, rtd);
                    tableIndices.add(ds.getTableCount());
                    headerRows.add(rtd.getHeaderRow());
                    DatastoreCopier.copyTableDefinition(rtd.createTableDefinition(), ds);
                }
            }
        }

        // Then actual tables
        it = (XSSFReader.SheetIterator) r.getSheetsData();
        int i = 0;
        while (it.hasNext()) {
            try (InputStream sheet = it.next()) {
                InputSource sheetSource = new InputSource(sheet);
                String name = it.getSheetName();
                if (!Strings.equalsStd(PoiIO.SCHEMA_SHEET_NAME, name)) {
                    baseMessage = "Loading sheet " + name + " into memory";
                    postStatus(null);

                    ODLTable table = ds.getTableAt(i);
                    int headerRow = headerRows.get(i);
                    ReadIntoTableHandler readerHandler = new ReadIntoTableHandler(table, headerRow + 1,
                            Integer.MAX_VALUE);
                    parseSheet(styles, sst, sheetSource, readerHandler);
                    i++;
                }
            }

        }
    }

    static void importExcel(InputStream stream, ODLDatastoreAlterable<ODLTableAlterable> ds,
            ProcessingApi processingApi, ExecutionReport report) {
        new XmlParserLoader(null, ds, processingApi, report).doImport(stream);
    }

    static ODLDatastoreAlterable<ODLTableAlterable> importExcel(File file, ProcessingApi processingApi,
            ExecutionReport report) {
        ODLDatastoreAlterable<ODLTableAlterable> ret = ODLFactory.createAlterable();
        importExcel(file, ret, processingApi, report);
        return ret;
    }

    static void importExcel(File file, ODLDatastoreAlterable<ODLTableAlterable> ds, ProcessingApi processingApi,
            ExecutionReport report) {
        new XmlParserLoader(file, ds, processingApi, report).doImport();
    }

    public static void main(String[] args) throws Exception {
        ODLDatastoreAlterable<ODLTableAlterable> ret = ODLFactory.createAlterable();
        ExecutionReport report = new ExecutionReportImpl();
        //File file = new File("C:\\Users\\Phil\\Dropbox\\Business\\ODL\\Testing\\VRP\\demoVRP.xlsx");
        //      File file = new File("C:\\temp\\TestFromLibreOffice.xlsx");
        File file = new File("C:\\temp\\testloading.xlsx");
        System.out.println("Started loading");
        importExcel(file, ret, new ProcessingApi() {

            @Override
            public ODLApi getApi() {
                // TODO Auto-generated method stub
                return null;
            }

            @Override
            public boolean isFinishNow() {
                // TODO Auto-generated method stub
                return false;
            }

            @Override
            public boolean isCancelled() {
                // TODO Auto-generated method stub
                return false;
            }

            @Override
            public void postStatusMessage(String s) {
                System.out.println(s);
            }

            @Override
            public void logWarning(String warning) {
                // TODO Auto-generated method stub

            }
        }, report);

        System.out.println("Finished loading");
        System.out.println(ret);
        //   XmlParserLoader example = new XmlParserLoader();
        // example.processOneSheet(args[0]);
        //   example.processAllSheets("C:\\Users\\Phil\\Dropbox\\Business\\ODL\\Testing\\VRP\\demoVRP.xlsx");
        //example.processAllSheets("C:\\temp\\NumberFormatting.xlsx");
    }
}