com.ostrichemulators.semtool.poi.main.XlsWriter.java Source code

Introduction

Here is the source code for com.ostrichemulators.semtool.poi.main.XlsWriter.java
Source

/**
 * *****************************************************************************
 * Copyright 2013 SEMOSS.ORG
 *
 * This file is part of SEMOSS.
 *
 * SEMOSS is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * SEMOSS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * SEMOSS. If not, see <http://www.gnu.org/licenses/>.
 * ****************************************************************************
 */
package com.ostrichemulators.semtool.poi.main;

import com.ostrichemulators.semtool.poi.main.LoadingSheetData.DataIterator;
import com.ostrichemulators.semtool.poi.main.LoadingSheetData.LoadingNodeAndPropertyValues;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.eclipse.rdf4j.model.IRI;
import java.awt.Color;
import java.io.File;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.FillPatternType;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.eclipse.rdf4j.model.Value;

/**
 * Create a workbook containing data formated in the Microsoft Excel Sheet
 * Format. This class has two modes of operation: the ImportData mode, and the
 * raw mode. In raw mode, callers should use {@link #createWorkbook() },
 * {@link #createTab(java.lang.String) }, {@link #addRow(java.lang.String[]) }
 * and {@link #write(java.io.File) } to generate the XLS file. In ImportData
 * mode, just use the all-in-one function
 * {@link #write(gov.va.semoss.poi.main.ImportData, java.io.File) } instead.
 */
public class XlsWriter implements GraphWriter {

    private static final Logger log = Logger.getLogger(XlsWriter.class);
    private static final int TAB_ROWLIMIT = 999999;

    private XSSFWorkbook currentwb;
    private XSSFSheet currentsheet;
    private XSSFRow currentrow;
    private String desiredtabname;
    private final List<String> currentheader = new ArrayList<>();
    private int rowcount = 0;
    private final Set<String> currentnames = new HashSet<>();
    private int maxtabrows = TAB_ROWLIMIT;

    /**
     * Sets the max rows that can be added to a tab before continuing the data on
     * another tab. This must be less than {@link #TAB_ROWLIMIT}
     *
     * @param rowspertab
     */
    public void setTabRowLimit(int rowspertab) {
        if (rowspertab < 1 || rowspertab > TAB_ROWLIMIT) {
            log.warn("cannot set rows/tab to " + rowspertab + "; using " + TAB_ROWLIMIT + " instead");
            rowspertab = TAB_ROWLIMIT;
        }
        maxtabrows = rowspertab;
    }

    public XSSFWorkbook getCurrentWb() {
        return currentwb;
    }

    public XSSFSheet getCurrentSheet() {
        return currentsheet;
    }

    public XSSFRow getCurrentRow() {
        return currentrow;
    }

    @Override
    public void write(ImportData data, File output) throws IOException {
        write(data, new FileOutputStream(output));
    }

    @Override
    public void write(ImportData data, OutputStream output) throws IOException {
        createWorkbook(data);

        CellStyle errorstyle = currentwb.createCellStyle();
        errorstyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
        errorstyle.setFillForegroundColor(IndexedColors.PINK.getIndex());

        for (LoadingSheetData nodes : data.getNodes()) {
            List<String> props = new ArrayList<>(nodes.getProperties());
            createTab(nodes.getName(), makeHeaderRow(nodes, props));

            // +2 -> 1 for the blank first col and 1 for the subject type
            String[] row = new String[2 + props.size()];
            CellStyle[] fmts = new CellStyle[2 + props.size()];

            Map<String, IRI> proplkp = nodes.getPropertiesAndDataTypes();
            IRI[] datatypes = new IRI[2 + props.size()];
            int dtcol = 2;
            for (String propname : props) {
                if (null != proplkp.get(propname)) {
                    datatypes[dtcol] = proplkp.get(propname);
                }
                dtcol++;
            }

            DataIterator di = nodes.iterator();
            while (di.hasNext()) {
                LoadingNodeAndPropertyValues nap = di.next();
                row[1] = nap.getSubject();
                fmts[1] = (nap.isSubjectError() ? errorstyle : null);

                int col = 2;
                for (String prop : props) {
                    Value val = nap.get(prop);
                    row[col++] = (null == val ? null : val.stringValue());
                }

                addRow(row, fmts, datatypes);
            }
        }

        for (LoadingSheetData rels : data.getRels()) {
            List<String> props = new ArrayList<>(rels.getProperties());
            createTab(rels.getName(), makeHeaderRow(rels, props));

            // +3 -> 1 for the blank first col and 1 for the subject type, 1 for object type
            String[] row = new String[3 + props.size()];
            CellStyle[] fmts = new CellStyle[3 + props.size()];

            if (rels.isEmpty()) {
                // no rows to add, but still add the relationship name field
                row[0] = rels.getRelname();
            }

            Map<String, IRI> proplkp = rels.getPropertiesAndDataTypes();
            IRI[] datatypes = new IRI[3 + props.size()];
            int dtcol = 3;
            for (String propname : props) {
                if (null != proplkp.get(propname)) {
                    datatypes[dtcol] = proplkp.get(propname);
                }
                dtcol++;
            }

            DataIterator di = rels.iterator();
            while (di.hasNext()) {
                LoadingNodeAndPropertyValues nap = di.next();
                if (rels.hasErrors()) {
                    log.warn("tab color not set (refactor, please)");
                    //currentsheet.setTabColor( IndexedColors.ROSE.getIndex() );
                }

                // do we need the relation name in the first column?
                row[0] = (nextRowIsFirstRowOfTab() ? rels.getRelname() : null);

                row[1] = nap.getSubject();
                fmts[1] = (nap.isSubjectError() ? errorstyle : null);

                row[2] = nap.getObject();
                fmts[2] = (nap.isObjectError() ? errorstyle : null);

                int col = 3;
                for (String prop : props) {
                    Value val = nap.get(prop);
                    row[col++] = (null == val ? null : val.stringValue());
                }

                addRow(row, fmts, datatypes);
            }
        }

        write(output);
    }

    /**
     * Is the next row the first one of the tab (excluding headers)?
     *
     * @return true if the next call to {@link #addRow(
     * java.lang.String[], org.apache.poi.ss.usermodel.CellStyle[],
     * org.eclipse.rdf4j.model.IRI[])} will be the first row of the tab
     */
    protected boolean nextRowIsFirstRowOfTab() {
        if (maxtabrows == rowcount) {
            return true;
        }

        if (currentheader.isEmpty()) {
            return (0 == rowcount);
        }
        return (1 == rowcount);
    }

    public void createWorkbook() {
        currentwb = new XSSFWorkbook();
    }

    /**
     * Convenience function to
     * {@link #createTab(java.lang.String, java.lang.String[])} without a header
     * row
     *
     * @param tabname the desired tab name
     * @return the actual tab name
     */
    public String createTab(String tabname) {
        return createTab(tabname, new String[0]);
    }

    /**
     * Creates a new tab, but adds a header row that will be propagated to any new
     * tabs if the number of rows added &gt; {@link #TAB_ROWLIMIT}. Subsequent
     * calls to {@link #addRow(java.lang.String[]) } will write to this new tab
     *
     * @param tabname the desired tab name
     * @param headerrow the header row to duplicate if new tabs must be created
     * @return the actual tab name
     */
    public String createTab(String tabname, String[] headerrow) {
        currentheader.clear();
        desiredtabname = tabname;
        String realname = generateSheetName(tabname, currentnames);

        if (null == currentwb) {
            createWorkbook();
        }

        currentsheet = currentwb.createSheet(realname);
        rowcount = 0;

        if (!(null == headerrow || 0 == headerrow.length)) {
            currentheader.addAll(Arrays.asList(headerrow));
            addRow(headerrow);
        }

        return realname;
    }

    /**
     * Creates a new row in the current tab. If the current tab has more than
     * {@link #TAB_ROWLIMIT} rows, a new tab (with a duplicate header row, if set)
     * will be created and the row added to that tab instead.
     *
     * @param values the data
     */
    public void addRow(String[] values) {
        addRow(values, null);
    }

    /**
     * Adds a new row to the current tab. If the new row requires a new tab to
     * also be created, do it
     *
     * @param values the row data
     * @param formatting cell formatting
     * @param datatypes
     * @return true, if a new tab is created, else false
     */
    public boolean addRow(String[] values, CellStyle[] formatting, IRI[] datatypes) {
        boolean newtab = (maxtabrows == rowcount);

        if (newtab) {
            // need to make a new tab
            createTab(desiredtabname, currentheader.toArray(new String[0]));
        }

        currentrow = currentsheet.createRow(rowcount++);
        for (int col = 0; col < values.length; col++) {
            Cell cell = currentrow.createCell(col);
            if (null != formatting) {
                if (formatting.length > col && null != formatting[col]) {
                    cell.setCellStyle(formatting[col]);
                }
            }

            String val = values[col];
            if (null != val) {
                if (null == datatypes[col]) {
                    if (NUMERIC.matcher(val).find()) {
                        cell.setCellValue(Double.parseDouble(val));
                    } else {
                        cell.setCellValue(val.replaceAll("\"", ""));
                    }
                } else {
                    // set the datatype for this element
                    String str = val.replaceAll("\"", "");
                    str = String.format("\"%s\"^^<%s>", str, datatypes[col]);
                    cell.setCellValue(str);
                }
            }
        }

        return newtab;
    }

    public boolean addRow(Object[] values) {
        return addRow(values, null);
    }

    public boolean addRow(Object[] values, CellStyle[] formatting) {
        String[] rows = new String[values.length];
        for (int i = 0; i < rows.length; i++) {
            rows[i] = (null == values[i] ? null : values[i].toString());
        }

        return addRow(rows, formatting, new IRI[values.length]);
    }

    /**
     * Writes the current worksheet to the given file. Any parent directories will
     * be created automatically
     *
     * @param output the file to write to
     * @throws IOException
     */
    public void write(File output) throws IOException {
        output.getParentFile().mkdirs();
        try (OutputStream newExcelFile = new BufferedOutputStream(new FileOutputStream(output))) {
            write(newExcelFile);
        }
    }

    public void write(OutputStream output) throws IOException {
        currentwb.write(output);
    }

    private String[] makeHeaderRow(LoadingSheetData b, Collection<String> props) {
        // make the headers
        List<String> heads = new ArrayList<>();
        if (b.isRel()) {
            heads.add("Relation");
            heads.add(b.getSubjectType());
            heads.add(b.getObjectType());
        } else {
            heads.add("Node");
            heads.add(b.getSubjectType());
        }

        for (String prop : props) {
            heads.add(prop);
        }

        return heads.toArray(new String[0]);
    }

    private void createWorkbook(ImportData importdata) {
        ImportMetadata data = importdata.getMetadata();

        List<String[]> mddata = new ArrayList<>();
        if (null != data.getSchemaBuilder()) {
            mddata.add(new String[] { "@prefix", ":schema", "<" + data.getSchemaBuilder().toString() + ">" });
        }

        if (null != data.getDataBuilder()) {
            mddata.add(new String[] { "@prefix", ":data", "<" + data.getDataBuilder().toString() + ">" });
        }

        if (null != data.getBase()) {
            mddata.add(new String[] { "@prefix", ":", "<" + data.getBase().toString() + ">" });
        }

        for (Map.Entry<String, String> en : data.getNamespaces().entrySet()) {
            mddata.add(new String[] { "@prefix", en.getKey(), "<" + en.getValue() + ">" });
        }

        for (String[] stmt : data.getStatements()) {
            mddata.add(new String[] { stmt[0], stmt[1], stmt[2] });
        }

        createWorkbook();

        List<String> tabnames = new ArrayList<>();
        Set<String> sheetnames = new HashSet<>();

        for (LoadingSheetData lsd : importdata.getSheets()) {
            int count = 0;

            // if we have too many rows for one tab, we have
            // to separate this sheet data into multiple tabs
            while (count < lsd.rows()) {
                String tname = generateSheetName(lsd.getName(), sheetnames);
                tabnames.add(tname);
                count += maxtabrows;
            }
        }

        // don't write a metadata sheet if we don't have anything to put in it
        final String metaSheetName = (mddata.isEmpty() ? null : "MetadataInfo");

        writeLoadingSheet(tabnames, metaSheetName);

        if (!mddata.isEmpty()) {
            XlsWriter.this.createTab(metaSheetName);
            boolean first = true;
            for (String[] row : mddata) {
                String actuals[] = new String[4];
                System.arraycopy(row, 0, actuals, 1, row.length);
                if (first) {
                    actuals[0] = "Metadata";
                    first = false;
                }
                addRow(actuals);
            }
        }
    }

    /**
     * Creates a loading sheet with the given name
     *
     * @param nodes
     * @param metaSheetName if not null, add a metadata tab with this info
     */
    protected void writeLoadingSheet(Collection<String> nodes, String metaSheetName) {

        List<String[]> data = new ArrayList<>();
        data.add(new String[] { "Sheet Name", "Type" });
        for (String key : nodes) {
            data.add(new String[] { key, "Usual" });
        }
        if (null != metaSheetName) {
            data.add(new String[] { metaSheetName, "Metadata" });
        }

        XlsWriter.this.createTab("Loader");
        for (String[] row : data) {
            addRow(row);
        }
    }

    /**
     * Common logic for finding a name for an excel workbook worksheet that is
     * unique for that workbook and not longer than 32 characters. Each call to
     * this function with the same <code>nodeKey</code> will generate a different
     * name (Excel tabs cannot have identical names).
     *
     * @param nodeKey String to start with
     * @param keySet Set of names that are already in use. the return of this
     * function is automatically added to this set
     *
     * @return
     */
    public static String generateSheetName(String nodeKey, Set<String> keySet) {
        final int maxSheetNameLength = 31;

        if (nodeKey.length() > maxSheetNameLength) {
            nodeKey = nodeKey.substring(0, maxSheetNameLength);
        }

        while (nodeKey.endsWith("-")) {
            nodeKey = nodeKey.substring(0, nodeKey.length() - 1);
        }

        int inc = 10;
        final String loopkey = nodeKey;
        while (keySet.contains(nodeKey)) {
            boolean firstloop = loopkey.equals(nodeKey);

            // we don't have to chop off anything on the first loop unless it's too long
            if (firstloop) {
                if (nodeKey.length() > maxSheetNameLength - 2) {
                    nodeKey = nodeKey.substring(0, maxSheetNameLength - 2);
                }
            } else {
                // on subsequent loops, we need to chop off the last thing we added
                nodeKey = nodeKey.substring(0, nodeKey.length() - 2);
            }

            nodeKey = nodeKey + (inc++);
        }
        keySet.add(nodeKey);

        return nodeKey;
    }

    public static class NodeAndPropertyValues extends HashMap<IRI, Value> {

        private final IRI subject;
        private final IRI object;

        public NodeAndPropertyValues(IRI subj) {
            this(subj, null);
        }

        public NodeAndPropertyValues(IRI subject, IRI object) {
            this.subject = subject;
            this.object = object;
        }

        public IRI getSubject() {
            return subject;
        }

        public IRI getObject() {
            return object;
        }

        public boolean isRel() {
            return (null != object);
        }
    }

    public static class SheetRowCol {

        public final String sheetname;
        public final int row;
        public final int col;

        public SheetRowCol(String sheetname, int row, int col) {
            this.sheetname = sheetname;
            this.row = row;
            this.col = col;
        }

        @Override
        public int hashCode() {
            int hash = 7;
            hash = 41 * hash + Objects.hashCode(this.sheetname);
            hash = 41 * hash + this.row;
            hash = 41 * hash + this.col;
            return hash;
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            final SheetRowCol other = (SheetRowCol) obj;
            if (!Objects.equals(this.sheetname, other.sheetname)) {
                return false;
            }
            if (this.row != other.row) {
                return false;
            }
            return (this.col == other.col);
        }

        @Override
        public String toString() {
            return sheetname + " (" + row + "," + col + ")";
        }
    }

    public static class CellFormatting {

        public final Color background;

        public CellFormatting(Color background) {
            this.background = background;
        }

        @Override
        public String toString() {
            return background.toString();
        }

        @Override
        public int hashCode() {
            int hash = 5;
            hash = 89 * hash + Objects.hashCode(this.background);
            return hash;
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            final CellFormatting other = (CellFormatting) obj;
            return (Objects.equals(this.background, other.background));
        }
    }
}