com.ostrichemulators.semtool.poi.main.xlsxml.LoadingSheetXmlHandler.java Source code

Java tutorial

Introduction

Here is the source code for com.ostrichemulators.semtool.poi.main.xlsxml.LoadingSheetXmlHandler.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.ostrichemulators.semtool.poi.main.xlsxml;

import com.ostrichemulators.semtool.poi.main.LoadingSheetData;
import static com.ostrichemulators.semtool.util.RDFDatatypeTools.getRDFStringValue;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 *
 * @author ryan
 */
public class LoadingSheetXmlHandler extends XlsXmlBase {

    private static final Logger log = Logger.getLogger(LoadingSheetXmlHandler.class);
    private static final Map<String, CellType> formats = new HashMap<>();
    private static final ValueFactory vf = SimpleValueFactory.getInstance();

    private final Map<Integer, Value> currentrowdata = new LinkedHashMap<>();
    private final Map<Integer, String> proplkp = new HashMap<>();
    private final Map<String, String> namespaces;
    private final LoadingSheetData loadingsheet;
    private final StylesTable styles;
    private boolean isdate = false;
    private int rownum;
    private int colnum;
    private CellType celltype;

    static {
        formats.put("s", CellType.STRING);
        formats.put("n", CellType.NUMERIC);
        formats.put("b", CellType.BOOLEAN);
    }

    public static int getColNum(String colname) {
        int sum = 0;

        for (int i = 0; i < colname.length(); i++) {
            sum *= 26;
            char charat = colname.charAt(i);
            sum += (charat - 'A' + 1);
        }

        return sum - 1;
    }

    public LoadingSheetXmlHandler(List<String> sst, StylesTable styles, String sheetname, Map<String, String> ns,
            boolean lsInMem) {
        super(sst);
        this.styles = styles;
        namespaces = ns;

        // this will automatically convert to a relationship sheet if needed
        loadingsheet = LoadingSheetData.nodesheet(sheetname, "", lsInMem);
    }

    public LoadingSheetData getSheet() {
        return loadingsheet;
    }

    @Override
    public void startDocument() throws SAXException {
        log.debug("Processing sheet " + loadingsheet.getName());
        super.startDocument();
    }

    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        if (null != name) {
            switch (name) {
            case "row":
                rownum = Integer.parseInt(attributes.getValue("r")) - 1;
                currentrowdata.clear();
                break;
            case "c": // c is a new cell
                String celltypestr = attributes.getValue("t");
                celltype = (formats.containsKey(celltypestr) ? formats.get(celltypestr) : CellType.BLANK);
                // dates don't always have a type attribute
                if (CellType.NUMERIC == celltype || null == celltypestr) {
                    celltype = CellType.NUMERIC;

                    // check if it's a date
                    String styleidstr = attributes.getValue("s");
                    int styleid = (null == styleidstr ? 0 : Integer.parseInt(styleidstr));

                    XSSFCellStyle style = styles.getStyleAt(styleid);
                    int formatIndex = style.getDataFormat();
                    String formatString = style.getDataFormatString();
                    isdate = DateUtil.isADateFormat(formatIndex, formatString);
                }

                String colname = attributes.getValue("r");
                colnum = getColNum(colname.substring(0, colname.lastIndexOf(Integer.toString(rownum + 1))));
                break;
            case "v": // new value for a cell
                setReading(true);
                resetContents();
                break;
            }
        }
    }

    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {

        if ("row".equals(name)) {
            if (0 == rownum) {
                proplkp.clear();

                String sheettype = currentrowdata.remove(0).stringValue();
                String subjtype = currentrowdata.remove(1).stringValue();

                loadingsheet.setSubjectType(subjtype);

                if ("relation".equalsIgnoreCase(sheettype)) {
                    String objtype = currentrowdata.remove(2).stringValue();
                    loadingsheet.setObjectType(objtype);
                }

                for (Map.Entry<Integer, Value> en : currentrowdata.entrySet()) {
                    loadingsheet.addProperty(en.getValue().stringValue());
                    proplkp.put(en.getKey(), en.getValue().stringValue());
                }

                return;
            } else if (1 == rownum) {
                if (loadingsheet.isRel()) {
                    loadingsheet.setRelname(currentrowdata.get(0).stringValue());
                }
            }

            fillInRow(currentrowdata, loadingsheet, proplkp);
        }

        if (isReading()) {
            // If we've fully read the data, add it to our row mapping
            switch (celltype) {
            case STRING:
                String strval = this.getStringFromContentsInt();
                if (!strval.isEmpty()) {
                    currentrowdata.put(colnum, getRDFStringValue(strval, namespaces, vf));
                }
                break;
            case BLANK:
                break;
            case BOOLEAN:
                currentrowdata.put(colnum, vf.createLiteral("1".equals(getContents())));
                break;
            case NUMERIC:
                if (isdate) {
                    currentrowdata.put(colnum,
                            vf.createLiteral(DateUtil.getJavaDate(Double.parseDouble(getContents()))));
                } else {
                    // see if we can use an integer instead of a double
                    // (excel doesn't distinguish between the two, but I can't figure
                    // out how to get exactly what's shown in the cell via the cell style)
                    String contents = getContents();
                    Value val = (contents.endsWith(".0")
                            ? vf.createLiteral(Integer.parseInt(contents.substring(0, contents.length() - 2)))
                            : vf.createLiteral(Double.parseDouble(contents)));
                    currentrowdata.put(colnum, val);
                }
                break;
            case ERROR:
                log.warn("unhandled cell type: CELL_TYPE_ERROR");
                break;
            case FORMULA:
                log.warn("unhandled cell type: CELL_TYPE_FORMULA");
                break;
            default:
                log.warn("unhandled cell type: " + celltype);
            }

            setReading(false);
        }
    }

    private static void fillInRow(Map<Integer, Value> rowdata, LoadingSheetData sheet,
            Map<Integer, String> proplkp) {
        removeComments(rowdata);
        if (rowdata.isEmpty() || !rowdata.containsKey(1)) {
            return;
        }

        rowdata.remove(0);
        String slabel = rowdata.remove(1).stringValue();
        String olabel = null;
        if (sheet.isRel() && rowdata.containsKey(2)) {
            olabel = rowdata.remove(2).stringValue();
        }

        Map<String, Value> props = new HashMap<>();
        for (Map.Entry<Integer, Value> en : rowdata.entrySet()) {
            props.put(proplkp.get(en.getKey()), en.getValue());
        }

        sheet.add(slabel, olabel, props);
    }

    /**
     * Removes any comments from the given mapping. Any column index after the
     * comment is likewise removed
     *
     * @param rowdata the data to remove comments from
     */
    private static void removeComments(Map<Integer, Value> rowdata) {
        int commentcol = Integer.MAX_VALUE;
        List<Integer> removers = new ArrayList<>();
        for (Map.Entry<Integer, Value> en : rowdata.entrySet()) {
            int col = en.getKey();
            String val = en.getValue().stringValue();

            // already seen a comment cell
            if (col > commentcol) {
                removers.add(col);
            }

            if (val.startsWith("#")) {
                commentcol = col;
                removers.add(col);
            }
        }

        for (int col : removers) {
            rowdata.remove(col);
        }
    }

    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
        loadingsheet.finishLoading();

        if (log.isDebugEnabled()) {
            log.debug("Loading sheet " + loadingsheet.getName() + " processed. properties: "
                    + Arrays.toString(loadingsheet.getProperties().toArray()));
            log.debug("Created " + loadingsheet.rows() + (loadingsheet.isRel() ? " relationships" : " entities"));
        }
    }
}