Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package com.ostrichemulators.semtool.poi.main.xlsxml; import com.ostrichemulators.semtool.poi.main.LoadingSheetData; import static com.ostrichemulators.semtool.util.RDFDatatypeTools.getRDFStringValue; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.log4j.Logger; import org.apache.poi.ss.usermodel.CellType; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.ValueFactory; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.xml.sax.Attributes; import org.xml.sax.SAXException; /** * * @author ryan */ public class LoadingSheetXmlHandler extends XlsXmlBase { private static final Logger log = Logger.getLogger(LoadingSheetXmlHandler.class); private static final Map<String, CellType> formats = new HashMap<>(); private static final ValueFactory vf = SimpleValueFactory.getInstance(); private final Map<Integer, Value> currentrowdata = new LinkedHashMap<>(); private final Map<Integer, String> proplkp = new HashMap<>(); private final Map<String, String> namespaces; private final LoadingSheetData loadingsheet; private final StylesTable styles; private boolean isdate = false; private int rownum; private int colnum; private CellType celltype; static { formats.put("s", CellType.STRING); formats.put("n", CellType.NUMERIC); formats.put("b", CellType.BOOLEAN); } public static int getColNum(String colname) { int sum = 0; for (int i = 0; i < colname.length(); i++) { sum *= 26; char charat = colname.charAt(i); sum += (charat - 'A' + 1); } return sum - 1; } public LoadingSheetXmlHandler(List<String> sst, StylesTable styles, String sheetname, Map<String, String> ns, boolean lsInMem) { super(sst); this.styles = styles; namespaces = ns; // this will automatically convert to a relationship sheet if needed loadingsheet = LoadingSheetData.nodesheet(sheetname, "", lsInMem); } public LoadingSheetData getSheet() { return loadingsheet; } @Override public void startDocument() throws SAXException { log.debug("Processing sheet " + loadingsheet.getName()); super.startDocument(); } @Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if (null != name) { switch (name) { case "row": rownum = Integer.parseInt(attributes.getValue("r")) - 1; currentrowdata.clear(); break; case "c": // c is a new cell String celltypestr = attributes.getValue("t"); celltype = (formats.containsKey(celltypestr) ? formats.get(celltypestr) : CellType.BLANK); // dates don't always have a type attribute if (CellType.NUMERIC == celltype || null == celltypestr) { celltype = CellType.NUMERIC; // check if it's a date String styleidstr = attributes.getValue("s"); int styleid = (null == styleidstr ? 0 : Integer.parseInt(styleidstr)); XSSFCellStyle style = styles.getStyleAt(styleid); int formatIndex = style.getDataFormat(); String formatString = style.getDataFormatString(); isdate = DateUtil.isADateFormat(formatIndex, formatString); } String colname = attributes.getValue("r"); colnum = getColNum(colname.substring(0, colname.lastIndexOf(Integer.toString(rownum + 1)))); break; case "v": // new value for a cell setReading(true); resetContents(); break; } } } @Override public void endElement(String uri, String localName, String name) throws SAXException { if ("row".equals(name)) { if (0 == rownum) { proplkp.clear(); String sheettype = currentrowdata.remove(0).stringValue(); String subjtype = currentrowdata.remove(1).stringValue(); loadingsheet.setSubjectType(subjtype); if ("relation".equalsIgnoreCase(sheettype)) { String objtype = currentrowdata.remove(2).stringValue(); loadingsheet.setObjectType(objtype); } for (Map.Entry<Integer, Value> en : currentrowdata.entrySet()) { loadingsheet.addProperty(en.getValue().stringValue()); proplkp.put(en.getKey(), en.getValue().stringValue()); } return; } else if (1 == rownum) { if (loadingsheet.isRel()) { loadingsheet.setRelname(currentrowdata.get(0).stringValue()); } } fillInRow(currentrowdata, loadingsheet, proplkp); } if (isReading()) { // If we've fully read the data, add it to our row mapping switch (celltype) { case STRING: String strval = this.getStringFromContentsInt(); if (!strval.isEmpty()) { currentrowdata.put(colnum, getRDFStringValue(strval, namespaces, vf)); } break; case BLANK: break; case BOOLEAN: currentrowdata.put(colnum, vf.createLiteral("1".equals(getContents()))); break; case NUMERIC: if (isdate) { currentrowdata.put(colnum, vf.createLiteral(DateUtil.getJavaDate(Double.parseDouble(getContents())))); } else { // see if we can use an integer instead of a double // (excel doesn't distinguish between the two, but I can't figure // out how to get exactly what's shown in the cell via the cell style) String contents = getContents(); Value val = (contents.endsWith(".0") ? vf.createLiteral(Integer.parseInt(contents.substring(0, contents.length() - 2))) : vf.createLiteral(Double.parseDouble(contents))); currentrowdata.put(colnum, val); } break; case ERROR: log.warn("unhandled cell type: CELL_TYPE_ERROR"); break; case FORMULA: log.warn("unhandled cell type: CELL_TYPE_FORMULA"); break; default: log.warn("unhandled cell type: " + celltype); } setReading(false); } } private static void fillInRow(Map<Integer, Value> rowdata, LoadingSheetData sheet, Map<Integer, String> proplkp) { removeComments(rowdata); if (rowdata.isEmpty() || !rowdata.containsKey(1)) { return; } rowdata.remove(0); String slabel = rowdata.remove(1).stringValue(); String olabel = null; if (sheet.isRel() && rowdata.containsKey(2)) { olabel = rowdata.remove(2).stringValue(); } Map<String, Value> props = new HashMap<>(); for (Map.Entry<Integer, Value> en : rowdata.entrySet()) { props.put(proplkp.get(en.getKey()), en.getValue()); } sheet.add(slabel, olabel, props); } /** * Removes any comments from the given mapping. Any column index after the * comment is likewise removed * * @param rowdata the data to remove comments from */ private static void removeComments(Map<Integer, Value> rowdata) { int commentcol = Integer.MAX_VALUE; List<Integer> removers = new ArrayList<>(); for (Map.Entry<Integer, Value> en : rowdata.entrySet()) { int col = en.getKey(); String val = en.getValue().stringValue(); // already seen a comment cell if (col > commentcol) { removers.add(col); } if (val.startsWith("#")) { commentcol = col; removers.add(col); } } for (int col : removers) { rowdata.remove(col); } } @Override public void endDocument() throws SAXException { super.endDocument(); loadingsheet.finishLoading(); if (log.isDebugEnabled()) { log.debug("Loading sheet " + loadingsheet.getName() + " processed. properties: " + Arrays.toString(loadingsheet.getProperties().toArray())); log.debug("Created " + loadingsheet.rows() + (loadingsheet.isRel() ? " relationships" : " entities")); } } }