Java tutorial
/** * ***************************************************************************** * Copyright 2013 SEMOSS.ORG * * This file is part of SEMOSS. * * SEMOSS is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * SEMOSS is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * SEMOSS. If not, see <http://www.gnu.org/licenses/>. * **************************************************************************** */ package com.ostrichemulators.semtool.poi.main; import com.ostrichemulators.semtool.poi.main.LoadingSheetData.DataIterator; import com.ostrichemulators.semtool.poi.main.LoadingSheetData.LoadingNodeAndPropertyValues; import java.io.BufferedOutputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.log4j.Logger; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.eclipse.rdf4j.model.IRI; import java.awt.Color; import java.io.File; import java.util.Arrays; import java.util.HashSet; import java.util.Objects; import java.util.Set; import org.apache.poi.ss.usermodel.CellStyle; import org.apache.poi.ss.usermodel.CellType; import org.apache.poi.ss.usermodel.FillPatternType; import org.apache.poi.ss.usermodel.IndexedColors; import org.apache.poi.xssf.usermodel.XSSFRow; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.eclipse.rdf4j.model.Value; /** * Create a workbook containing data formated in the Microsoft Excel Sheet * Format. This class has two modes of operation: the ImportData mode, and the * raw mode. In raw mode, callers should use {@link #createWorkbook() }, * {@link #createTab(java.lang.String) }, {@link #addRow(java.lang.String[]) } * and {@link #write(java.io.File) } to generate the XLS file. In ImportData * mode, just use the all-in-one function * {@link #write(gov.va.semoss.poi.main.ImportData, java.io.File) } instead. */ public class XlsWriter implements GraphWriter { private static final Logger log = Logger.getLogger(XlsWriter.class); private static final int TAB_ROWLIMIT = 999999; private XSSFWorkbook currentwb; private XSSFSheet currentsheet; private XSSFRow currentrow; private String desiredtabname; private final List<String> currentheader = new ArrayList<>(); private int rowcount = 0; private final Set<String> currentnames = new HashSet<>(); private int maxtabrows = TAB_ROWLIMIT; /** * Sets the max rows that can be added to a tab before continuing the data on * another tab. This must be less than {@link #TAB_ROWLIMIT} * * @param rowspertab */ public void setTabRowLimit(int rowspertab) { if (rowspertab < 1 || rowspertab > TAB_ROWLIMIT) { log.warn("cannot set rows/tab to " + rowspertab + "; using " + TAB_ROWLIMIT + " instead"); rowspertab = TAB_ROWLIMIT; } maxtabrows = rowspertab; } public XSSFWorkbook getCurrentWb() { return currentwb; } public XSSFSheet getCurrentSheet() { return currentsheet; } public XSSFRow getCurrentRow() { return currentrow; } @Override public void write(ImportData data, File output) throws IOException { write(data, new FileOutputStream(output)); } @Override public void write(ImportData data, OutputStream output) throws IOException { createWorkbook(data); CellStyle errorstyle = currentwb.createCellStyle(); errorstyle.setFillPattern(FillPatternType.SOLID_FOREGROUND); errorstyle.setFillForegroundColor(IndexedColors.PINK.getIndex()); for (LoadingSheetData nodes : data.getNodes()) { List<String> props = new ArrayList<>(nodes.getProperties()); createTab(nodes.getName(), makeHeaderRow(nodes, props)); // +2 -> 1 for the blank first col and 1 for the subject type String[] row = new String[2 + props.size()]; CellStyle[] fmts = new CellStyle[2 + props.size()]; Map<String, IRI> proplkp = nodes.getPropertiesAndDataTypes(); IRI[] datatypes = new IRI[2 + props.size()]; int dtcol = 2; for (String propname : props) { if (null != proplkp.get(propname)) { datatypes[dtcol] = proplkp.get(propname); } dtcol++; } DataIterator di = nodes.iterator(); while (di.hasNext()) { LoadingNodeAndPropertyValues nap = di.next(); row[1] = nap.getSubject(); fmts[1] = (nap.isSubjectError() ? errorstyle : null); int col = 2; for (String prop : props) { Value val = nap.get(prop); row[col++] = (null == val ? null : val.stringValue()); } addRow(row, fmts, datatypes); } } for (LoadingSheetData rels : data.getRels()) { List<String> props = new ArrayList<>(rels.getProperties()); createTab(rels.getName(), makeHeaderRow(rels, props)); // +3 -> 1 for the blank first col and 1 for the subject type, 1 for object type String[] row = new String[3 + props.size()]; CellStyle[] fmts = new CellStyle[3 + props.size()]; if (rels.isEmpty()) { // no rows to add, but still add the relationship name field row[0] = rels.getRelname(); } Map<String, IRI> proplkp = rels.getPropertiesAndDataTypes(); IRI[] datatypes = new IRI[3 + props.size()]; int dtcol = 3; for (String propname : props) { if (null != proplkp.get(propname)) { datatypes[dtcol] = proplkp.get(propname); } dtcol++; } DataIterator di = rels.iterator(); while (di.hasNext()) { LoadingNodeAndPropertyValues nap = di.next(); if (rels.hasErrors()) { log.warn("tab color not set (refactor, please)"); //currentsheet.setTabColor( IndexedColors.ROSE.getIndex() ); } // do we need the relation name in the first column? row[0] = (nextRowIsFirstRowOfTab() ? rels.getRelname() : null); row[1] = nap.getSubject(); fmts[1] = (nap.isSubjectError() ? errorstyle : null); row[2] = nap.getObject(); fmts[2] = (nap.isObjectError() ? errorstyle : null); int col = 3; for (String prop : props) { Value val = nap.get(prop); row[col++] = (null == val ? null : val.stringValue()); } addRow(row, fmts, datatypes); } } write(output); } /** * Is the next row the first one of the tab (excluding headers)? * * @return true if the next call to {@link #addRow( * java.lang.String[], org.apache.poi.ss.usermodel.CellStyle[], * org.eclipse.rdf4j.model.IRI[])} will be the first row of the tab */ protected boolean nextRowIsFirstRowOfTab() { if (maxtabrows == rowcount) { return true; } if (currentheader.isEmpty()) { return (0 == rowcount); } return (1 == rowcount); } public void createWorkbook() { currentwb = new XSSFWorkbook(); } /** * Convenience function to * {@link #createTab(java.lang.String, java.lang.String[])} without a header * row * * @param tabname the desired tab name * @return the actual tab name */ public String createTab(String tabname) { return createTab(tabname, new String[0]); } /** * Creates a new tab, but adds a header row that will be propagated to any new * tabs if the number of rows added > {@link #TAB_ROWLIMIT}. Subsequent * calls to {@link #addRow(java.lang.String[]) } will write to this new tab * * @param tabname the desired tab name * @param headerrow the header row to duplicate if new tabs must be created * @return the actual tab name */ public String createTab(String tabname, String[] headerrow) { currentheader.clear(); desiredtabname = tabname; String realname = generateSheetName(tabname, currentnames); if (null == currentwb) { createWorkbook(); } currentsheet = currentwb.createSheet(realname); rowcount = 0; if (!(null == headerrow || 0 == headerrow.length)) { currentheader.addAll(Arrays.asList(headerrow)); addRow(headerrow); } return realname; } /** * Creates a new row in the current tab. If the current tab has more than * {@link #TAB_ROWLIMIT} rows, a new tab (with a duplicate header row, if set) * will be created and the row added to that tab instead. * * @param values the data */ public void addRow(String[] values) { addRow(values, null); } /** * Adds a new row to the current tab. If the new row requires a new tab to * also be created, do it * * @param values the row data * @param formatting cell formatting * @param datatypes * @return true, if a new tab is created, else false */ public boolean addRow(String[] values, CellStyle[] formatting, IRI[] datatypes) { boolean newtab = (maxtabrows == rowcount); if (newtab) { // need to make a new tab createTab(desiredtabname, currentheader.toArray(new String[0])); } currentrow = currentsheet.createRow(rowcount++); for (int col = 0; col < values.length; col++) { Cell cell = currentrow.createCell(col); if (null != formatting) { if (formatting.length > col && null != formatting[col]) { cell.setCellStyle(formatting[col]); } } String val = values[col]; if (null != val) { if (null == datatypes[col]) { if (NUMERIC.matcher(val).find()) { cell.setCellValue(Double.parseDouble(val)); } else { cell.setCellValue(val.replaceAll("\"", "")); } } else { // set the datatype for this element String str = val.replaceAll("\"", ""); str = String.format("\"%s\"^^<%s>", str, datatypes[col]); cell.setCellValue(str); } } } return newtab; } public boolean addRow(Object[] values) { return addRow(values, null); } public boolean addRow(Object[] values, CellStyle[] formatting) { String[] rows = new String[values.length]; for (int i = 0; i < rows.length; i++) { rows[i] = (null == values[i] ? null : values[i].toString()); } return addRow(rows, formatting, new IRI[values.length]); } /** * Writes the current worksheet to the given file. Any parent directories will * be created automatically * * @param output the file to write to * @throws IOException */ public void write(File output) throws IOException { output.getParentFile().mkdirs(); try (OutputStream newExcelFile = new BufferedOutputStream(new FileOutputStream(output))) { write(newExcelFile); } } public void write(OutputStream output) throws IOException { currentwb.write(output); } private String[] makeHeaderRow(LoadingSheetData b, Collection<String> props) { // make the headers List<String> heads = new ArrayList<>(); if (b.isRel()) { heads.add("Relation"); heads.add(b.getSubjectType()); heads.add(b.getObjectType()); } else { heads.add("Node"); heads.add(b.getSubjectType()); } for (String prop : props) { heads.add(prop); } return heads.toArray(new String[0]); } private void createWorkbook(ImportData importdata) { ImportMetadata data = importdata.getMetadata(); List<String[]> mddata = new ArrayList<>(); if (null != data.getSchemaBuilder()) { mddata.add(new String[] { "@prefix", ":schema", "<" + data.getSchemaBuilder().toString() + ">" }); } if (null != data.getDataBuilder()) { mddata.add(new String[] { "@prefix", ":data", "<" + data.getDataBuilder().toString() + ">" }); } if (null != data.getBase()) { mddata.add(new String[] { "@prefix", ":", "<" + data.getBase().toString() + ">" }); } for (Map.Entry<String, String> en : data.getNamespaces().entrySet()) { mddata.add(new String[] { "@prefix", en.getKey(), "<" + en.getValue() + ">" }); } for (String[] stmt : data.getStatements()) { mddata.add(new String[] { stmt[0], stmt[1], stmt[2] }); } createWorkbook(); List<String> tabnames = new ArrayList<>(); Set<String> sheetnames = new HashSet<>(); for (LoadingSheetData lsd : importdata.getSheets()) { int count = 0; // if we have too many rows for one tab, we have // to separate this sheet data into multiple tabs while (count < lsd.rows()) { String tname = generateSheetName(lsd.getName(), sheetnames); tabnames.add(tname); count += maxtabrows; } } // don't write a metadata sheet if we don't have anything to put in it final String metaSheetName = (mddata.isEmpty() ? null : "MetadataInfo"); writeLoadingSheet(tabnames, metaSheetName); if (!mddata.isEmpty()) { XlsWriter.this.createTab(metaSheetName); boolean first = true; for (String[] row : mddata) { String actuals[] = new String[4]; System.arraycopy(row, 0, actuals, 1, row.length); if (first) { actuals[0] = "Metadata"; first = false; } addRow(actuals); } } } /** * Creates a loading sheet with the given name * * @param nodes * @param metaSheetName if not null, add a metadata tab with this info */ protected void writeLoadingSheet(Collection<String> nodes, String metaSheetName) { List<String[]> data = new ArrayList<>(); data.add(new String[] { "Sheet Name", "Type" }); for (String key : nodes) { data.add(new String[] { key, "Usual" }); } if (null != metaSheetName) { data.add(new String[] { metaSheetName, "Metadata" }); } XlsWriter.this.createTab("Loader"); for (String[] row : data) { addRow(row); } } /** * Common logic for finding a name for an excel workbook worksheet that is * unique for that workbook and not longer than 32 characters. Each call to * this function with the same <code>nodeKey</code> will generate a different * name (Excel tabs cannot have identical names). * * @param nodeKey String to start with * @param keySet Set of names that are already in use. the return of this * function is automatically added to this set * * @return */ public static String generateSheetName(String nodeKey, Set<String> keySet) { final int maxSheetNameLength = 31; if (nodeKey.length() > maxSheetNameLength) { nodeKey = nodeKey.substring(0, maxSheetNameLength); } while (nodeKey.endsWith("-")) { nodeKey = nodeKey.substring(0, nodeKey.length() - 1); } int inc = 10; final String loopkey = nodeKey; while (keySet.contains(nodeKey)) { boolean firstloop = loopkey.equals(nodeKey); // we don't have to chop off anything on the first loop unless it's too long if (firstloop) { if (nodeKey.length() > maxSheetNameLength - 2) { nodeKey = nodeKey.substring(0, maxSheetNameLength - 2); } } else { // on subsequent loops, we need to chop off the last thing we added nodeKey = nodeKey.substring(0, nodeKey.length() - 2); } nodeKey = nodeKey + (inc++); } keySet.add(nodeKey); return nodeKey; } public static class NodeAndPropertyValues extends HashMap<IRI, Value> { private final IRI subject; private final IRI object; public NodeAndPropertyValues(IRI subj) { this(subj, null); } public NodeAndPropertyValues(IRI subject, IRI object) { this.subject = subject; this.object = object; } public IRI getSubject() { return subject; } public IRI getObject() { return object; } public boolean isRel() { return (null != object); } } public static class SheetRowCol { public final String sheetname; public final int row; public final int col; public SheetRowCol(String sheetname, int row, int col) { this.sheetname = sheetname; this.row = row; this.col = col; } @Override public int hashCode() { int hash = 7; hash = 41 * hash + Objects.hashCode(this.sheetname); hash = 41 * hash + this.row; hash = 41 * hash + this.col; return hash; } @Override public boolean equals(Object obj) { if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } final SheetRowCol other = (SheetRowCol) obj; if (!Objects.equals(this.sheetname, other.sheetname)) { return false; } if (this.row != other.row) { return false; } return (this.col == other.col); } @Override public String toString() { return sheetname + " (" + row + "," + col + ")"; } } public static class CellFormatting { public final Color background; public CellFormatting(Color background) { this.background = background; } @Override public String toString() { return background.toString(); } @Override public int hashCode() { int hash = 5; hash = 89 * hash + Objects.hashCode(this.background); return hash; } @Override public boolean equals(Object obj) { if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } final CellFormatting other = (CellFormatting) obj; return (Objects.equals(this.background, other.background)); } } }