com.ostrichemulators.semtool.poi.main.POIReader.java Source code

Java tutorial

Introduction

Here is the source code for com.ostrichemulators.semtool.poi.main.POIReader.java

Source

/**
 * *****************************************************************************
 * Copyright 2013 SEMOSS.ORG
 *
 * This file is part of SEMOSS.
 *
 * SEMOSS is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * SEMOSS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * SEMOSS. If not, see <http://www.gnu.org/licenses/>.
 * ****************************************************************************
 */
package com.ostrichemulators.semtool.poi.main;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.FormulaError;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;

/**
 * Loading data into SEMOSS using Microsoft Excel files.
 * <p>
 * Loading Sheet files are read through {@link LowMemXlsReader} (see
 * {@link #getMetadata(File)} and {@link #readOneFile(File)}). Workbooks that
 * are not Loading Sheets can be read cell-by-cell with the static
 * {@code readNonloadingSheet} methods.
 */
public class POIReader implements ImportFileReader {

    private static final Logger logger = Logger.getLogger(POIReader.class);
    private static final ValueFactory VF = SimpleValueFactory.getInstance();

    /** Forwarded to {@link LowMemXlsReader#keepSheetDataInMemory} by {@link #readOneFile(File)}. */
    private boolean keepLoadInMemory = false;

    /**
     * Reads an arbitrary .xlsx file as plain data and records the file's URI
     * as the source of the returned data.
     * <p>
     * The input stream and the workbook are both closed before this method
     * returns; every cell value has been copied into the result by then.
     *
     * @param file the workbook file to read
     * @return the data of every non-empty sheet in the workbook
     * @throws IOException if the file cannot be opened or read
     */
    public static ImportData readNonloadingSheet(File file) throws IOException {
        try (FileInputStream stream = new FileInputStream(file);
                Workbook workbook = new XSSFWorkbook(stream)) {
            ImportData d = readNonloadingSheet(workbook);
            d.getMetadata().setSourceOfData(VF.createIRI(file.toURI().toString()));
            return d;
        }
    }

    /**
     * Reads every sheet of an arbitrary workbook as plain data.
     * <p>
     * For each sheet a {@link LoadingSheetData} is created from the sheet
     * name and the literal {@code "A"}. Columns after the first are registered
     * as properties named by their 0-based column index ("1", "2", ...). Each
     * existing row is then added using the text of its first cell plus a map
     * of its non-empty remaining cells, keyed by that same column index.
     * Sheets whose {@code LoadingSheetData} reports empty are left out.
     * <p>
     * The workbook is not closed by this method.
     *
     * @param workbook the workbook to read
     * @return the collected sheet data
     */
    public static ImportData readNonloadingSheet(Workbook workbook) {
        ImportData id = new ImportData();

        int sheets = workbook.getNumberOfSheets();
        for (int sheetnum = 0; sheetnum < sheets; sheetnum++) {
            Sheet sheet = workbook.getSheetAt(sheetnum);
            String sheetname = workbook.getSheetName(sheetnum);

            // we need to shoehorn the arbitrary data from a spreadsheet into our
            // ImportData class, which has restrictions on the data...we're going
            // to do it by figuring out the row with the most columns, and then
            // naming every column after the first with its column index
            // ("1", "2", ...), then load everything as if it was plain data
            int rows = sheet.getLastRowNum();

            // first, figure out our max number of columns
            int maxcols = maxColumnCount(sheet, rows);

            // second, make "properties" for each column
            LoadingSheetData nlsd = new LoadingSheetData(sheetname, "A");
            for (int c = 1; c < maxcols; c++) {
                nlsd.addProperty(Integer.toString(c));
            }

            // lastly, fill the sheets
            for (int r = 0; r <= rows; r++) {
                Row row = sheet.getRow(r);
                if (null != row) {
                    Map<String, Value> propmap = new HashMap<>();

                    // getLastCellNum() is the last cell index PLUS ONE (or -1
                    // for a row without cells), so it is an exclusive bound
                    int lastpropcol = row.getLastCellNum();
                    for (int c = 1; c < lastpropcol; c++) {
                        String val = getString(row.getCell(c));
                        if (!val.isEmpty()) {
                            propmap.put(Integer.toString(c), VF.createLiteral(val));
                        }
                    }

                    nlsd.add(getString(row.getCell(0)), propmap);
                }
            }

            if (!nlsd.isEmpty()) {
                id.add(nlsd);
            }
        }

        return id;
    }

    /**
     * Finds the largest column count among the existing rows of a sheet.
     *
     * @param sheet the sheet to scan
     * @param lastRow the 0-based index of the last row to inspect (inclusive)
     * @return the largest {@link Row#getLastCellNum()} seen, or 0 if no row
     * contains any cell
     */
    private static int maxColumnCount(Sheet sheet, int lastRow) {
        int maxcols = 0;
        for (int r = 0; r <= lastRow; r++) {
            Row row = sheet.getRow(r);
            if (null != row) {
                maxcols = Math.max(maxcols, row.getLastCellNum());
            }
        }
        return maxcols;
    }

    /**
     * Retrieves the import metadata of a file and records the file's URI as
     * its source. The underlying reader is always released.
     *
     * @param file the file to inspect
     * @return the metadata reported by {@link LowMemXlsReader#getMetadata()}
     * @throws IOException declared by the reader calls
     * @throws ImportValidationException declared by the reader calls
     */
    @Override
    public ImportMetadata getMetadata(File file) throws IOException, ImportValidationException {
        logger.debug("getting metadata from file: " + file);
        LowMemXlsReader reader = null;
        try {
            reader = new LowMemXlsReader(file);
            ImportMetadata data = reader.getMetadata();
            data.setSourceOfData(VF.createIRI(file.toURI().toString()));
            return data;
        } finally {
            // reader stays null if the constructor threw
            if (null != reader) {
                reader.release();
            }
        }
    }

    /**
     * Loads the data of a file and records the file's URI as its source. The
     * current {@link #keepLoadInMemory(boolean)} setting is passed to the
     * reader, and the reader is always released.
     *
     * @param file the file to load
     * @return the data reported by {@link LowMemXlsReader#getData()}
     * @throws IOException declared by the reader calls
     * @throws ImportValidationException declared by the reader calls
     */
    @Override
    public ImportData readOneFile(File file) throws IOException, ImportValidationException {
        logger.debug("loading data from file: " + file);
        LowMemXlsReader rdr = null;
        try {
            rdr = new LowMemXlsReader(file);
            rdr.keepSheetDataInMemory(keepLoadInMemory);
            ImportData d = rdr.getData();

            d.getMetadata().setSourceOfData(VF.createIRI(file.toURI().toString()));
            logger.debug("finished reading file: " + file);
            return d;
        } finally {
            // rdr stays null if the constructor threw
            if (null != rdr) {
                rdr.release();
            }
        }
    }

    /**
     * Sets the flag that {@link #readOneFile(File)} passes to
     * {@link LowMemXlsReader#keepSheetDataInMemory}.
     *
     * @param b the new flag value
     */
    @Override
    public void keepLoadInMemory(boolean b) {
        keepLoadInMemory = b;
    }

    /**
     * Always return a non-null string (will be "" for null cells).
     * <p>
     * Numeric and boolean cells are rendered with {@link Double#toString} and
     * {@link Boolean#toString}; formula cells yield the formula text, not its
     * result; error cells yield the error's text form (via
     * {@link FormulaError}) instead of failing in
     * {@link Cell#getStringCellValue()}. Every other cell type is read with
     * {@code getStringCellValue()}.
     *
     * @param cell the cell to convert; may be null
     * @return the textual form of the cell
     */
    private static String getString(Cell cell) {
        if (null == cell) {
            return "";
        }

        switch (cell.getCellType()) {
        case NUMERIC:
            return Double.toString(cell.getNumericCellValue());
        case BOOLEAN:
            return Boolean.toString(cell.getBooleanCellValue());
        case FORMULA:
            return cell.getCellFormula();
        case ERROR:
            // getStringCellValue() rejects error cells with an
            // IllegalStateException, which would abort the whole read
            return FormulaError.forInt(cell.getErrorCellValue()).getString();
        default:
            return cell.getStringCellValue();
        }
    }
}