workbench.db.importer.ExcelReader.java Source code

Java tutorial

Introduction

Here is the source code for workbench.db.importer.ExcelReader.java

Source

/*
 * ExcelReader.java
 *
 * This file is part of SQL Workbench/J, http://www.sql-workbench.net
 *
 * Copyright 2002-2017, Thomas Kellerer
 *
 * Licensed under a modified Apache License, Version 2.0
 * that restricts the use for certain governments.
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at.
 *
 *     http://sql-workbench.net/manual/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * To contact the author please send an email to: support@sql-workbench.net
 *
 */
package workbench.db.importer;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Set;

import workbench.log.LogMgr;
import workbench.resource.ResourceMgr;

import workbench.util.CollectionUtil;
import workbench.util.DurationFormatter;
import workbench.util.FileUtil;
import workbench.util.MessageBuffer;
import workbench.util.StringUtil;
import workbench.util.WbFile;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFFormulaEvaluator;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
 *
 * @author Thomas Kellerer
 */
public class ExcelReader implements SpreadsheetReader {
    /**
     * Amount of milliseconds in a day.
     */
    private static final long ONE_DAY = (24L * DurationFormatter.ONE_HOUR);

    private int sheetIndex = -1;
    private String sheetName;

    private final WbFile inputFile;
    private Workbook dataFile;
    private Sheet dataSheet;
    private List<String> headerColumns;
    private String nullString;
    private List<CellRangeAddress> mergedRegions;
    private final Set<String> tsFormats = CollectionUtil.treeSet("HH", "mm", "ss", "SSS", "KK", "kk");

    private final boolean useXLSX;
    private MessageBuffer messages = new MessageBuffer();
    private boolean emptyStringIsNull;
    private boolean useStringDates;
    private DataFormatter dataFormatter = new DataFormatter(true);

    public ExcelReader(File excelFile, int sheetNumber, String name) {
        inputFile = new WbFile(excelFile);
        sheetIndex = sheetNumber > -1 ? sheetNumber : -1;
        if (sheetIndex < 0 && StringUtil.isNonBlank(name)) {
            sheetName = name.trim();
        } else {
            sheetName = null;
        }
        useXLSX = inputFile.getExtension().equalsIgnoreCase("xlsx");
    }

    @Override
    public void setReturnDatesAsString(boolean flag) {
        useStringDates = flag;
    }

    @Override
    public void setEmptyStringIsNull(boolean flag) {
        emptyStringIsNull = flag;
    }

    @Override
    public MessageBuffer getMessages() {
        return messages;
    }

    @Override
    public List<String> getSheets() {
        List<String> names = new ArrayList<>();

        if (dataFile == null) {
            try {
                load();
            } catch (Exception io) {
                LogMgr.logError("ExcelReader.getSheets()", "Could not load Excel file: " + inputFile.getFullPath(),
                        io);
                return names;
            }
        }

        int count = dataFile.getNumberOfSheets();
        for (int i = 0; i < count; i++) {
            names.add(dataFile.getSheetName(i));
        }
        return names;
    }

    @Override
    public void load() throws IOException {
        if (dataFile != null) {
            // do not load the file twice.
            return;
        }

        InputStream in = null;
        try {
            in = new FileInputStream(inputFile);
            if (useXLSX) {
                dataFile = new XSSFWorkbook(in);
            } else {
                dataFile = new HSSFWorkbook(in);
            }
        } finally {
            FileUtil.closeQuietely(in);
        }

        initActiveSheet();

        // TODO: find references to external files and update those as well
        // see: https://poi.apache.org/spreadsheet/eval.html
        try {
            if (useXLSX) {
                XSSFFormulaEvaluator.evaluateAllFormulaCells((XSSFWorkbook) dataFile);
            } else {
                HSSFFormulaEvaluator.evaluateAllFormulaCells((HSSFWorkbook) dataFile);
            }
        } catch (Exception ex) {
            LogMgr.logError("ExcelReader.load()", "Could not refresh formulas!", ex);
        }
    }

    private void initActiveSheet() {
        if (dataFile == null)
            return;

        if (sheetIndex > -1) {
            dataSheet = dataFile.getSheetAt(sheetIndex);
            if (dataSheet == null) {
                throw new IndexOutOfBoundsException(
                        "Sheet with index " + sheetIndex + " does not exist in file: " + inputFile.getFullPath());
            }
        } else if (sheetName != null) {
            dataSheet = dataFile.getSheet(sheetName);
            if (dataSheet == null) {
                throw new IllegalArgumentException(
                        "Sheet with name " + sheetName + " does not exist in file: " + inputFile.getFullPath());
            }
        } else {
            int index = dataFile.getActiveSheetIndex();
            dataSheet = dataFile.getSheetAt(index);
        }
        headerColumns = null;
        int numMergedRegions = dataSheet.getNumMergedRegions();
        mergedRegions = new ArrayList<>(numMergedRegions);
        for (int i = 0; i < numMergedRegions; i++) {
            mergedRegions.add(dataSheet.getMergedRegion(i));
        }
    }

    @Override
    public List<String> getHeaderColumns() {
        if (headerColumns == null) {
            headerColumns = new ArrayList<>();
            Row row = dataSheet.getRow(0);

            int colCount = row != null ? row.getLastCellNum() : 0;

            if (row == null || colCount == 0) {
                LogMgr.logError("ExcelReader.getHeaderColumns()",
                        "Cannot retrieve column names because no data is available in the first row of the sheet: "
                                + dataSheet.getSheetName(),
                        null);
                String msg = ResourceMgr.getFormattedString("ErrExportNoCols", dataSheet.getSheetName());
                messages.append(msg);
                messages.appendNewLine();
                return headerColumns;
            }

            for (int i = 0; i < colCount; i++) {
                Cell cell = row.getCell(i);
                Object value = getCellValue(cell);

                if (value != null) {
                    headerColumns.add(value.toString());
                } else {
                    headerColumns.add("Col" + Integer.toString(i));
                }
            }
        }
        return headerColumns;
    }

    @Override
    public void setActiveWorksheet(String name) {
        if (StringUtil.isNonBlank(name) && !StringUtil.equalStringIgnoreCase(name, sheetName)) {
            this.sheetName = name;
            this.sheetIndex = -1;
            initActiveSheet();
        }
    }

    @Override
    public void setActiveWorksheet(int index) {
        if (index > -1 && index != sheetIndex) {
            sheetIndex = index;
            sheetName = null;
            initActiveSheet();
        }
    }

    private boolean isTimestampFormat(String format) {
        for (String key : tsFormats) {
            if (format.contains(key))
                return true;
        }
        return false;
    }

    /**
     * This is a copy of the POI function DateUtil.getJavaDate().
     *
     * The POI function does not consider hours, minutes and seconds, which means
     * that columns with date <b>and</b> time are not retrieved correctly from an Excel file.
     *
     * @param date the "Excel" date
     * @return a properly initialized Java Date
     */
    private Date getJavaDate(double date) {
        int wholeDays = (int) Math.floor(date);
        int millisecondsInDay = (int) ((date - wholeDays) * ONE_DAY + 0.5);
        Calendar calendar = new GregorianCalendar(); // using default time-zone

        int startYear = 1900;
        int dayAdjust = -1; // Excel thinks 2/29/1900 is a valid date, which it isn't
        if (wholeDays < 61) {
            // Date is prior to 3/1/1900, so adjust because Excel thinks 2/29/1900 exists
            // If Excel date == 2/29/1900, will become 3/1/1900 in Java representation
            dayAdjust = 0;
        }

        int hours = (int) (millisecondsInDay / DurationFormatter.ONE_HOUR);
        millisecondsInDay -= (hours * DurationFormatter.ONE_HOUR);
        int minutes = (int) (millisecondsInDay / DurationFormatter.ONE_MINUTE);
        millisecondsInDay -= (minutes * DurationFormatter.ONE_MINUTE);
        int seconds = (int) Math.floor(millisecondsInDay / DurationFormatter.ONE_SECOND);
        millisecondsInDay -= (seconds * DurationFormatter.ONE_SECOND);
        calendar.set(startYear, 0, wholeDays + dayAdjust, hours, minutes, seconds);
        calendar.set(GregorianCalendar.MILLISECOND, millisecondsInDay);
        return calendar.getTime();
    }

    private boolean isMerged(Cell cell) {
        if (cell == null)
            return false;
        for (CellRangeAddress range : mergedRegions) {
            if (range.isInRange(cell.getRowIndex(), cell.getColumnIndex()))
                return true;
        }
        return false;
    }

    @Override
    public List<Object> getRowValues(int rowIndex) {
        Row row = dataSheet.getRow(rowIndex);
        ArrayList<Object> values = new ArrayList<>();

        if (row == null)
            return values;

        int nullCount = 0;
        int colCount = row.getLastCellNum();

        for (int col = 0; col < colCount; col++) {
            Cell cell = row.getCell(col);

            // treat rows with merged cells as "empty"
            if (isMerged(cell)) {
                LogMgr.logDebug("ExcelReader.getRowValues()", dataSheet.getSheetName() + ": column:"
                        + cell.getColumnIndex() + ", row:" + cell.getRowIndex() + " is merged. Ignoring row!");
                return Collections.emptyList();
            }

            Object value = getCellValue(cell);

            if (value == null) {
                nullCount++;
            }
            values.add(value);
        }

        if (nullCount == values.size()) {
            // return an empty list if all columns are null
            values.clear();
        }

        return values;
    }

    private Object getCellValue(Cell cell) {
        if (cell == null)
            return null;
        int type = cell.getCellType();
        if (type == Cell.CELL_TYPE_FORMULA) {
            type = cell.getCachedFormulaResultType();
        }

        Object value = null;

        switch (type) {
        case Cell.CELL_TYPE_BLANK:
        case Cell.CELL_TYPE_ERROR:
            value = null;
            break;
        case Cell.CELL_TYPE_NUMERIC:
            boolean isDate = HSSFDateUtil.isCellDateFormatted(cell);
            if (isDate) {
                if (useStringDates) {
                    value = dataFormatter.formatCellValue(cell);
                } else {
                    value = getDateValue(cell);
                }
            } else {
                double dv = cell.getNumericCellValue();
                value = Double.valueOf(dv);
            }
            break;
        default:
            String svalue = cell.getStringCellValue();
            if (isNullString(svalue)) {
                value = null;
            } else {
                value = svalue;
            }
        }
        return value;
    }

    private java.util.Date getDateValue(Cell cell) {
        java.util.Date dtValue = null;
        try {
            dtValue = cell.getDateCellValue();
        } catch (Exception ex) {
            // ignore
        }
        String fmt = cell.getCellStyle().getDataFormatString();
        double dv = cell.getNumericCellValue();
        if (dtValue == null) {
            dtValue = getJavaDate(dv);
        }

        if (dtValue != null) {
            if (isTimestampFormat(fmt)) {
                return new java.sql.Timestamp(dtValue.getTime());
            } else {
                return new java.sql.Date(dtValue.getTime());
            }
        }
        return null;
    }

    private boolean isNullString(String value) {
        if (value == null)
            return true;
        if (emptyStringIsNull && StringUtil.isEmptyString(value))
            return true;
        return StringUtil.equalString(value, nullString);
    }

    @Override
    public void setNullString(String nullString) {
        this.nullString = nullString;
    }

    @Override
    public int getRowCount() {
        return dataSheet.getLastRowNum() + 1;
    }

    @Override
    public void done() {
        dataSheet = null;
        dataFile = null;
    }

}