org.pentaho.di.trans.steps.excelinput.staxpoi.StaxPoiSheetTest.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.trans.steps.excelinput.staxpoi.StaxPoiSheetTest.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2018 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.excelinput.staxpoi;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

import java.io.InputStream;
import java.util.Collections;
import java.util.Date;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTXf;
import org.pentaho.di.core.spreadsheet.KCell;
import org.pentaho.di.core.spreadsheet.KCellType;
import org.pentaho.di.core.spreadsheet.KSheet;

public class StaxPoiSheetTest {

    private static final String BP_SHEET = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
            + "<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\" "
            + "xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\">" + "%s"
            + "</worksheet>";

    private static final String SHEET_DATE_NO_V = String.format(BP_SHEET,
            " <dimension ref=\"A1:A3\"/>" + " <sheetData>" + "   <row r=\"1\" spans=\"1:1\">"
                    + "     <c r=\"A1\" s=\"1\" t=\"s\"><v>0</v></c>" + "   </row>"
                    + "   <row r=\"2\" spans=\"1:1\">" + "     <c r=\"A2\" s=\"2\"><v>42248</v></c>" + "   </row>"
                    + "   <row r=\"3\" spans=\"1:1\">" + "     <c r=\"A3\" s=\"2\"/>" + "   </row>"
                    + " </sheetData>");

    private static final String SHEET_1 = String.format(BP_SHEET, " <dimension ref=\"B2:F5\"/>" + " <sheetData>"
            + "  <row r=\"2\" spans=\"2:6\"><c r=\"B2\" t=\"s\"><v>0</v></c><c r=\"C2\" t=\"s\"><v>1</v></c>"
            + "    <c r=\"D2\" t=\"s\"><v>2</v></c><c r=\"E2\" t=\"s\"><v>3</v></c><c r=\"F2\" t=\"s\"><v>4</v></c></row>"
            + "  <row r=\"3\" spans=\"2:6\"><c r=\"B3\" t=\"s\"><v>5</v></c><c r=\"C3\" s=\"1\"><v>40428</v></c>"
            + "    <c r=\"D3\"><v>75</v></c><c r=\"E3\" t=\"b\"><v>1</v></c><c r=\"F3\"><f>D3</f><v>75</v></c></row>"
            + "  <row r=\"4\" spans=\"2:6\"><c r=\"B4\" t=\"s\"><v>6</v></c><c r=\"C4\" s=\"1\"><v>40429</v></c>"
            + "    <c r=\"D4\"><v>42</v></c><c r=\"E4\" t=\"b\"><v>0</v></c><c r=\"F4\"><f>F3+D4</f><v>117</v></c></row>"
            + "  <row r=\"5\" spans=\"2:6\"><c r=\"B5\" t=\"s\"><v>7</v></c><c r=\"C5\" s=\"1\"><v>40430</v></c>"
            + "    <c r=\"D5\"><v>93</v></c><c r=\"E5\" t=\"b\"><v>1</v></c><c r=\"F5\"><f>F4+D5</f><v>210</v></c></row>"
            + " </sheetData>");

    private static final String SHEET_EMPTY = String.format(BP_SHEET, "<dimension ref=\"A1\"/><sheetData/>");

    private static final String SHEET_INLINE_STRINGS = String.format(BP_SHEET, "<dimension ref=\"A1:B3\"/>"
            + "<sheetViews>" + "<sheetView tabSelected=\"1\" workbookViewId=\"0\" rightToLeft=\"false\">"
            + "<selection activeCell=\"C5\" sqref=\"C5\"/>" + "</sheetView>" + "</sheetViews>"
            + "<sheetFormatPr defaultRowHeight=\"15\"/>" + "<sheetData>" + "<row outlineLevel=\"0\" r=\"1\">"
            + "<c r=\"A1\" s=\"0\" t=\"inlineStr\"><is><t>Test1</t></is></c>"
            + "<c r=\"B1\" s=\"0\" t=\"inlineStr\"><is><t>Test2</t></is></c>" + "</row>"
            + "<row outlineLevel=\"0\" r=\"2\">"
            + "<c r=\"A2\" s=\"0\" t=\"inlineStr\"><is><t>value 1 1</t></is></c>"
            + "<c r=\"B2\" s=\"0\" t=\"inlineStr\"><is><t>value 2 1</t></is></c>" + "</row>"
            + "<row outlineLevel=\"0\" r=\"3\">"
            + "<c r=\"A3\" s=\"0\" t=\"inlineStr\"><is><t>value 1 2</t></is></c>"
            + "<c r=\"B3\" s=\"0\" t=\"inlineStr\"><is><t>value 2 2</t></is></c>" + "</row>" + "</sheetData>");

    private static final String SHEET_NO_USED_RANGE_SPECIFIED = String.format(BP_SHEET, "<dimension ref=\"A1\" />"
            + "<sheetViews>" + "<sheetView tabSelected=\"1\" workbookViewId=\"0\">" + "<selection/>"
            + "</sheetView>" + "</sheetViews>"
            + "<sheetFormatPr defaultRowHeight=\"12.750000\" customHeight=\"true\"/>" + "<sheetData>"
            + "<row r=\"2\">" + "<c r=\"A2\" s=\"9\" t=\"s\">" + "<v>0</v>" + "</c><c r=\"B2\" s=\"9\" t=\"s\">"
            + "<v>0</v>" + "</c><c r=\"C2\" s=\"9\" t=\"s\">" + "<v>1</v>" + "</c><c r=\"D2\" s=\"9\" t=\"s\">"
            + "<v>2</v>" + "</c><c r=\"E2\" s=\"9\" t=\"s\">" + "<v>3</v>" + "</c>" + "</row>" + "<row r=\"3\">"
            + "<c r=\"A3\" s=\"11\" t=\"s\">" + "<v>4</v>" + "</c><c r=\"B3\" s=\"11\" t=\"s\">" + "<v>4</v>"
            + "</c><c r=\"C3\" s=\"11\" t=\"s\">" + "<v>5</v>" + "</c><c r=\"D3\" s=\"12\">" + "<v>2623</v>"
            + "</c><c r=\"E3\" s=\"11\" t=\"s\">" + "<v>6</v>" + "</c>" + "</row>" + "</sheetData>");

    @Test
    public void testNullDateCell() throws Exception {
        // cell had null value instead of being null
        final String sheetId = "1";
        final String sheetName = "Sheet 1";
        XSSFReader reader = mockXSSFReader(sheetId, SHEET_DATE_NO_V, mockSharedStringsTable("Some Date"),
                mockStylesTable(Collections.singletonMap(2, 165), Collections.singletonMap(165, "M/D/YYYY")));
        StaxPoiSheet spSheet = spy(new StaxPoiSheet(reader, sheetName, sheetId));
        doReturn(true).when(spSheet).isDateCell(any());
        KCell cell = spSheet.getRow(1)[0];
        assertNotNull(cell);
        assertEquals(KCellType.DATE, cell.getType());
        cell = spSheet.getRow(2)[0];
        assertNull("cell must be null", cell);
    }

    @Test
    public void testEmptySheet() throws Exception {
        XSSFReader reader = mockXSSFReader("sheet1", SHEET_EMPTY, mock(SharedStringsTable.class),
                mock(StylesTable.class));
        // check no exceptions
        StaxPoiSheet sheet = new StaxPoiSheet(reader, "empty", "sheet1");
        for (int j = 0; j < sheet.getRows(); j++) {
            sheet.getRow(j);
        }
    }

    @Test
    public void testReadSameRow() throws Exception {
        KSheet sheet1 = getSampleSheet();
        KCell[] row = sheet1.getRow(3);
        assertEquals("Two", row[1].getValue());
        row = sheet1.getRow(3);
        assertEquals("Two", row[1].getValue());
    }

    @Test
    public void testReadRowRA() throws Exception {
        KSheet sheet1 = getSampleSheet();
        KCell[] row = sheet1.getRow(4);
        assertEquals("Three", row[1].getValue());
        row = sheet1.getRow(2);
        assertEquals("One", row[1].getValue());
    }

    @Test
    public void testReadEmptyRow() throws Exception {
        KSheet sheet1 = getSampleSheet();
        KCell[] row = sheet1.getRow(0);
        assertEquals("empty row expected", 0, row.length);
    }

    @Test
    public void testReadCells() throws Exception {
        KSheet sheet = getSampleSheet();

        KCell cell = sheet.getCell(1, 2);
        assertEquals("One", cell.getValue());
        assertEquals(KCellType.LABEL, cell.getType());

        cell = sheet.getCell(2, 2);
        assertEquals(KCellType.DATE, cell.getType());
        assertEquals(new Date(1283817600000L), cell.getValue());

        cell = sheet.getCell(1, 3);
        assertEquals("Two", cell.getValue());
        assertEquals(KCellType.LABEL, cell.getType());
    }

    @Test
    public void testReadData() throws Exception {
        KSheet sheet1 = getSampleSheet();
        assertEquals(5, sheet1.getRows());

        KCell[] row = sheet1.getRow(2);
        assertEquals(KCellType.LABEL, row[1].getType());
        assertEquals("One", row[1].getValue());
        assertEquals(KCellType.DATE, row[2].getType());
        assertEquals(new Date(1283817600000L), row[2].getValue());
        assertEquals(KCellType.NUMBER, row[3].getType());
        assertEquals(Double.valueOf("75"), row[3].getValue());
        assertEquals(KCellType.BOOLEAN, row[4].getType());
        assertEquals(Boolean.TRUE, row[4].getValue());
        assertEquals(KCellType.NUMBER_FORMULA, row[5].getType());
        assertEquals(Double.valueOf("75"), row[5].getValue());

        row = sheet1.getRow(3);
        assertEquals(KCellType.LABEL, row[1].getType());
        assertEquals("Two", row[1].getValue());
        assertEquals(KCellType.DATE, row[2].getType());
        assertEquals(new Date(1283904000000L), row[2].getValue());
        assertEquals(KCellType.NUMBER, row[3].getType());
        assertEquals(Double.valueOf("42"), row[3].getValue());
        assertEquals(KCellType.BOOLEAN, row[4].getType());
        assertEquals(Boolean.FALSE, row[4].getValue());
        assertEquals(KCellType.NUMBER_FORMULA, row[5].getType());
        assertEquals(Double.valueOf("117"), row[5].getValue());

        row = sheet1.getRow(4);
        assertEquals(KCellType.LABEL, row[1].getType());
        assertEquals("Three", row[1].getValue());
        assertEquals(KCellType.DATE, row[2].getType());
        assertEquals(new Date(1283990400000L), row[2].getValue());
        assertEquals(KCellType.NUMBER, row[3].getType());
        assertEquals(Double.valueOf("93"), row[3].getValue());
        assertEquals(KCellType.BOOLEAN, row[4].getType());
        assertEquals(Boolean.TRUE, row[4].getValue());
        assertEquals(KCellType.NUMBER_FORMULA, row[5].getType());
        assertEquals(Double.valueOf("210"), row[5].getValue());

        try {
            row = sheet1.getRow(5);
            fail("No out of bounds exception thrown when expected");
        } catch (ArrayIndexOutOfBoundsException e) {
            // OK!
        }
    }

    private StaxPoiSheet getSampleSheet() throws Exception {
        String sheetId = "sheet1";
        XSSFReader reader = mockXSSFReader(sheetId, SHEET_1,
                mockSharedStringsTable("Col1Label", "Col2Date", "Col3Number", "Col4Boolean", "Col5NumFormula",
                        "One", "Two", "Three"),
                mockStylesTable(Collections.singletonMap(1, 14), Collections.<Integer, String>emptyMap()));
        return new StaxPoiSheet(reader, "Sheet 1", sheetId);
    }

    private XSSFReader mockXSSFReader(final String sheetId, final String sheetContent, final SharedStringsTable sst,
            final StylesTable styles) throws Exception {
        XSSFReader reader = mock(XSSFReader.class);
        when(reader.getSharedStringsTable()).thenReturn(sst);
        when(reader.getStylesTable()).thenReturn(styles);
        when(reader.getSheet(sheetId)).thenAnswer(new Answer<InputStream>() {
            public InputStream answer(InvocationOnMock invocation) throws Throwable {
                return IOUtils.toInputStream(sheetContent, "UTF-8");
            }
        });
        return reader;
    }

    private StylesTable mockStylesTable(final Map<Integer, Integer> styleToNumFmtId,
            final Map<Integer, String> numFmts) {
        StylesTable styles = mock(StylesTable.class);
        when(styles.getCellXfAt(any(Integer.class))).then(new Answer<CTXf>() {
            public CTXf answer(InvocationOnMock invocation) throws Throwable {
                int style = (int) invocation.getArguments()[0];
                Integer numFmtId = styleToNumFmtId.get(style);
                if (numFmtId != null) {
                    CTXf ctxf = CTXf.Factory.newInstance();
                    ctxf.setNumFmtId(numFmtId);
                    return ctxf;
                } else {
                    return null;
                }
            }
        });
        when(styles.getNumberFormatAt(any(Short.class))).then(new Answer<String>() {
            public String answer(InvocationOnMock invocation) throws Throwable {
                return numFmts.get(invocation.getArguments()[0]);
            }
        });
        return styles;
    }

    private SharedStringsTable mockSharedStringsTable(String... strings) {
        SharedStringsTable sst = new SharedStringsTable();
        for (String str : strings) {
            CTRst st = CTRst.Factory.newInstance();
            st.setT(str);
            sst.addEntry(st);
        }
        return sst;
    }

    @Test
    public void testInlineString() throws Exception {
        final String sheetId = "1";
        final String sheetName = "Sheet 1";
        XSSFReader reader = mockXSSFReader(sheetId, SHEET_INLINE_STRINGS, mock(SharedStringsTable.class),
                mock(StylesTable.class));
        StaxPoiSheet spSheet = new StaxPoiSheet(reader, sheetName, sheetId);
        KCell[] rowCells = spSheet.getRow(0);
        assertEquals("Test1", rowCells[0].getValue());
        assertEquals(KCellType.STRING_FORMULA, rowCells[0].getType());
        assertEquals("Test2", rowCells[1].getValue());
        assertEquals(KCellType.STRING_FORMULA, rowCells[1].getType());
        rowCells = spSheet.getRow(1);
        assertEquals("value 1 1", rowCells[0].getValue());
        assertEquals(KCellType.STRING_FORMULA, rowCells[0].getType());
        assertEquals("value 2 1", rowCells[1].getValue());
        assertEquals(KCellType.STRING_FORMULA, rowCells[1].getType());
        rowCells = spSheet.getRow(2);
        assertEquals("value 1 2", rowCells[0].getValue());
        assertEquals(KCellType.STRING_FORMULA, rowCells[0].getType());
        assertEquals("value 2 2", rowCells[1].getValue());
        assertEquals(KCellType.STRING_FORMULA, rowCells[1].getType());
    }

    // The row and column bounds of all cells in the worksheet are specified in ref attribute of Dimension tag in sheet
    // xml
    // But ref can be present as range: <dimension ref="A1:C2"/> or as just one start cell: <dimension ref="A1"/>.
    // Below tests to validate correct work for such cases
    @Test
    public void testNoUsedRangeSpecified() throws Exception {
        final String sheetId = "1";
        final String sheetName = "Sheet 1";
        SharedStringsTable sharedStringsTableMock = mockSharedStringsTable("Report ID", "Report ID",
                "Approval Status", "Total Report Amount", "Policy", "ReportIdValue_1", "ReportIdValue_1",
                "ApprovalStatusValue_1", "PolicyValue_1");
        XSSFReader reader = mockXSSFReader(sheetId, SHEET_NO_USED_RANGE_SPECIFIED, sharedStringsTableMock,
                mock(StylesTable.class));
        StaxPoiSheet spSheet = new StaxPoiSheet(reader, sheetName, sheetId);
        // The first row is empty - it should have empty rowCells
        KCell[] rowCells = spSheet.getRow(0);
        assertEquals(0, rowCells.length);
        // The second row - is the header - just skip it
        rowCells = spSheet.getRow(1);
        assertEquals(0, rowCells.length);
        // The row3 - is the first row with data - validating it
        rowCells = spSheet.getRow(2);
        assertEquals(KCellType.LABEL, rowCells[0].getType());
        assertEquals("ReportIdValue_1", rowCells[0].getValue());
        assertEquals(KCellType.LABEL, rowCells[1].getType());
        assertEquals("ReportIdValue_1", rowCells[1].getValue());
        assertEquals(KCellType.LABEL, rowCells[2].getType());
        assertEquals("ApprovalStatusValue_1", rowCells[2].getValue());
        assertEquals(KCellType.NUMBER, rowCells[3].getType());
        assertEquals(2623.0, rowCells[3].getValue());
        assertEquals(KCellType.LABEL, rowCells[4].getType());
        assertEquals("PolicyValue_1", rowCells[4].getValue());
    }

}