org.databene.platform.xls.SingleSheetXLSEntityIterator.java Source code

Java tutorial

Introduction

Here is the source code for org.databene.platform.xls.SingleSheetXLSEntityIterator.java

Source

/*
 * (c) Copyright 2014 by Volker Bergmann. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, is permitted under the terms of the
 * GNU General Public License (GPL).
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
 * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
 * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package org.databene.platform.xls;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.databene.benerator.engine.BeneratorContext;
import org.databene.commons.ArrayUtil;
import org.databene.commons.CollectionUtil;
import org.databene.commons.ConfigurationError;
import org.databene.commons.Converter;
import org.databene.commons.IOUtil;
import org.databene.commons.StringUtil;
import org.databene.commons.converter.ConverterManager;
import org.databene.commons.converter.ToStringConverter;
import org.databene.formats.DataContainer;
import org.databene.formats.DataIterator;
import org.databene.formats.util.OrthogonalArrayIterator;
import org.databene.formats.util.ThreadLocalDataContainer;
import org.databene.formats.xls.XLSLineIterator;
import org.databene.model.data.ComplexTypeDescriptor;
import org.databene.model.data.ComponentDescriptor;
import org.databene.model.data.DataModel;
import org.databene.model.data.Entity;
import org.databene.platform.array.Array2EntityConverter;

/**
 * Iterates a single sheet of an XLS document and maps its rows to entities.<br><br>
 * The first line delivered by the underlying line iterator is used as header line; its
 * (trimmed) cell values become the component names of the generated entities. Each following
 * line is converted to one {@link Entity}. A cell whose text starts with
 * {@code PlatformDescriptor.getCollectionReferencePrefix()} is treated as a reference to another
 * tab of the same workbook and is replaced by the array of entities parsed from that tab.<br><br>
 * Created: 23.06.2014 17:20:19
 * @since 0.9.5
 * @author Volker Bergmann
 */
public class SingleSheetXLSEntityIterator implements DataIterator<Entity> {

    /** URI the workbook was loaded from; only set by the URI based constructor and only used in error messages. */
    private String uri;
    /** Workbook that owns the iterated sheet, used to look up tabs referenced from cells. */
    private final Workbook workbook;
    private final boolean rowBased;
    private final boolean formatted;
    private final String emptyMarker;
    /** Raw line source; <code>null</code> if the sheet has no header line or no data line. */
    private DataIterator<Object[]> source;
    private final Converter<String, ?> preprocessor;
    /** Maps raw lines to entities; only created if the sheet has at least one data line. */
    private Array2EntityConverter converter;
    /** First data line, which is read ahead by the constructor and handed out by the first next() call. */
    private Object[] buffer;
    private final ThreadLocalDataContainer<Object[]> sourceContainer = new ThreadLocalDataContainer<Object[]>();
    private ComplexTypeDescriptor entityDescriptor;
    private final BeneratorContext context;
    /** Normalized header names; <code>null</code> if the sheet did not provide a header line. */
    private String[] headers;

    // constructors ----------------------------------------------------------------------------------------------------

    /**
     * Loads the workbook at the given URI and iterates the sheet of the given name.
     * @param uri the location of the XLS document
     * @param sheetName the name of the sheet to iterate
     * @param preprocessor converter which is passed on to the underlying {@link XLSLineIterator}
     * @param entityType the type of the entities to create; if <code>null</code>, it is derived from the sheet name
     * @param context the context used for type lookup and creation
     * @param rowBased if <code>false</code>, the raw lines are wrapped with an {@link OrthogonalArrayIterator}
     * @param formatted flag which is passed on to the underlying {@link XLSLineIterator}
     * @param emptyMarker if not <code>null</code>, it is set as empty marker on the {@link XLSLineIterator}
     * @throws ConfigurationError if the document does not contain a sheet of the given name
     */
    public SingleSheetXLSEntityIterator(String uri, String sheetName, Converter<String, ?> preprocessor,
            ComplexTypeDescriptor entityType, BeneratorContext context, boolean rowBased, boolean formatted,
            String emptyMarker) throws InvalidFormatException, IOException {
        this(loadSheet(uri, sheetName), preprocessor, entityType, context, rowBased, formatted, emptyMarker);
        this.uri = uri;
    }

    /**
     * Iterates the given sheet. The header line and the first data line are read immediately.
     * @param sheet the sheet to iterate
     * @param preprocessor converter which is passed on to the underlying {@link XLSLineIterator}
     * @param entityDescriptor the type of the entities to create; if <code>null</code>, it is derived
     *        from the sheet name, which requires a non-null context
     * @param context the context used for type lookup and creation
     * @param rowBased if <code>false</code>, the raw lines are wrapped with an {@link OrthogonalArrayIterator}
     * @param formatted flag which is passed on to the underlying {@link XLSLineIterator}
     * @param emptyMarker if not <code>null</code>, it is set as empty marker on the {@link XLSLineIterator}
     * @throws ConfigurationError if no entity descriptor is given and the context is <code>null</code>
     */
    public SingleSheetXLSEntityIterator(Sheet sheet, Converter<String, ?> preprocessor,
            ComplexTypeDescriptor entityDescriptor, BeneratorContext context, boolean rowBased, boolean formatted,
            String emptyMarker) {
        this.workbook = sheet.getWorkbook();
        this.preprocessor = preprocessor;
        this.context = context;
        this.rowBased = rowBased;
        this.formatted = formatted;
        this.emptyMarker = emptyMarker;
        // must happen after 'formatted' and 'emptyMarker' have been assigned: createRawIterator() reads these fields
        this.source = createRawIterator(sheet, rowBased, preprocessor);

        // if not specified explicitly, determine entity type by sheet name
        this.entityDescriptor = (entityDescriptor != null ? entityDescriptor
                : resolveDescriptor(sheet.getSheetName()));

        // parse headers
        parseHeaders();
        if (headers == null) {
            releaseSource(); // empty sheet
            return;
        }

        // parse first data row
        DataContainer<Object[]> tmp = this.source.next(sourceContainer.get());
        if (tmp == null) {
            releaseSource(); // no data in sheet
            return;
        }
        this.buffer = tmp.getData();
        this.converter = new Array2EntityConverter(this.entityDescriptor, headers, false);
    }

    // DataIterator interface implementation ---------------------------------------------------------------------------

    @Override
    public Class<Entity> getType() {
        return Entity.class;
    }

    /**
     * Provides the entity for the next data line.
     * @param container the container to put the entity into
     * @return the result of <code>container.setData()</code>, or <code>null</code> if no more data is available
     */
    @Override
    public DataContainer<Entity> next(DataContainer<Entity> container) {
        if (source == null)
            return null;
        Object[] rawData;
        if (buffer != null) {
            // hand out the line which was read ahead by the constructor
            rawData = buffer;
            buffer = null;
        } else {
            DataContainer<Object[]> tmp = source.next(sourceContainer.get());
            if (tmp == null)
                return null;
            rawData = tmp.getData();
        }
        resolveCollections(rawData);
        Entity result = converter.convert(rawData);
        return container.setData(result);
    }

    /** Closes the underlying line source, if there is one. */
    @Override
    public void close() {
        if (source != null)
            IOUtil.close(source);
    }

    // convenience methods ---------------------------------------------------------------------------------------------

    /**
     * Loads the workbook at the given URI and parses all entities of the sheet with the given name.
     * @return a list of all entities of the sheet, which is empty if the sheet provides no data
     * @throws ConfigurationError if the document does not contain a sheet of the given name
     */
    public static List<Entity> parseAll(String uri, String sheetName, Converter<String, ?> preprocessor,
            ComplexTypeDescriptor type, BeneratorContext context, boolean rowBased, boolean formatted,
            String emptyMarker) throws IOException, InvalidFormatException {
        Sheet sheet = loadSheet(uri, sheetName);
        return parseAll(sheet, preprocessor, type, context, rowBased, formatted, emptyMarker);
    }

    /**
     * Parses all entities of the given sheet.
     * @return a list of all entities of the sheet, which is empty if the sheet provides no data
     */
    public static List<Entity> parseAll(Sheet sheet, Converter<String, ?> preprocessor, ComplexTypeDescriptor type,
            BeneratorContext context, boolean rowBased, boolean formatted, String emptyMarker) {
        List<Entity> list = new ArrayList<Entity>();
        SingleSheetXLSEntityIterator iterator = new SingleSheetXLSEntityIterator(sheet, preprocessor, type, context,
                rowBased, formatted, emptyMarker);
        try {
            DataContainer<Entity> container = new DataContainer<Entity>();
            while ((container = iterator.next(container)) != null)
                list.add(container.getData());
        } finally {
            // the iterator is created here, so it has to be released here as well, even if a row fails to convert
            iterator.close();
        }
        return list;
    }

    // private helper methods --------------------------------------------------

    /**
     * Loads the workbook at the given URI and looks up the sheet of the given name.
     * The input stream is closed as soon as the workbook has been created from it.
     * @throws ConfigurationError if the workbook does not contain a sheet of the given name
     */
    private static Sheet loadSheet(String uri, String sheetName) throws InvalidFormatException, IOException {
        InputStream in = IOUtil.getInputStreamForURI(uri);
        try {
            Workbook workbook = WorkbookFactory.create(in);
            Sheet sheet = workbook.getSheet(sheetName);
            if (sheet == null)
                throw new ConfigurationError("Sheet '" + sheetName + "' not found in file " + uri);
            return sheet;
        } finally {
            // release the stream on success as well as on the 'sheet not found' and parse error paths
            IOUtil.close(in);
        }
    }

    /** Closes the raw line source and marks this iterator as having no data. */
    private void releaseSource() {
        IOUtil.close(this.source);
        this.source = null;
    }

    /**
     * Determines the entity type for a sheet for which no descriptor has been specified explicitly.
     * @param entityTypeName the name of the sheet, which is used as type name
     */
    private ComplexTypeDescriptor resolveDescriptor(String entityTypeName) {
        if (context != null) {
            DataModel dataModel = context.getDataModel();
            ComplexTypeDescriptor knownType = (ComplexTypeDescriptor) dataModel.getTypeDescriptor(entityTypeName);
            // NOTE(review): a type that is already known to the data model is not used itself, but replaced by a
            // new descriptor named '<name>_' which has no visible link to the known one - confirm this is intended
            if (knownType != null)
                return new ComplexTypeDescriptor(entityTypeName + "_", context.getLocalDescriptorProvider());
        }
        return createDescriptor(entityTypeName);
    }

    /** Reads the next line from the source and uses it as header line; leaves headers null if there is no line. */
    private void parseHeaders() {
        DataContainer<Object[]> tmp = this.source.next(sourceContainer.get());
        this.headers = (tmp != null ? normalizeHeaders(tmp.getData()) : null);
    }

    /**
     * Converts the raw header cells to trimmed strings and cuts off trailing empty headers.
     * Empty headers between non-empty ones are kept, so the header indices still match the cell indices.
     */
    private static String[] normalizeHeaders(Object[] rawHeaders) {
        String[] headers = (String[]) ConverterManager.convertAll(rawHeaders, new ToStringConverter(),
                String.class);
        StringUtil.trimAll(headers);
        int lastNonEmptyIndex = headers.length - 1;
        while (lastNonEmptyIndex >= 0 && StringUtil.isEmpty(headers[lastNonEmptyIndex]))
            lastNonEmptyIndex--;
        if (lastNonEmptyIndex < headers.length - 1)
            headers = ArrayUtil.copyOfRange(headers, 0, lastNonEmptyIndex + 1);
        return headers;
    }

    /**
     * Creates the iterator which provides the raw lines of the sheet.
     * Reads the fields 'formatted' and 'emptyMarker', so they need to be assigned before calling this method.
     */
    private DataIterator<Object[]> createRawIterator(Sheet sheet, boolean rowBased,
            Converter<String, ?> preprocessor) {
        XLSLineIterator iterator = new XLSLineIterator(sheet, false, formatted, preprocessor);
        if (emptyMarker != null)
            iterator.setEmptyMarker(emptyMarker);
        if (!rowBased)
            return new OrthogonalArrayIterator<Object>(iterator); // presumably transposes rows and columns
        return iterator;
    }

    /**
     * Creates a new type descriptor of the given name and registers it as local type in the context.
     * @throws ConfigurationError if there is no context to create the type in
     */
    private ComplexTypeDescriptor createDescriptor(String entityTypeName) {
        if (context == null)
            throw new ConfigurationError("Unable to create entity type '" + entityTypeName
                    + "': neither an entity descriptor nor a context has been provided");
        ComplexTypeDescriptor descriptor;
        descriptor = new ComplexTypeDescriptor(entityTypeName, context.getLocalDescriptorProvider());
        context.addLocalType(descriptor);
        return descriptor;
    }

    /**
     * Replaces each cell which refers to another tab by the array of entities parsed from that tab.
     * The replacement is performed in place on the given array.
     */
    private void resolveCollections(Object[] rawData) {
        String colRefPrefix = PlatformDescriptor.getCollectionReferencePrefix();
        // trailing empty headers are cut off, so a data line may be longer than the header line;
        // cells without a header cannot be mapped to a component and are left untouched
        int resolvableCount = Math.min(rawData.length, headers.length);
        for (int i = 0; i < resolvableCount; i++) {
            Object cellValue = rawData[i];
            if (cellValue instanceof String && ((String) cellValue).startsWith(colRefPrefix)) {
                String tabName = ((String) cellValue).substring(colRefPrefix.length());
                ComponentDescriptor component = entityDescriptor.getComponent(headers[i]);
                // without a component definition the element type is derived from the tab name
                ComplexTypeDescriptor componentType = (component != null
                        ? (ComplexTypeDescriptor) component.getTypeDescriptor()
                        : null);
                rawData[i] = mapTabToArray(tabName, componentType);
            }
        }
    }

    /** Parses all entities of the tab with the given name, using the settings of this iterator. */
    private Entity[] mapTabToArray(String tabName, ComplexTypeDescriptor type) {
        Sheet sheet = getSheet(tabName);
        List<Entity> elements = parseAll(sheet, preprocessor, type, context, rowBased, formatted, emptyMarker);
        return CollectionUtil.toArray(elements, Entity.class);
    }

    /**
     * Looks up a sheet of the workbook by name, ignoring case and surrounding whitespace.
     * @throws ConfigurationError if the workbook does not contain a matching sheet
     */
    private Sheet getSheet(String tabName) {
        String wantedName = tabName.trim();
        int sheetCount = workbook.getNumberOfSheets();
        for (int i = 0; i < sheetCount; i++) {
            Sheet candidate = workbook.getSheetAt(i);
            if (candidate.getSheetName().trim().equalsIgnoreCase(wantedName))
                return candidate;
        }
        // tab not found
        throw new ConfigurationError("Tab '" + tabName + "' not found" + (uri != null ? " in " + uri : ""));
    }

    // java.lang.Object overrides --------------------------------------------------------------------------------------

    @Override
    public String toString() {
        return getClass().getSimpleName() + "[" + source + "]";
    }

}