com.my.diff.MyDKCSVFileSource.java Source code

Java tutorial

Introduction

Here is the source code for com.my.diff.MyDKCSVFileSource.java

Source

/**
 * Copyright 2010-2011 Joseph Panico
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.my.diff;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.net.URI;
import java.text.ParseException;
import java.util.Arrays;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.ClassUtils;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.diffkit.common.DKRuntime;
import org.diffkit.common.DKUserException;
import org.diffkit.common.DKValidate;
import org.diffkit.common.annot.NotThreadSafe;
import org.diffkit.diff.engine.DKColumnModel;
import org.diffkit.diff.engine.DKContext;
import org.diffkit.diff.engine.DKSource;
import org.diffkit.diff.engine.DKTableModel;
import org.diffkit.util.DKArrayUtil;
import org.diffkit.util.DKFileUtil;

import au.com.bytecode.opencsv.CSVReader;

/**
 * A {@link DKSource} that reads rows from a delimited text file through an
 * opencsv {@link CSVReader}. The first line of the file is treated as the
 * header; every following line is parsed into a row using the column models
 * of the {@link DKTableModel}.
 * <p>
 * If no model is supplied, a generic String model is derived lazily from the
 * header line (see {@link #getModel()}).
 * <p>
 * NOTE(review): the delimiter passed to the constructor is validated and
 * exposed via {@link #getDelimeter()}, but the reader is created with
 * opencsv's default settings, so the delimiter does not currently influence
 * parsing. Confirm with callers before wiring it in.
 *
 * @author jpanico
 */
@NotThreadSafe
public class MyDKCSVFileSource implements DKSource {
    /**
     * resolved by DKFileUtil.findFile; may be null when the path could not be
     * resolved and validation is lazy
     */
    private final File _file;
    private final String _delimiter;
    /**
     * read from the first line of actual file
     */
    private String[] _headerColumnNames;
    private DKTableModel _model;
    private final String[] _keyColumnNames;
    /**
     * DKColumnModel indices
     */
    private final int[] _readColumnIdxs;
    private DKColumnModel[] _readColumns;
    private final boolean _isSorted;
    private final boolean _validateLazily;
    private transient boolean _isOpen;
    /**
     * zero-based index of the last data row returned by getNextRow; -1 before
     * any row has been read
     */
    private transient long _lastIndex = -1;
    private transient CSVReader _lineReader;
    private final Logger _log = LoggerFactory.getLogger(this.getClass());

    /**
     * Convenience constructor: no key column names, sorted input, lazy
     * validation.
     *
     * @param readColumnIdxs_
     *           instructs Source to only read a subset of columns. null value
     *           means all Columns will be read and must be modelled
     */
    public MyDKCSVFileSource(String filePath_, DKTableModel model_, int[] readColumnIdxs_, String delimiter_)
            throws IOException {
        this(filePath_, model_, null, readColumnIdxs_, delimiter_, true, true);
    }

    /**
     * @param filePath_
     *           path handed to DKFileUtil.findFile to locate the file
     * @param model_
     *           table model describing the columns; mutually exclusive with
     *           keyColumnNames_
     * @param keyColumnNames_
     *           header names of the key columns, used only when the model is
     *           derived from the header; mutually exclusive with model_
     * @param readColumnIdxs_
     *           instructs Source to only read a subset of columns. null value
     *           means all Columns will be read and must be modelled. Non-null
     *           values are not currently supported.
     * @param delimiter_
     *           must not be null
     * @param isSorted_
     *           must be true; unsorted input is not currently supported
     * @param validateLazily_
     *           if false, the file is located and opened immediately
     * @throws RuntimeException
     *            if both model_ and keyColumnNames_ are supplied, or if
     *            validation is eager and the file cannot be found
     * @throws NotImplementedException
     *            for non-null readColumnIdxs_ or isSorted_ == false
     */
    public MyDKCSVFileSource(String filePath_, DKTableModel model_, String[] keyColumnNames_, int[] readColumnIdxs_,
            String delimiter_, boolean isSorted_, boolean validateLazily_) throws IOException {
        _log.debug("filePath_->{}", filePath_);
        _log.debug("model_->{}", model_);
        // arrays are rendered explicitly: a bare String[] would bind to the
        // logger's Object[] overload and only its first element would be logged
        _log.debug("keyColumnNames_->{}", Arrays.toString(keyColumnNames_));
        _log.debug("readColumnIdxs_->{}", Arrays.toString(readColumnIdxs_));
        _log.debug("delimiter_->{}", delimiter_);
        _log.debug("isSorted_->{}", isSorted_);
        _log.debug("validateLazily_->{}", validateLazily_);
        if ((model_ != null) && (keyColumnNames_ != null))
            throw new RuntimeException(
                    String.format("does not allow both %s and %s params", "model_", "keyColumnNames_"));
        _file = DKFileUtil.findFile(filePath_);
        _delimiter = delimiter_;
        _model = model_;
        _keyColumnNames = keyColumnNames_;
        _readColumnIdxs = readColumnIdxs_;
        if (_readColumnIdxs != null)
            throw new NotImplementedException(String.format("_readColumnIdxs->%s is not currently supported",
                    Arrays.toString(_readColumnIdxs)));
        _isSorted = isSorted_;
        _validateLazily = validateLazily_;
        DKValidate.notNull(_delimiter);
        if (!_isSorted)
            throw new NotImplementedException(String.format("isSorted_->%s is not currently supported", _isSorted));
        if (!_validateLazily) {
            if (_file == null)
                throw new RuntimeException(String.format("could not find file for filePath_->%s", filePath_));
            this.open();
        }
    }

    public File getFile() {
        return _file;
    }

    public String getDelimeter() {
        return _delimiter;
    }

    /**
     * Returns the table model. When none was supplied at construction, the
     * file is opened (which reads the header line) and a generic String model
     * is built from the header column names, keyed on the configured key
     * column names or, absent those, on the first column.
     *
     * @throws RuntimeException
     *            wrapping any IOException raised while opening the file
     */
    public DKTableModel getModel() {
        if (_model != null)
            return _model;
        try {
            this.open();
        } catch (IOException e_) {
            throw new RuntimeException(e_);
        }
        int[] keyColumnIndices = (_keyColumnNames == null) ? new int[] { 0 }
                : this.getHeaderColumnNameIndices(_keyColumnNames);
        _model = DKTableModel.createGenericStringModel(_headerColumnNames, keyColumnIndices);
        return _model;
    }

    /**
     * Maps each name to its position in the header line.
     *
     * @return null if names_ is null, otherwise one header index per name, in
     *         the same order as names_
     * @throws RuntimeException
     *            if any name does not appear in the header
     */
    private int[] getHeaderColumnNameIndices(String[] names_) {
        if (names_ == null)
            return null;
        int[] indices = new int[names_.length];
        for (int i = 0; i < names_.length; i++) {
            int foundAt = ArrayUtils.indexOf(_headerColumnNames, names_[i]);
            if (foundAt < 0)
                throw new RuntimeException(String.format("no value in _headerColumnNames for %s", names_[i]));
            indices[i] = foundAt;
        }
        return indices;
    }

    public String[] getKeyColumnNames() {
        return _keyColumnNames;
    }

    public int[] getReadColumnIdxs() {
        return _readColumnIdxs;
    }

    public boolean getIsSorted() {
        return _isSorted;
    }

    public boolean getValidateLazily() {
        return _validateLazily;
    }

    public Kind getKind() {
        return Kind.FILE;
    }

    public URI getURI() throws IOException {
        return _file.toURI();
    }

    /**
     * In test mode only the file name is returned; otherwise the short class
     * name, identity hash and file path. Safe to call when the file could not
     * be resolved.
     */
    public String toString() {
        if (DKRuntime.getInstance().getIsTest())
            return (_file == null) ? String.valueOf((Object) null) : _file.getName();
        return String.format("%s@%x[%s]", ClassUtils.getShortClassName(this.getClass()),
                System.identityHashCode(this), (_file == null) ? null : _file.getPath());
    }

    /**
     * Reads and parses the next data row.
     *
     * @return the parsed row, or null when the end of the file is reached
     * @throws RuntimeException
     *            if the source is not open, the column count does not match
     *            the model, or a value cannot be parsed
     */
    public Object[] getNextRow() throws IOException {
        this.ensureOpen();
        String[] line = _lineReader.readNext();
        if (line == null)
            return null;
        _lastIndex++;
        return createRow(line);
    }

    /**
     * Converts the raw field values of one line into typed column values
     * using the column models of the table model.
     *
     * @return null if strings is null, otherwise one parsed value per column
     * @throws RuntimeException
     *            if the number of fields differs from the number of modelled
     *            columns, or wrapping the ParseException of a failed parse
     */
    private Object[] createRow(String[] strings) throws IOException {
        if (strings == null)
            return null;
        DKColumnModel[] readColumns = this.getReadColumns();
        if (strings.length != readColumns.length)
            throw new RuntimeException(String.format("columnCount->%s in row->%s does not match modelled table->%s",
                    strings.length, Arrays.toString(strings), _model));
        try {
            Object[] row = new Object[strings.length];
            for (int i = 0; i < strings.length; i++) {
                row[i] = readColumns[i].parseObject(strings[i]);
            }
            return row;
        } catch (ParseException e_) {
            _log.error(String.format("could not parse row->%s", Arrays.toString(strings)), e_);
            throw new RuntimeException(e_);
        }
    }

    /**
     * Lazily resolves and caches the column models from the table model.
     */
    private DKColumnModel[] getReadColumns() {
        if (_readColumns != null)
            return _readColumns;
        DKTableModel model = this.getModel();
        if (model == null)
            return null;
        _readColumns = model.getColumns();
        return _readColumns;
    }

    public long getLastIndex() {
        return _lastIndex;
    }

    /**
     * Closes the underlying reader. The source is marked closed even if the
     * reader fails to close, so it never stays in a half-open state.
     *
     * @throws RuntimeException
     *            if the source is not open
     */
    // @Override
    public void close(DKContext context_) throws IOException {
        this.ensureOpen();
        try {
            _lineReader.close();
        } finally {
            _lineReader = null;
            _isOpen = false;
        }
    }

    /**
     * @throws DKUserException
     *            if the file could not be resolved or is not readable
     */
    private void validateFile() {
        if ((_file == null) || !_file.canRead())
            throw new DKUserException(String.format("can't read file [%s]", _file));
    }

    // @Override
    public void open(DKContext context_) throws IOException {
        this.open();
    }

    /**
     * Opens the file and consumes the header line. Does nothing if already
     * open. The source is only marked open once the reader has been created
     * and the header read; on failure the reader is released so that a later
     * call can retry.
     */
    private void open() throws IOException {
        if (_isOpen)
            return;
        this.validateFile();
        // NOTE(review): _delimiter is not passed to the reader; opencsv's
        // defaults are used. Confirm whether that is intentional.
        CSVReader reader = new CSVReader(new BufferedReader(new FileReader(_file)));
        boolean opened = false;
        try {
            _lineReader = reader;
            this.readHeader();
            opened = true;
        } finally {
            if (!opened) {
                _lineReader = null;
                try {
                    reader.close();
                } catch (IOException ignored_) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
        _isOpen = true;
    }

    /**
     * Reads the first line of the file and records it as the header column
     * names. The header stays null if the reader has no line to return.
     */
    private void readHeader() throws IOException {
        String[] line = _lineReader.readNext();
        _log.info("header->{}", Arrays.toString(line));
        _headerColumnNames = line;
    }

    private void ensureOpen() {
        if (!_isOpen)
            throw new RuntimeException("not open!");
    }
}