com.itemanalysis.jmetrik.file.JmetrikFileReader.java Source code

Java tutorial

Introduction

Here is the source code for com.itemanalysis.jmetrik.file.JmetrikFileReader.java

Source

/*
 * Copyright (c) 2014 Patrick Meyer
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.itemanalysis.jmetrik.file;

import com.itemanalysis.jmetrik.data.*;
import com.itemanalysis.psychometrics.data.*;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.LinkedHashMap;

/**
 * Reads a *.jmetrik file. The file header is exposed as a map of {@link VariableAttributes},
 * and the data rows are read one at a time, much like a forward-only record set.
 * An example usage is:
 *
 * <pre>{@code
 * File f = new File(fileName);
 *
 * try (JmetrikFileReader reader = new JmetrikFileReader(f)) {
 *     reader.openConnection();
 *     LinkedHashMap<VariableName, VariableAttributes> variableAttributes = reader.getVariableAttributes();
 *
 *     //Assume a variable called sid exists in the file.
 *     VariableName sid = new VariableName("sid");
 *
 *     //Read data
 *     while (reader.hasNext()) {
 *         JmetrikCSVRecord jmetrikCSVRecord = reader.next();
 *         System.out.println(jmetrikCSVRecord.get(sid));
 *     }
 * } catch (IOException ex) {
 *     ex.printStackTrace();
 * }
 * }</pre>
 *
 * The header is recognized through the comment attached to each CSV record: the records whose
 * comments are {@code VERSION}, {@code METADATA}, {@code ATTRIBUTES} and {@code DATA} mark the
 * version, the row count, the first variable definition and the first data row, respectively.
 */
public class JmetrikFileReader implements AutoCloseable {

    private Path f = null;
    private String version = "";
    private int nrow = 0;
    private int ncol = 0;
    private CSVParser dataParser = null;
    private Iterator<CSVRecord> dataIterator = null;
    private Reader dataReader = null;
    /** Look-ahead record: the row that the next call to {@link #next()} will return, or null. */
    private CSVRecord nextRecord = null;
    private LinkedHashMap<VariableName, VariableAttributes> variableAttributes = null;
    JmetrikCSVRecord jmetrikCSVRecord = null;

    public JmetrikFileReader(File f) {
        this.f = f.toPath();
    }

    public JmetrikFileReader(Path f) {
        this.f = f;
    }

    /**
     * Returns the variable attributes that were extracted from the file header by
     * {@link #openConnection()}. The map is in file order.
     *
     * @return map of the variable attributes, or null if {@link #openConnection()} has not been called
     * @throws IOException declared for backward compatibility; this method performs no I/O itself
     */
    public LinkedHashMap<VariableName, VariableAttributes> getVariableAttributes() throws IOException {
        return variableAttributes;
    }

    /**
     * Extracts the version, the row count and the VariableAttributes from the file header.
     * Reading stops at the first record whose comment is DATA.
     *
     * @param csvRecordIterator iterator positioned at the start of the file
     */
    private void setVariableAttributes(Iterator<CSVRecord> csvRecordIterator) {
        // Reset everything derived from the header. Without this, reopening the connection
        // would keep incrementing ncol and report twice as many columns as the file has.
        variableAttributes = new LinkedHashMap<VariableName, VariableAttributes>();
        version = "";
        nrow = 0;
        ncol = 0;

        boolean readAttributes = false;
        boolean readData = false;

        while (csvRecordIterator.hasNext() && !readData) {
            CSVRecord record = csvRecordIterator.next();
            String comment = record.getComment();

            if ("VERSION".equals(comment)) {
                version = record.get(0);
            } else if ("METADATA".equals(comment)) {
                nrow = Integer.parseInt(record.get(0).trim());
            } else if ("ATTRIBUTES".equals(comment)) {
                // This record is itself the first variable definition; the following
                // uncommented records are the remaining ones.
                readAttributes = true;
            } else if ("DATA".equals(comment)) {
                readData = true;
                readAttributes = false;
            }

            if (readAttributes) {
                VariableAttributes attributes = parseAttributes(record, ncol);
                variableAttributes.put(attributes.getName(), attributes);
                ncol++;
            }
        }
    }

    /**
     * Builds the VariableAttributes described by a single header record. The record fields are,
     * in order: name, type, scoring, special codes, item group and label.
     *
     * @param record a record from the ATTRIBUTES section of the header
     * @param position running column count, passed as the last VariableAttributes constructor argument
     * @return the attributes of the variable
     */
    private VariableAttributes parseAttributes(CSVRecord record, int position) {
        String name = record.get(0);
        String type = record.get(1);
        String scoring = record.get(2);
        String codes = record.get(3);
        String itemGroup = record.get(4);
        String label = record.get(5);

        //Any type that is not DOUBLE or STRING is treated as INTEGER
        DataType dataType = DataType.INTEGER;
        if (DataType.DOUBLE.toString().equals(type)) {
            dataType = DataType.DOUBLE;
        } else if (DataType.STRING.toString().equals(type)) {
            dataType = DataType.STRING;
        }

        VariableAttributes attributes = new VariableAttributes(
                new VariableName(name), new VariableLabel(label), dataType, position);
        attributes.setItemGroup(itemGroup);

        //only set scoring if scoring was provided
        GenericItemScoring itemScoring = new GenericItemScoring(name, scoring);
        if (itemScoring.getItemType() != ItemType.NOT_ITEM) {
            attributes.setItemScoring(itemScoring);
        }

        SpecialDataCodes specialDataCodes = new SpecialDataCodes();
        specialDataCodes.parseSpecialCodeString(codes);
        attributes.setSpecialDataCodes(specialDataCodes);

        return attributes;
    }

    /**
     * Moves cursor past the header to the first line of data. If the file contains no record
     * with the DATA comment (e.g. an empty table), the cursor is cleared so that
     * {@link #hasNext()} returns false instead of exposing a header record as data.
     */
    private void advanceToFirstDataRecord() {
        nextRecord = null;
        while (dataIterator.hasNext()) {
            CSVRecord record = dataIterator.next();
            if ("DATA".equals(record.getComment())) {
                nextRecord = record;
                return;
            }
        }
    }

    /**
     * Opens a fresh UTF-8 reader on the file, skipping a byte order mark if one is present.
     *
     * @return a new reader positioned at the start of the file
     * @throws IOException if the file cannot be opened
     */
    private Reader newReader() throws IOException {
        return new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), StandardCharsets.UTF_8);
    }

    /**
     * Opens a connection by instantiating the reader and CSVParser. It extracts the
     * header into the VariableAttributeMap and moves the cursor to the first row of data.
     * Any connection left open by a previous call is closed first.
     *
     * @throws IOException if the file cannot be opened or read
     */
    public void openConnection() throws IOException {
        //Release resources from a previous call so that reopening does not leak file handles
        close();
        dataReader = null;
        dataParser = null;
        dataIterator = null;
        nextRecord = null;

        CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#');

        //First pass: read the header only. Closed even if the header cannot be parsed.
        try (Reader headerReader = newReader();
             CSVParser headerParser = new CSVParser(headerReader, format)) {
            setVariableAttributes(headerParser.iterator());
        }

        //Get Column names
        String[] colNames = new String[variableAttributes.size()];
        int index = 0;
        for (VariableName v : variableAttributes.keySet()) {
            colNames[index] = v.toString();
            index++;
        }

        //Advance iterator to first data record (A little inefficient because loops over header a second time)
        //This inefficiency is because the header names must be given to the parser when it is constructed.
        dataReader = newReader();
        dataParser = new CSVParser(dataReader, format.withHeader(colNames));
        dataIterator = dataParser.iterator();
        advanceToFirstDataRecord();
    }

    /**
     * Closes the reader and parser. This method is part of the AutoCloseable interface.
     * It is safe to call when no connection is open.
     *
     * @throws IOException if closing the parser or the reader fails
     */
    @Override
    public void close() throws IOException {
        try {
            if (dataParser != null) {
                dataParser.close();
            }
        } finally {
            //Still close the reader when closing the parser fails
            if (dataReader != null) {
                dataReader.close();
            }
        }
    }

    /**
     * Returns the attributes for a specific variable in the file.
     *
     * @param variableName name of variable for which the attributes are sought.
     * @return the attributes of the variable, or null if the variable is not in the file
     * or the connection has not been opened
     */
    public VariableAttributes getVariableAttributesAt(VariableName variableName) {
        if (variableAttributes == null) {
            return null;
        }
        return variableAttributes.get(variableName);
    }

    /**
     * Gets the number of rows (i.e. cases) in the file. This information is not computed.
     * Rather, it is retrieved from the file header.
     *
     * @return number of rows declared in the header, or 0 if the connection has not been opened
     */
    public int getNumberOfRows() {
        return nrow;
    }

    /**
     * Gets the number of columns or number of variables in the data file.
     *
     * @return number of columns
     */
    public int getNumberOfColumns() {
        return ncol;
    }

    /**
     * Returns the current row of data and moves the cursor to the next one.
     * Callers should check {@link #hasNext()} first; when no row is available the returned
     * record wraps a null CSV record.
     *
     * @return a row of data as a JmetrikCSVRecord
     */
    public JmetrikCSVRecord next() {
        jmetrikCSVRecord = new JmetrikCSVRecord(nextRecord, variableAttributes);
        if (dataIterator != null && dataIterator.hasNext()) {
            nextRecord = dataIterator.next();
        } else {
            nextRecord = null;
        }
        return jmetrikCSVRecord;
    }

    /**
     * Check to see if there is another row of data.
     *
     * @return true if another row exists, false otherwise (including before the connection is opened).
     */
    public boolean hasNext() {
        //nextRecord is the look-ahead row; it is null exactly when the data is exhausted
        return nextRecord != null;
    }

    /**
     * Gets all available data.
     *
     * @return an array of data.
     */
    public Object[][] getData() {
        return getData(Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    /**
     * Reads the first maxRow rows and the first maxCol columns and returns them as an Object[][] array.
     * This method is mainly used for creating a TableModel and displaying data in a JTable.
     * This method can only be called once. To call it a second time, close the connection and reopen it.
     *
     * The number of rows in the result never exceeds the row count declared in the file header.
     * Missing values are returned as null. If the file holds fewer rows than declared, the
     * remaining rows of the array contain only nulls.
     *
     * @param maxRow maximum number of rows to read. Values below zero are treated as zero.
     * @param maxCol maximum number of columns to read. Values below zero are treated as zero.
     * @return array of data values.
     */
    public Object[][] getData(int maxRow, int maxCol) {
        int r = Math.max(0, Math.min(nrow, maxRow));
        int c = Math.max(0, Math.min(ncol, maxCol));

        Object[][] data = new Object[r][c];

        //Bound the loops by the allocated dimensions, not by maxRow/maxCol, so that a file
        //with more rows than its header declares cannot overflow the array.
        int i = 0;
        while (i < r && this.hasNext()) {
            JmetrikCSVRecord record = this.next();
            int j = 0;
            for (VariableName v : variableAttributes.keySet()) {
                if (j >= c) {
                    break;
                }
                data[i][j] = readValue(record, v, variableAttributes.get(v));
                j++;
            }
            i++;
        }
        return data;
    }

    /**
     * Converts one cell of a record to the object type that matches the variable's data type.
     *
     * @param record the row being read
     * @param name the variable to read from the row
     * @param attributes attributes of the variable
     * @return null for a missing value; otherwise the value as a double for DOUBLE variables,
     * as an int for INTEGER variables, and the original value for all other variables
     */
    private Object readValue(JmetrikCSVRecord record, VariableName name, VariableAttributes attributes) {
        if (attributes.isMissing(record.originalValue(name))) {
            return null;
        }
        if (attributes.getDataType() == DataType.DOUBLE) {
            return record.valueOfAsDouble(name);
        }
        if (attributes.getDataType() == DataType.INTEGER) {
            return record.valueOfAsInt(name);
        }
        return record.originalValue(name);
    }

    /**
     * Get an array of all column classes.
     *
     * @return array of column classes.
     */
    public Class[] getColumnClasses() {
        return getColumnClasses(Integer.MAX_VALUE);
    }

    /**
     * Creates an array of classes based on the type of data contained in the first maxCol variables.
     * This method is mainly used for creating a TableModel and displaying data in a JTable.
     *
     * @param maxCol maximum number of columns to read. Values below zero are treated as zero.
     * @return array of column classes: Double for DOUBLE variables, Integer for INTEGER variables
     * and String otherwise.
     */
    public Class[] getColumnClasses(int maxCol) {
        int c = Math.max(0, Math.min(ncol, maxCol));
        Class[] colClass = new Class[c];
        if (c == 0) {
            //Nothing to fill; also covers the case where the connection was never opened
            return colClass;
        }

        int j = 0;
        for (VariableName v : variableAttributes.keySet()) {
            if (j >= c) {
                break;
            }
            VariableAttributes tempAttributes = variableAttributes.get(v);
            if (tempAttributes.getDataType() == DataType.DOUBLE) {
                colClass[j] = Double.class;
            } else if (tempAttributes.getDataType() == DataType.INTEGER) {
                colClass[j] = Integer.class;
            } else {
                colClass[j] = String.class;
            }
            j++;
        }
        return colClass;
    }

    /**
     * Gets an array of all column names.
     *
     * @return array of column names in file order.
     */
    public String[] getColumnNames() {
        return getColumnNames(Integer.MAX_VALUE);
    }

    /**
     * Gets an array of column names for the first maxCol variables.
     * This method is mainly used for creating a TableModel and displaying data in a JTable.
     *
     * @param maxCol maximum number of columns to read. Values below zero are treated as zero.
     * @return array of column names in file order.
     */
    public String[] getColumnNames(int maxCol) {
        int c = Math.max(0, Math.min(ncol, maxCol));
        String[] colName = new String[c];
        if (c == 0) {
            //Nothing to fill; also covers the case where the connection was never opened
            return colName;
        }

        int j = 0;
        for (VariableName v : variableAttributes.keySet()) {
            if (j >= c) {
                break;
            }
            colName[j] = v.toString();
            j++;
        }
        return colName;
    }

}