com.itemanalysis.jmetrik.file.JmetrikFileImporter.java Source code

Introduction

Here is the source code for com.itemanalysis.jmetrik.file.JmetrikFileImporter.java
Source

/*
 * Copyright (c) 2014 Patrick Meyer
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.itemanalysis.jmetrik.file;

import com.itemanalysis.jmetrik.manager.ImportDataCommand;
import com.itemanalysis.psychometrics.data.*;
import com.itemanalysis.psychometrics.tools.StopWatch;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.log4j.Logger;

import javax.swing.*;
import java.io.*;
import java.util.*;

/**
 * Converts a delimited text file into a *.jmetrik file.
 *
 * The import runs in three passes over the data file:
 * <ol>
 *   <li>{@link #setVariableAttributes()} reads the header (or generates names v1, v2, ...) and
 *       creates one integer-typed variable per column.</li>
 *   <li>{@link #setDataTypes()} scans every record, counts the rows, and changes a variable's
 *       type to double or string where its values require it.</li>
 *   <li>{@link #convertFile()} writes the *.jmetrik output: version, row count, variable
 *       attributes, and the data.</li>
 * </ol>
 * The missing data codes used while scanning are obtained from the import command.
 *
 * All three passes treat lines that start with '#' as comments so that they agree on which
 * lines are data.
 *
 * NOTE(review): earlier documentation said this class may also read an item scoring file and
 * match scoring to variables by name. No such step exists in this class.
 */
public class JmetrikFileImporter extends SwingWorker<String, Void> {

    /** Marker that starts a comment line in the imported file and in the *.jmetrik output. */
    private static final char COMMENT_MARKER = '#';

    /** Character set used to read the delimited data file. */
    private static final String DATA_FILE_CHARSET = "UTF-8";

    private boolean hasHeader = true;
    private boolean overwrite = false;
    private int nrow = 0;
    private File dataFile = null;
    private String[] colNames = null;
    private File outputFile = null;
    private SpecialDataCodes specialDataCodes = null;
    private ImportDataCommand command = null;
    private LinkedHashMap<VariableName, VariableAttributes> variableAttributeMap = null;

    /** First failure encountered during the import, or null if none. Read by {@link #done()}. */
    private Throwable theException = null;
    private StopWatch sw = null;
    private CSVFormat dataFileFormat = CSVFormat.DEFAULT;
    static Logger logger = Logger.getLogger("jmetrik-logger");
    static Logger scriptLogger = Logger.getLogger("jmetrik-script-logger");

    //Not used inside this class. Retained because it is package-visible.
    int tempCount = 0;

    /**
     * Creates an importer for the given command. The command is not read until the worker runs.
     *
     * @param command import command that supplies the data file, output file, header flag,
     *                overwrite flag, and special (missing) data codes
     */
    public JmetrikFileImporter(ImportDataCommand command) {
        this.command = command;
        variableAttributeMap = new LinkedHashMap<VariableName, VariableAttributes>();
        sw = new StopWatch();
    }

    /**
     * Copies the import options from the command into fields.
     */
    private void parseCommand() {
        dataFile = command.getDataFile();
        outputFile = command.getOutputFile();
        hasHeader = command.headerIncluded();
        overwrite = command.overwrite();
        specialDataCodes = new SpecialDataCodes(command.getSpecialCodesString().toString());
    }

    /**
     * Opens the data file for reading as UTF-8, skipping a leading byte order mark.
     *
     * @return a new reader positioned at the start of the data file
     * @throws IOException if the file cannot be opened
     */
    private Reader openDataReader() throws IOException {
        return new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), DATA_FILE_CHARSET);
    }

    /**
     * Creates the parser used by the type scan and by the conversion. Both passes must use the
     * same format, otherwise the row count written to the output would not match the rows
     * actually written.
     *
     * Columns are addressed by the names in {@link #colNames}. When the file has a header, the
     * header record is skipped.
     *
     * @param reader reader positioned at the start of the data file
     * @return a parser over the data records
     * @throws IOException if the parser cannot be created
     */
    private CSVParser openDataParser(Reader reader) throws IOException {
        CSVFormat format = dataFileFormat.withHeader(colNames);
        if (hasHeader) {
            format = format.withSkipHeaderRecord(true);
        }
        return new CSVParser(reader, format.withCommentMarker(COMMENT_MARKER));
    }

    /**
     * Closes a resource and records a failure instead of throwing it, so that one failed close
     * does not prevent the remaining resources from being closed. An earlier exception is kept
     * because it is the more likely root cause.
     *
     * @param resource resource to close; may be null
     */
    private void close(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ex) {
            if (theException == null) {
                theException = ex;
            }
            logger.fatal(ex);
        }
    }

    /**
     * Scans every record of the data file to count the rows and to determine the data type of
     * each variable. Variables are integers by default. The type is changed to
     * <ul>
     *   <li>string, if any value cannot be parsed as a number. A string variable is never
     *       changed back to a numeric type.</li>
     *   <li>double, if any value has a fractional part or does not fit in an int.</li>
     * </ul>
     * Empty values and special data codes are ignored. Also sets {@link #colNames} from the
     * variable names, which {@link #convertFile()} relies on.
     */
    private void setDataTypes() {
        CSVParser parser = null;
        Reader reader = null;

        try {
            reader = openDataReader();

            //Get column names from variable attributes
            colNames = new String[variableAttributeMap.size()];
            int index = 0;
            for (VariableName name : variableAttributeMap.keySet()) {
                colNames[index++] = name.toString();
            }

            parser = openDataParser(reader);
            nrow = 0;

            for (CSVRecord csvRecord : parser) {
                for (Map.Entry<VariableName, VariableAttributes> entry : variableAttributeMap.entrySet()) {
                    String value = csvRecord.get(entry.getKey().toString()).trim();
                    VariableAttributes attributes = entry.getValue();

                    try {
                        //Ignore missing data and other special codes
                        if (!"".equals(value) && !specialDataCodes.isMissing(value)) {
                            double testValue = Double.parseDouble(value);

                            //A fraction, NaN, or a value outside the int range cannot be stored
                            //as an integer without changing it.
                            boolean needsDouble = testValue != Math.floor(testValue)
                                    || testValue > Integer.MAX_VALUE
                                    || testValue < Integer.MIN_VALUE;

                            //Never downgrade a variable already known to hold text
                            if (needsDouble && attributes.getDataType() != DataType.STRING) {
                                attributes.setDataType(DataType.DOUBLE);
                            }
                        }
                    } catch (NumberFormatException ex) {
                        //if any value is a String, the variable is a String
                        attributes.setDataType(DataType.STRING);
                    }
                }
                nrow++;
            }

        } catch (IOException ex) {
            theException = ex;
        } finally {
            close(parser);
            close(reader);
        }

    }

    /**
     * Creates one variable per column of the data file. Every variable starts as an integer
     * with an empty label and a position equal to its column order.
     *
     * With a header, the variable names are built from the header names via
     * {@link VariableName}. Without a header, the first record is read only to count the
     * columns and the variables are named v1, v2, and so on. A headerless file with no records
     * is reported as an error.
     */
    private void setVariableAttributes() {
        Reader reader = null;
        CSVParser parser = null;

        try {
            reader = openDataReader();
            CSVFormat format = dataFileFormat.withCommentMarker(COMMENT_MARKER);

            if (hasHeader) {
                //An empty header array tells the parser to read the names from the first record
                parser = new CSVParser(reader, format.withHeader());
                Map<String, Integer> csvMap = parser.getHeaderMap();
                if (csvMap != null) {
                    int position = 0;
                    for (String columnName : csvMap.keySet()) {
                        addVariable(new VariableName(columnName), position++);
                    }
                }
            } else {
                //No header: the first record is data and must not be consumed as column names
                parser = new CSVParser(reader, format);
                Iterator<CSVRecord> records = parser.iterator();
                if (!records.hasNext()) {
                    theException = new IOException("No records found in data file: " + dataFile.getAbsolutePath());
                    return;
                }

                int numberOfColumns = records.next().size();
                for (int i = 0; i < numberOfColumns; i++) {
                    addVariable(new VariableName("v" + (i + 1)), i);
                }
            }

        } catch (IOException ex) {
            theException = ex;
        } finally {
            close(parser);
            close(reader);
        }
    }

    /**
     * Registers an integer-typed variable with an empty label.
     *
     * @param name     name of the variable
     * @param position zero-based column position of the variable
     */
    private void addVariable(VariableName name, int position) {
        VariableAttributes attributes = new VariableAttributes(name, new VariableLabel(""),
                DataType.INTEGER, position);
        variableAttributeMap.put(attributes.getName(), attributes);
    }

    /**
     * Gets the number of variables found in the data file.
     *
     * @return number of variables; zero before the import has read the file
     */
    public int getNumberOfColumns() {
        return variableAttributeMap.size();
    }

    /**
     * Writes the *.jmetrik file: the header sections followed by one record per data row.
     * Must run after {@link #setDataTypes()}, which provides the column names, the row count,
     * and the data types.
     *
     * If the output file exists and overwriting is not allowed, an error is recorded and
     * nothing is written.
     */
    private void convertFile() {
        CSVParser parser = null;
        Reader reader = null;
        CSVPrinter printer = null;
        Writer writer = null;

        try {
            if (outputFile.exists()) {
                if (!overwrite) {
                    theException = new IOException("File already exists and overwrite==false");
                    return;
                }
            } else {
                outputFile.createNewFile();
            }

            //NOTE(review): the output is written in the platform default character set while
            //the input is read as UTF-8. Left unchanged because the reader of *.jmetrik files
            //is not visible here - confirm which character set it expects.
            writer = new OutputStreamWriter(new FileOutputStream(outputFile));
            printer = new CSVPrinter(writer, CSVFormat.DEFAULT.withCommentMarker(COMMENT_MARKER));
            writeHeader(printer);

            //Write data to file
            reader = openDataReader();
            parser = openDataParser(reader);

            for (CSVRecord csvRecord : parser) {
                for (Map.Entry<VariableName, VariableAttributes> entry : variableAttributeMap.entrySet()) {
                    String text = csvRecord.get(entry.getKey().toString());
                    printer.print(convertValue(text, entry.getValue()));
                }
                printer.println();
            }

        } catch (IOException ex) {
            theException = ex;
        } finally {
            //Each resource is closed independently so the output is always closed
            close(printer);
            close(writer);
            close(parser);
            close(reader);
        }
    }

    /**
     * Writes the VERSION, METADATA, and ATTRIBUTES sections and the DATA section marker.
     *
     * @param printer printer for the output file
     * @throws IOException if writing fails
     */
    private void writeHeader(CSVPrinter printer) throws IOException {
        printer.printComment("VERSION");
        printer.printRecord(new String[] { "jmetrik1" });
        printer.printComment("METADATA");
        printer.printRecord(new String[] { Integer.toString(nrow) });
        printer.printComment("ATTRIBUTES");
        for (VariableAttributes attributes : variableAttributeMap.values()) {
            printer.printRecord(attributes.getAttributeArray());
        }
        printer.printComment("DATA");
    }

    /**
     * Converts the text of one cell to the value written to the output file.
     *
     * Missing values and values of string variables are written unchanged. Values of numeric
     * variables are parsed and written as an int or a double. A value of a numeric variable
     * that cannot be parsed is one that the type scan skipped as empty or as a special code,
     * so it is written unchanged like any other missing value instead of aborting the import.
     *
     * @param text       cell text from the data file
     * @param attributes attributes of the variable the cell belongs to
     * @return the Integer, Double, or String to print
     */
    private Object convertValue(String text, VariableAttributes attributes) {
        if (attributes.isMissing(text)) {
            return text;
        }

        DataType dataType = attributes.getDataType();
        try {
            if (DataType.INTEGER == dataType) {
                return Integer.valueOf((int) Double.parseDouble(text));
            } else if (DataType.DOUBLE == dataType) {
                return Double.valueOf(Double.parseDouble(text));
            }
        } catch (NumberFormatException ex) {
            return text;
        }
        return text;
    }

    /**
     * Runs the import. A pass is skipped when an earlier pass recorded an error, so that a
     * failed read never creates or truncates the output file.
     *
     * Unexpected runtime exceptions are recorded for {@link #done()} and then rethrown, so
     * callers of {@code get()} still receive them.
     *
     * @return the text "Imported file"
     */
    @Override
    public String doInBackground() {
        firePropertyChange("status", "", "Importing file...");
        try {
            parseCommand(); //Processes command
            setVariableAttributes(); //Gets variable names from data file that is being imported
            if (theException == null) {
                setDataTypes(); //Gets data types from data file that is being imported
            }
            if (theException == null) {
                convertFile(); //Writes output file
            }
        } catch (RuntimeException ex) {
            if (theException == null) {
                theException = ex;
            }
            throw ex;
        }
        return "Imported file";
    }

    /**
     * Reports the outcome. On success fires a "status" property change with the elapsed time
     * and writes the command to the script log. On failure logs the recorded exception and
     * fires an "error" property change.
     */
    @Override
    protected void done() {
        try {
            if (theException == null) {
                firePropertyChange("status", "", "Done: " + sw.getElapsedTime());
                scriptLogger.info(command.toString());
            } else {
                logger.fatal(theException.getMessage(), theException);
                firePropertyChange("error", "", "Error - Check log for details.");
            }
        } catch (Exception ex) {
            logger.fatal(ex.getMessage(), ex);
            firePropertyChange("error", "", "Error - Check log for details.");
        }

    }

}