Java tutorial
/* * Copyright (c) 2014 Patrick Meyer * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.itemanalysis.jmetrik.file; import com.itemanalysis.jmetrik.manager.ImportDataCommand; import com.itemanalysis.psychometrics.data.*; import com.itemanalysis.psychometrics.tools.StopWatch; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVPrinter; import org.apache.commons.csv.CSVRecord; import org.apache.commons.io.input.BOMInputStream; import org.apache.log4j.Logger; import javax.swing.*; import java.io.*; import java.util.*; /** * This class converts a delimited file into a *.jmetrik file. It scans the delimited data file * to determine the variable names and type of data. It may also read an item scoring file to * add attributes about item scoring. Finally, it also obtains attribute information from the * import command such as the missing data codes. * * When attaching item scoring to variable attributes, the variables are matched by name. If no * match is found, the item scoring is ignored. 
* */ public class JmetrikFileImporter extends SwingWorker<String, Void> { private boolean hasHeader = true; private boolean overwrite = false; private int nrow = 0; private File dataFile = null; private String[] colNames = null; private File outputFile = null; private SpecialDataCodes specialDataCodes = null; private ImportDataCommand command = null; private LinkedHashMap<VariableName, VariableAttributes> variableAttributeMap = null; private Throwable theException = null; private StopWatch sw = null; private CSVFormat dataFileFormat = CSVFormat.DEFAULT; static Logger logger = Logger.getLogger("jmetrik-logger"); static Logger scriptLogger = Logger.getLogger("jmetrik-script-logger"); int tempCount = 0; public JmetrikFileImporter(ImportDataCommand command) { this.command = command; variableAttributeMap = new LinkedHashMap<VariableName, VariableAttributes>(); sw = new StopWatch(); } private void parseCommand() { dataFile = command.getDataFile(); outputFile = command.getOutputFile(); hasHeader = command.headerIncluded(); overwrite = command.overwrite(); specialDataCodes = new SpecialDataCodes(command.getSpecialCodesString().toString()); } /** * 1. Gets the file header or creates one. * 2. Sets the number of columns * 3. Checks the type of data stored in each variable using the first rowsToScan rows. * Variables are integers by default. This method will change the data type to either * double or string. 
* */ private void setDataTypes() { CSVParser parser = null; Reader reader = null; try { reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8"); //Get column names from variable attributes colNames = new String[variableAttributeMap.size()]; int index = 0; Iterator<VariableName> iter = variableAttributeMap.keySet().iterator(); VariableName tempName = null; while (iter.hasNext()) { colNames[index++] = iter.next().toString(); } //Create a parser with variable names from the variable attributes if (hasHeader) { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true).withCommentMarker('#')); } else { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withCommentMarker('#')); } //Check data types in each column. String value = ""; Iterator<CSVRecord> csvIter = parser.iterator(); CSVRecord csvRecord = null; double testValue = 0; nrow = 0; while (csvIter.hasNext()) { csvRecord = csvIter.next(); iter = variableAttributeMap.keySet().iterator(); while (iter.hasNext()) { tempName = iter.next(); value = csvRecord.get(tempName.toString()).trim(); //Check that string can be converted to double. If not, Change variable type. 
//Ignore missing data and other special codes try { if (!"".equals(value) && !specialDataCodes.isMissing(value)) { testValue = Double.parseDouble(value); if (testValue != Math.floor(testValue)) { //if any value is a double, the variable is a double variableAttributeMap.get(tempName).setDataType(DataType.DOUBLE); } } } catch (NumberFormatException ex) { //if any value is a String, the variable is a String variableAttributeMap.get(tempName).setDataType(DataType.STRING); } } nrow++; } } catch (IOException ex) { theException = ex; } finally { try { if (parser != null) parser.close(); if (reader != null) reader.close(); } catch (IOException ex) { theException = ex; logger.fatal(ex); } } } /** * Create a header map to the CSV file, but imposes naming conventions on the column names. * */ private void setVariableAttributes() { VariableAttributes variableAttributes = null; int position = 0; Reader reader = null; CSVParser parser = null; VariableName tempName = null; try { reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8"); parser = new CSVParser(reader, dataFileFormat.withHeader()); if (hasHeader) { Map<String, Integer> csvMap = parser.getHeaderMap(); for (String s : csvMap.keySet()) { variableAttributes = new VariableAttributes(new VariableName(s), new VariableLabel(""), DataType.INTEGER, position); variableAttributeMap.put(variableAttributes.getName(), variableAttributes); position++; } } else { Iterator<CSVRecord> iter = parser.iterator(); CSVRecord csvRecord = iter.next(); for (int i = 0; i < csvRecord.size(); i++) { variableAttributes = new VariableAttributes(new VariableName("v" + (i + 1)), new VariableLabel(""), DataType.INTEGER, position); variableAttributeMap.put(variableAttributes.getName(), variableAttributes); position++; } } } catch (IOException ex) { theException = ex; } finally { try { if (parser != null) parser.close(); if (reader != null) reader.close(); } catch (IOException ex) { theException = ex; } } } public int 
getNumberOfColumns() { return variableAttributeMap.size(); } private void convertFile() { CSVParser parser = null; Reader reader = null; CSVPrinter printer = null; Writer writer = null; try { if (outputFile.exists()) { if (!overwrite) { theException = new IOException("File already exists and overwrite==false"); return; } } else { outputFile.createNewFile(); } //For debugging // System.out.println("CREATED: " + outputFile.getAbsolutePath()); //Writer header to file writer = new OutputStreamWriter(new FileOutputStream(outputFile)); printer = new CSVPrinter(writer, CSVFormat.DEFAULT.withCommentMarker('#')); printer.printComment("VERSION"); printer.printRecord(new String[] { "jmetrik1" }); printer.printComment("METADATA"); printer.printRecord(new String[] { Integer.valueOf(nrow).toString() }); printer.printComment("ATTRIBUTES"); for (VariableName v : variableAttributeMap.keySet()) { printer.printRecord(variableAttributeMap.get(v).getAttributeArray()); } printer.printComment("DATA"); //Write data to file reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8"); parser = new CSVParser(reader, dataFileFormat); if (hasHeader) { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true)); } else { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames)); } Iterator<CSVRecord> iter = parser.iterator(); CSVRecord csvRecord = null; VariableAttributes variableAttributes = null; DataType dataType = null; String temp = ""; while (iter.hasNext()) { csvRecord = iter.next(); for (VariableName v : variableAttributeMap.keySet()) { temp = csvRecord.get(v.toString()); variableAttributes = variableAttributeMap.get(v); dataType = variableAttributes.getDataType(); if (!variableAttributes.isMissing(temp)) { if (DataType.INTEGER == dataType) { printer.print(Double.valueOf(Double.parseDouble(temp)).intValue()); } else if (DataType.DOUBLE == dataType) { printer.print(Double.parseDouble(temp)); } else { 
printer.print(temp); } } else { printer.print(temp); } } printer.println(); } } catch (IOException ex) { theException = ex; } finally { try { if (parser != null) parser.close(); if (reader != null) reader.close(); if (printer != null) printer.close(); if (writer != null) writer.close(); } catch (IOException ex) { theException = ex; logger.fatal(ex); } } } @Override public String doInBackground() { firePropertyChange("status", "", "Importing file..."); parseCommand(); //Processes command setVariableAttributes(); //Gets variable names from data file that is being imported setDataTypes(); //Gets data types from data file that is being imported convertFile(); //Writes output file return "Imported file"; } @Override protected void done() { try { if (theException == null) { firePropertyChange("status", "", "Done: " + sw.getElapsedTime()); scriptLogger.info(command.toString()); } else { logger.fatal(theException.getMessage(), theException); firePropertyChange("error", "", "Error - Check log for details."); } } catch (Exception ex) { logger.fatal(ex.getMessage(), ex); firePropertyChange("error", "", "Error - Check log for details."); } } }