Java tutorial
/*
 * Copyright (c) 2014 Patrick Meyer
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package com.itemanalysis.jmetrik.file;

import com.itemanalysis.jmetrik.data.*;
import com.itemanalysis.psychometrics.data.*;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Reads a *.jmetrik file. The file header is exposed as a map of VariableAttributes and the
 * data rows are exposed one at a time, like a record set. An example usage is:
 *
 * <pre>{@code
 * File f = new File(fileName);
 * JmetrikFileReader reader = new JmetrikFileReader(f);
 *
 * try{
 *     reader.openConnection();
 *     LinkedHashMap<VariableName, VariableAttributes> variableAttributes = reader.getVariableAttributes();
 *
 *     //Assume a variable called sid exists in the file.
 *     VariableName sid = new VariableName("sid");
 *
 *     //Read data
 *     JmetrikCSVRecord jmetrikCSVRecord = null;
 *     while(reader.hasNext()){
 *         jmetrikCSVRecord = reader.next();
 *         System.out.println(jmetrikCSVRecord.get(sid));
 *     }
 * }catch(IOException ex){
 *     ex.printStackTrace();
 * }finally{
 *     reader.close();
 * }
 * }</pre>
 *
 * Because the class implements AutoCloseable it can also be used in a try-with-resources statement.
 */
public class JmetrikFileReader implements AutoCloseable {

    private final Path f;
    private String version = "";
    private int nrow = 0;
    private int ncol = 0;
    private CSVParser dataParser = null;
    private Iterator<CSVRecord> dataIterator = null;
    private Reader dataReader = null;

    /** The record that the next call to {@link #next()} will return, or null when no data remains. */
    private CSVRecord nextRecord = null;
    private LinkedHashMap<VariableName, VariableAttributes> variableAttributes = null;
    JmetrikCSVRecord jmetrikCSVRecord = null;

    public JmetrikFileReader(File f) {
        this.f = f.toPath();
    }

    public JmetrikFileReader(Path f) {
        this.f = f;
    }

    /**
     * Returns the map of variable attributes that was extracted from the file header by
     * {@link #openConnection()}. The map is null until a connection has been opened.
     *
     * @return map of the file attributes
     * @throws IOException never thrown by this implementation; retained for backward compatibility.
     */
    public LinkedHashMap<VariableName, VariableAttributes> getVariableAttributes() throws IOException {
        return variableAttributes;
    }

    /**
     * Extracts VariableAttributes from the file header. The parser attaches a comment line to the
     * record that follows it, so the comments VERSION, METADATA, ATTRIBUTES and DATA mark the
     * section that the current record begins. Reading stops at the first record marked DATA.
     *
     * All header-derived state (version, row count, column count and the attribute map) is reset
     * first, so that reopening a connection does not accumulate counts from the previous read.
     *
     * @param csvRecordIterator iterator positioned at the start of the file.
     */
    private void setVariableAttributes(Iterator<CSVRecord> csvRecordIterator) {
        variableAttributes = new LinkedHashMap<VariableName, VariableAttributes>();
        version = "";
        nrow = 0;
        ncol = 0;

        boolean readAttributes = false;
        boolean readData = false;

        while (csvRecordIterator.hasNext() && !readData) {
            nextRecord = csvRecordIterator.next();
            String comment = nextRecord.getComment();

            if ("VERSION".equals(comment)) {
                version = nextRecord.get(0);
            } else if ("METADATA".equals(comment)) {
                nrow = Integer.parseInt(nextRecord.get(0).trim());
            } else if ("ATTRIBUTES".equals(comment)) {
                readAttributes = true;
            } else if ("DATA".equals(comment)) {
                readData = true;
                readAttributes = false;
            }

            if (readAttributes) {
                addVariableAttributes(nextRecord);
            }
        }
    }

    /**
     * Converts one header record (name, type, scoring, codes, item group, label) into a
     * VariableAttributes object, stores it in the attribute map and increments the column count.
     *
     * @param record a record from the ATTRIBUTES section of the header.
     */
    private void addVariableAttributes(CSVRecord record) {
        String name = record.get(0);
        String type = record.get(1);
        String scoring = record.get(2);
        String codes = record.get(3);
        String itemGroup = record.get(4);
        String label = record.get(5);

        //Any type that is not recognized as DOUBLE or STRING is treated as INTEGER
        DataType dataType = DataType.INTEGER;
        if (DataType.DOUBLE.toString().equals(type)) {
            dataType = DataType.DOUBLE;
        } else if (DataType.STRING.toString().equals(type)) {
            dataType = DataType.STRING;
        }

        //The current column count is passed as the last argument (presumably the column position)
        VariableAttributes attributes =
                new VariableAttributes(new VariableName(name), new VariableLabel(label), dataType, ncol);
        attributes.setItemGroup(itemGroup);

        //only set scoring if scoring was provided
        GenericItemScoring itemScoring = new GenericItemScoring(name, scoring);
        if (itemScoring.getItemType() != ItemType.NOT_ITEM) {
            attributes.setItemScoring(itemScoring);
        }

        SpecialDataCodes specialDataCodes = new SpecialDataCodes();
        specialDataCodes.parseSpecialCodeString(codes);
        attributes.setSpecialDataCodes(specialDataCodes);

        variableAttributes.put(attributes.getName(), attributes);
        ncol++;
    }

    /**
     * Moves the cursor past the header to the first line of data. If no record is marked with the
     * DATA comment (for example, a file with a header but no rows), the cursor is left empty so
     * that {@link #hasNext()} returns false instead of exposing a header record as data.
     */
    private void advanceToFirstDataRecord() {
        nextRecord = null;
        while (dataIterator.hasNext()) {
            CSVRecord candidate = dataIterator.next();
            if ("DATA".equals(candidate.getComment())) {
                nextRecord = candidate;
                return;
            }
        }
    }

    /**
     * Creates a UTF-8 reader over the file, skipping a byte order mark if one is present.
     *
     * @return a new reader positioned at the start of the file.
     * @throws IOException if the file cannot be opened.
     */
    private Reader newReader() throws IOException {
        return new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), StandardCharsets.UTF_8);
    }

    /**
     * Opens a connection by instantiating the reader and CSVParser. It extracts the
     * header into the VariableAttributeMap and moves the cursor to the first row of data.
     * Any connection that is already open is closed first.
     *
     * @throws IOException if the file cannot be opened or read.
     */
    public void openConnection() throws IOException {
        //Release resources from a previous call so that reopening does not leak them
        close();

        CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#');

        //First pass: read the header only. The resources are closed even if parsing fails.
        try (Reader headerReader = newReader();
             CSVParser headerParser = new CSVParser(headerReader, format)) {
            setVariableAttributes(headerParser.iterator());
        }

        //Get Column names
        String[] colNames = new String[variableAttributes.size()];
        int index = 0;
        for (VariableName v : variableAttributes.keySet()) {
            colNames[index] = v.toString();
            index++;
        }

        //Second pass: advance iterator to first data record (a little inefficient because it loops
        //over the header a second time). The file is read twice because the column names must be
        //supplied when the parser is created.
        dataReader = newReader();
        dataParser = new CSVParser(dataReader, format.withHeader(colNames));
        dataIterator = dataParser.iterator();
        advanceToFirstDataRecord();
    }

    /**
     * Closes the reader and parser. This method is part of the AutoCloseable interface.
     * It is safe to call before a connection has been opened. The parser is closed even if
     * closing the reader fails.
     *
     * @throws IOException if a resource cannot be closed.
     */
    @Override
    public void close() throws IOException {
        try {
            if (dataReader != null) dataReader.close();
        } finally {
            if (dataParser != null) dataParser.close();
        }
    }

    /**
     * Returns the attributes for a specific variable in the file.
     *
     * @param variableName name of variable for which the attributes are sought.
     * @return the attributes of the variable, or null if the variable is not in the file or
     * no connection has been opened.
     */
    public VariableAttributes getVariableAttributesAt(VariableName variableName) {
        if (variableAttributes == null) return null;
        return variableAttributes.get(variableName);
    }

    /**
     * Gets the number of rows (i.e. cases) in the file. This information is not computed.
     * Rather, it is retrieved from the file header.
     *
     * @return number of rows declared in the header.
     */
    public int getNumberOfRows() {
        return nrow;
    }

    /**
     * Gets the number of columns or number of variables in the data file.
     *
     * @return number of columns
     */
    public int getNumberOfColumns() {
        return ncol;
    }

    /**
     * Returns the current row of data and moves the cursor to the next row.
     * Callers should check {@link #hasNext()} first; when no row remains the returned
     * object wraps a null record.
     *
     * @return a row of data as a JmetrikCSVRecord
     */
    public JmetrikCSVRecord next() {
        jmetrikCSVRecord = new JmetrikCSVRecord(nextRecord, variableAttributes);
        if (dataIterator.hasNext()) {
            nextRecord = dataIterator.next();
        } else {
            nextRecord = null;
        }
        return jmetrikCSVRecord;
    }

    /**
     * Check to see if there is another row of data. The reader always holds the upcoming row
     * in its cursor, so a row exists exactly when the cursor is not empty.
     *
     * @return true if another row exists, false otherwise (including before a connection is opened).
     */
    public boolean hasNext() {
        return nextRecord != null;
    }

    /**
     * Gets all available data.
     *
     * @return an array of data.
     */
    public Object[][] getData() {
        return getData(Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    /**
     * Reads the first maxRow rows and the first maxCol columns and returns them as an Object[][] array.
     * This method is mainly used for creating a TableModel and displaying data in a JTable.
     * This method can only be called once. To call it a second time, close the connection and reopen it.
     *
     * The array has min(number of rows in header, maxRow) rows. Rows beyond that size are not read,
     * even if the file contains more rows than the header declares. If the file contains fewer rows,
     * the remaining rows of the array are left filled with null values.
     *
     * @param maxRow maximum number of rows to read.
     * @param maxCol maximum number of columns to read.
     * @return array of data values. Missing values are represented by null.
     */
    public Object[][] getData(int maxRow, int maxCol) {
        int r = Math.max(0, Math.min(nrow, maxRow));
        int c = Math.max(0, Math.min(ncol, maxCol));
        Object[][] data = new Object[r][c];

        int i = 0;
        //Bound the loops by the allocated size rather than by maxRow and maxCol
        while (i < r && hasNext()) {
            JmetrikCSVRecord record = next();
            int j = 0;
            for (Map.Entry<VariableName, VariableAttributes> entry : variableAttributes.entrySet()) {
                if (j >= c) break;
                data[i][j] = cellValue(record, entry.getKey(), entry.getValue());
                j++;
            }
            i++;
        }
        return data;
    }

    /**
     * Converts one cell of a record to the object stored in the data array.
     *
     * @param record the row being read.
     * @param name name of the variable to extract.
     * @param attributes attributes of the variable, which determine its type and missing codes.
     * @return null for a missing value, otherwise the value converted according to the data type.
     */
    private static Object cellValue(JmetrikCSVRecord record, VariableName name, VariableAttributes attributes) {
        if (attributes.isMissing(record.originalValue(name))) {
            return null;
        }
        if (attributes.getDataType() == DataType.DOUBLE) {
            return record.valueOfAsDouble(name);
        }
        if (attributes.getDataType() == DataType.INTEGER) {
            return record.valueOfAsInt(name);
        }
        return record.originalValue(name);
    }

    /**
     * Get an array of all column classes.
     *
     * @return array of column classes.
     */
    public Class[] getColumnClasses() {
        return getColumnClasses(Integer.MAX_VALUE);
    }

    /**
     * Creates an array of classes based on the type of data contained in the first maxCol variables.
     * This method is mainly used for creating a TableModel and displaying data in a JTable.
     *
     * @param maxCol maximum number of columns to read.
     * @return array of column classes. The array is empty if maxCol is not positive.
     */
    public Class[] getColumnClasses(int maxCol) {
        int c = Math.max(0, Math.min(ncol, maxCol));
        Class[] colClass = new Class[c];
        if (c == 0) return colClass;

        int j = 0;
        for (VariableAttributes attributes : variableAttributes.values()) {
            if (j >= c) break;
            if (attributes.getDataType() == DataType.DOUBLE) {
                colClass[j] = Double.class;
            } else if (attributes.getDataType() == DataType.INTEGER) {
                colClass[j] = Integer.class;
            } else {
                colClass[j] = String.class;
            }
            j++;
        }
        return colClass;
    }

    /**
     * Gets an array of all column names.
     *
     * @return array of column names.
     */
    public String[] getColumnNames() {
        return getColumnNames(Integer.MAX_VALUE);
    }

    /**
     * Gets an array of column name for the first maxCol variables.
     * This method is mainly used for creating a TableModel and displaying data in a JTable.
     *
     * @param maxCol maximum number of columns to read.
     * @return array of column names. The array is empty if maxCol is not positive.
     */
    public String[] getColumnNames(int maxCol) {
        int c = Math.max(0, Math.min(ncol, maxCol));
        String[] colName = new String[c];
        if (c == 0) return colName;

        int j = 0;
        for (VariableName v : variableAttributes.keySet()) {
            if (j >= c) break;
            colName[j] = v.toString();
            j++;
        }
        return colName;
    }
}