List of usage examples for java.nio.ByteBuffer.getDouble()
public abstract double getDouble();
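getDouble() is a relative read: it takes the next eight bytes at the buffer's position, composes them into a double using the buffer's current byte order (big-endian by default), and advances the position by eight. A minimal self-contained sketch before the project-specific examples below (the class name and value are illustrative):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class GetDoubleDemo {
    public static void main(String[] args) {
        // Encode a double into eight bytes, then decode it back.
        byte[] bytes = ByteBuffer.allocate(8).putDouble(Math.PI).array();

        ByteBuffer buffer = ByteBuffer.wrap(bytes);
        // The default order is BIG_ENDIAN; call buffer.order(ByteOrder.LITTLE_ENDIAN)
        // first if the bytes come from a little-endian producer.
        double value = buffer.getDouble(); // reads 8 bytes, advances position by 8

        System.out.println(value); // 3.141592653589793
    }
}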
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java
private void decodeData(BufferedInputStream stream) throws IOException {
    dbgLog.fine("\n***** decodeData(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    //int nvar = (Integer)smd.getFileInformation().get("varQnty");
    int nvar = dataTable.getVarQuantity().intValue();
    //int nobs = (Integer)smd.getFileInformation().get("caseQnty");
    int nobs = dataTable.getCaseQuantity().intValue();

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")");
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("bytes per row=" + bytes_per_row + " bytes");
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypes=" + Arrays.deepToString(variableTypes));
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("StringLengthTable=" + StringLengthTable);
    }

    // create a File object to save the tab-delimited data file
    FileOutputStream fileOutTab = null;
    PrintWriter pwout = null;

    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");

    // save the temp tab-delimited file in the return ingest object:
    ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);

    fileOutTab = new FileOutputStream(tabDelimitedDataFile);
    pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);

    /* Should we lose this dateFormat thing in 4.0?
     * The UNF should be computable on the app side solely from the data
     * stored in the tab file and the type information stored in the
     * dataVariable object. Furthermore, the very idea of storing a format
     * entry not just for every variable, but for every value/observation,
     * is a bit strange.
     * TODO: review and confirm that, in the 3.* implementation, every
     * entry in dateFormat[nvar][*] is indeed the same - except for the
     * missing value entries. -- L.A. 4.0
     * (OK, I got rid of the dateFormat; instead I kinda sorta assume that
     * the format is the same for every value in a column, save for the
     * missing values... like this:
     * dataTable.getDataVariables().get(columnCounter).setFormatSchemaName(ddt.format);
     * BUT, this needs to be reviewed/confirmed etc!)
     */
    //String[][] dateFormat = new String[nvar][nobs];

    for (int i = 0; i < nobs; i++) {
        byte[] dataRowBytes = new byte[bytes_per_row];
        Object[] dataRow = new Object[nvar];

        int nbytes = stream.read(dataRowBytes, 0, bytes_per_row);

        if (nbytes == 0) {
            String errorMessage = "reading data: no data were read at (" + i + "-th row)";
            throw new IOException(errorMessage);
        }

        // decoding each row
        int byte_offset = 0;
        for (int columnCounter = 0; columnCounter < variableTypes.length; columnCounter++) {

            Integer varType = variableTypeMap.get(variableTypes[columnCounter]);

            // 4.0 Check if this is a time/date variable:
            boolean isDateTimeDatum = false;
            String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory();
            if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) {
                isDateTimeDatum = true;
            }

            String variableFormat = dateVariableFormats[columnCounter];

            switch (varType != null ? varType : 256) {
            case -5:
                // Byte case; note: 1-byte signed
                byte byte_datum = dataRowBytes[byte_offset];

                if (dbgLog.isLoggable(Level.FINER)) {
                    dbgLog.finer(i + "-th row " + columnCounter + "-th column byte =" + byte_datum);
                }
                if (byte_datum >= BYTE_MISSING_VALUE) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "-th column byte MV=" + byte_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    dataRow[columnCounter] = byte_datum;
                }
                byte_offset++;
                break;
            case -4:
                // Stata-int (= java's short: 2 bytes) case
                // note: 2-byte signed int, not java's int
                ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2);
                if (isLittleEndian) {
                    int_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                short short_datum = int_buffer.getShort();

                if (dbgLog.isLoggable(Level.FINER)) {
                    dbgLog.finer(i + "-th row " + columnCounter + "-th column stata int =" + short_datum);
                }
                if (short_datum >= INT_MISSIG_VALUE) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "-th column stata int missing value=" + short_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum));
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                        }
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        //dateFormat[columnCounter][i] = ddt.format;
                        dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                    } else {
                        dataRow[columnCounter] = short_datum;
                    }
                }
                byte_offset += 2;
                break;
            case -3:
                // stata-Long (= java's int: 4 bytes) case
                // note: 4-byte signed, not java's long
                //dbgLog.fine("DATreader: stata long");
                ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                if (isLittleEndian) {
                    long_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                int int_datum = long_buffer.getInt();

                if (dbgLog.isLoggable(Level.FINE)) {
                    //dbgLog.fine(i + "-th row " + columnCounter
                    //        + "-th column stata long =" + int_datum);
                }
                if (int_datum >= LONG_MISSING_VALUE) {
                    if (dbgLog.isLoggable(Level.FINE)) {
                        //dbgLog.fine(i + "-th row " + columnCounter
                        //        + "-th column stata long missing value=" + int_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum));
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                        }
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                    } else {
                        dataRow[columnCounter] = int_datum;
                    }
                }
                byte_offset += 4;
                break;
            case -2:
                // float case; note: 4-byte
                ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                if (isLittleEndian) {
                    float_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                float float_datum = float_buffer.getFloat();

                if (dbgLog.isLoggable(Level.FINER)) {
                    dbgLog.finer(i + "-th row " + columnCounter + "-th column float =" + float_datum);
                }
                if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "-th column float missing value=" + float_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum));
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                        }
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                    } else {
                        dataRow[columnCounter] = float_datum;
                        // This may be temporary - but for now (as in, while I'm testing
                        // 4.0 ingest against 3.* ingest), I need to be able to tell if a
                        // floating point value was a single or double float in the
                        // original STATA file: -- L.A. Jul. 2014
                        dataTable.getDataVariables().get(columnCounter).setFormat("float");
                    }
                }
                byte_offset += 4;
                break;
            case -1:
                // double case; note: 8-byte
                ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8);
                if (isLittleEndian) {
                    double_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                double double_datum = double_buffer.getDouble();

                if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "-th column double missing value=" + double_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum));
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                        }
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                    } else {
                        dataRow[columnCounter] = doubleNumberFormatter.format(double_datum);
                    }
                }
                byte_offset += 8;
                break;
            case 0:
                // String case
                int strVarLength = StringLengthTable.get(columnCounter);
                String raw_datum = new String(
                        Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)),
                        "ISO-8859-1");
                // TODO:
                // is it the right thing to do, to default to "ISO-8859-1"?
                // (it may be; since there's no mechanism for specifying
                // alternative encodings in Stata, this may be their default;
                // it just needs to be verified. -- L.A. Jul. 2014)

                String string_datum = getNullStrippedString(raw_datum);
                if (dbgLog.isLoggable(Level.FINER)) {
                    dbgLog.finer(i + "-th row " + columnCounter + "-th column string =" + string_datum);
                }
                if (string_datum.isEmpty()) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "-th column string missing value=" + string_datum);
                    }
                    // TODO:
                    /* Is this really a missing value case?
                     * Or is it an honest empty string?
                     * Is there such a thing as a missing value for a String in Stata?
                     * -- L.A. 4.0
                     */
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    /*
                     * Some special characters, like new lines and tabs, need to
                     * be escaped - otherwise they will break our TAB file
                     * structure! But before we escape anything, all the
                     * backslashes already in the string need to be escaped
                     * themselves.
                     */
                    String escapedString = string_datum.replace("\\", "\\\\");
                    // escape quotes:
                    escapedString = escapedString.replaceAll("\"", Matcher.quoteReplacement("\\\""));
                    // escape tabs and new lines:
                    escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t"));
                    escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n"));
                    escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r"));
                    // the escaped version of the string is stored in the tab file
                    // enclosed in double-quotes; this is in order to be able
                    // to differentiate between an empty string (tab-delimited empty
                    // string in double quotes) and a missing value (tab-delimited
                    // empty string). Although the question still remains - is it
                    // even possible to store an empty string, that's not a missing
                    // value, in Stata? - see the comment in the missing value case
                    // above. -- L.A. 4.0
                    dataRow[columnCounter] = "\"" + escapedString + "\"";
                }
                byte_offset += strVarLength;
                break;
            default:
                dbgLog.fine("unknown variable type found");
                String errorMessage = "unknown variable type found in the data section";
                throw new InvalidObjectException(errorMessage);
            } // switch
        } // for-columnCounter

        // Dump the row of data to the tab-delimited file we are producing:
        pwout.println(StringUtils.join(dataRow, "\t"));

        if (dbgLog.isLoggable(Level.FINE)) {
            //dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};");
        }
    } // for-i (row)

    pwout.close();

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes));
    }
    dbgLog.fine("DTA Ingest: decodeData(): end.");
}
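The case -1 branch above is where this reader calls getDouble(): ByteBuffer.wrap(array, offset, length) positions the buffer at offset, so the relative getDouble() decodes exactly the eight bytes of the current column. A standalone sketch of that offset-wrap pattern (the row bytes and offset are fabricated for illustration):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class OffsetWrapDemo {
    public static void main(String[] args) {
        // Fake "row": 4 bytes of other columns, then a little-endian double.
        byte[] row = ByteBuffer.allocate(12)
                .order(ByteOrder.LITTLE_ENDIAN)
                .putInt(7)
                .putDouble(2.75)
                .array();

        int byte_offset = 4; // column offset within the row
        ByteBuffer double_buffer = ByteBuffer.wrap(row, byte_offset, 8);
        double_buffer.order(ByteOrder.LITTLE_ENDIAN);

        // position == byte_offset, so this reads bytes 4..11
        System.out.println(double_buffer.getDouble()); // 2.75
    }
}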
From source file:com.healthmarketscience.jackcess.Column.java
/**
 * Deserialize a raw byte value for this column into an Object
 * @param data The raw byte value
 * @param order Byte order in which the raw value is stored
 * @return The deserialized Object
 * @usage _advanced_method_
 */
public Object read(byte[] data, ByteOrder order) throws IOException {
    ByteBuffer buffer = ByteBuffer.wrap(data);
    buffer.order(order);

    if (_type == DataType.BOOLEAN) {
        throw new IOException("Tried to read a boolean from data instead of null mask.");
    } else if (_type == DataType.BYTE) {
        return Byte.valueOf(buffer.get());
    } else if (_type == DataType.INT) {
        return Short.valueOf(buffer.getShort());
    } else if (_type == DataType.LONG) {
        return Integer.valueOf(buffer.getInt());
    } else if (_type == DataType.DOUBLE) {
        return Double.valueOf(buffer.getDouble());
    } else if (_type == DataType.FLOAT) {
        return Float.valueOf(buffer.getFloat());
    } else if (_type == DataType.SHORT_DATE_TIME) {
        return readDateValue(buffer);
    } else if (_type == DataType.BINARY) {
        return data;
    } else if (_type == DataType.TEXT) {
        return decodeTextValue(data);
    } else if (_type == DataType.MONEY) {
        return readCurrencyValue(buffer);
    } else if (_type == DataType.OLE) {
        if (data.length > 0) {
            return readLongValue(data);
        }
        return null;
    } else if (_type == DataType.MEMO) {
        if (data.length > 0) {
            return readLongStringValue(data);
        }
        return null;
    } else if (_type == DataType.NUMERIC) {
        return readNumericValue(buffer);
    } else if (_type == DataType.GUID) {
        return readGUIDValue(buffer, order);
    } else if ((_type == DataType.UNKNOWN_0D) || (_type == DataType.UNKNOWN_11)) {
        // treat like "binary" data
        return data;
    } else if (_type == DataType.COMPLEX_TYPE) {
        return new ComplexValueForeignKey(this, buffer.getInt());
    } else if (_type.isUnsupported()) {
        return rawDataWrapper(data);
    } else {
        throw new IOException("Unrecognized data type: " + _type);
    }
}
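A minimal sketch of the same dispatch shape, reduced to two types (the Kind enum is a made-up stand-in, not Jackcess's DataType): wrap the raw value once, apply the stored byte order, then let the column type select the typed getter such as getDouble().

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class MiniColumnReader {

    // Made-up stand-ins for a column-type enum (not Jackcess's DataType).
    enum Kind { INT32, FLOAT64 }

    // Same shape as Column.read(): wrap once, set the stored order,
    // then dispatch on the column type.
    static Object read(Kind kind, byte[] data, ByteOrder order) throws IOException {
        ByteBuffer buffer = ByteBuffer.wrap(data);
        buffer.order(order);
        switch (kind) {
            case INT32:
                return Integer.valueOf(buffer.getInt());
            case FLOAT64:
                return Double.valueOf(buffer.getDouble());
            default:
                throw new IOException("Unrecognized data type: " + kind);
        }
    }

    public static void main(String[] args) throws IOException {
        byte[] raw = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putDouble(-1.5).array();
        System.out.println(read(Kind.FLOAT64, raw, ByteOrder.LITTLE_ENDIAN)); // -1.5
    }
}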
From source file:au.org.ala.layers.intersect.Grid.java
/**
 * @return calculated min and max values of a grid file as float[]
 *         where [0] is the min and [1] is the max.
 */
public float[] calculatetMinMax() {
    float[] ret = new float[2];
    ret[0] = Float.MAX_VALUE;
    ret[1] = Float.MAX_VALUE * -1;

    long i;
    int size;
    byte[] b;
    RandomAccessFile afile = null;

    try {
        // read of random access file can throw an exception
        File f2 = new File(filename + ".GRI");
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        long length = ((long) nrows) * ((long) ncols);
        float f;

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                f = afile.readByte();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                f = afile.readByte();
                if (f < 0) {
                    f += 256;
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                if (byteorderLSB) {
                    f = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                } else {
                    f = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                if (byteorderLSB) {
                    f = ((0xFF & b[3]) << 24) | ((0xFF & b[2]) << 16) + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                } else {
                    f = ((0xFF & b[0]) << 24) | ((0xFF & b[1]) << 16) + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                if (byteorderLSB) {
                    f = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                            + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                            + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                            + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                } else {
                    f = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                            + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                            + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                            + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                ByteBuffer bb = ByteBuffer.wrap(b);
                if (byteorderLSB) {
                    bb.order(ByteOrder.LITTLE_ENDIAN);
                }
                f = bb.getFloat();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                afile.read(b);
                ByteBuffer bb = ByteBuffer.wrap(b);
                if (byteorderLSB) {
                    bb.order(ByteOrder.LITTLE_ENDIAN);
                }
                f = (float) bb.getDouble();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
        }
    } catch (Exception e) {
        logger.error("error calculating min/max of a grid file", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    return ret;
}
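For the DOUBLE branch above, each cell boils down to: read eight bytes, wrap them, apply the file's byte order, call getDouble(), and narrow to float. A sketch of just that step (class and method names are illustrative; it uses readFully(), since the plain read(byte[]) in the example may return fewer than eight bytes):

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class DoubleCellReader {

    // One DOUBLE cell, as in the loop above; byteorderLSB mirrors the Grid flag.
    static float readCell(RandomAccessFile gri, boolean byteorderLSB) throws IOException {
        byte[] b = new byte[8];
        gri.readFully(b); // guarantees all 8 bytes are read
        ByteBuffer bb = ByteBuffer.wrap(b);
        if (byteorderLSB) {
            bb.order(ByteOrder.LITTLE_ENDIAN);
        }
        return (float) bb.getDouble(); // decode, then narrow to float as Grid does
    }
}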
From source file:au.org.ala.layers.intersect.Grid.java
/**
 * @param points input array for longitude and latitude
 *               double[number_of_points][2]
 * @return array of .gri file values corresponding to the points provided
 */
public float[] getValues(double[][] points) {
    // confirm inputs since they come from somewhere else
    if (points == null || points.length == 0) {
        return null;
    }

    // use preloaded grid data if available
    Grid g = Grid.getLoadedGrid(filename);
    if (g != null) {
        return g.getValues2(points);
    }

    if (subgrids != null) {
        return getValues3(points, Math.min(1024 * 1024, 64 * points.length));
    }

    float[] ret = new float[points.length];

    int length = points.length;
    long size;
    int i, pos;
    byte[] b;
    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");

    try {
        // read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    ret[i] = afile.readByte();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    ret[i] = afile.readByte();
                    if (ret[i] < 0) {
                        ret[i] += 256;
                    }
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    if (byteorderLSB) {
                        ret[i] = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                    } else {
                        ret[i] = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                    }
                    //ret[i] = afile.readShort();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    if (byteorderLSB) {
                        ret[i] = ((0xFF & b[3]) << 24) | ((0xFF & b[2]) << 16) + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                    } else {
                        ret[i] = ((0xFF & b[0]) << 24) | ((0xFF & b[1]) << 16) + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                    }
                    //ret[i] = afile.readInt();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    if (byteorderLSB) {
                        ret[i] = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                                + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                                + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                                + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                    } else {
                        ret[i] = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                                + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                                + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                                + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                    }
                    //ret[i] = afile.readLong();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[i] = bb.getFloat();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[i] = (float) bb.getDouble();
                    //ret[i] = afile.readFloat();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
            // should not happen; catch anyway...
            for (i = 0; i < length; i++) {
                ret[i] = Float.NaN;
            }
        }

        // replace not-a-number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
    } catch (Exception e) {
        logger.error("error getting grid file values", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    return ret;
}
From source file:au.org.ala.layers.intersect.Grid.java
/**
 * @param points input array for longitude and latitude
 *               double[number_of_points][2], sorted latitude then longitude
 * @return array of .gri file values corresponding to the points provided
 */
public float[] getValues3(double[][] points, int bufferSize) {
    // confirm inputs since they come from somewhere else
    if (points == null || points.length == 0) {
        return null;
    }

    if (subgrids != null) {
        return getValuesSubgrids(points, bufferSize);
    }

    // use preloaded grid data if available
    Grid g = Grid.getLoadedGrid(filename);
    if (g != null && g.grid_data != null) {
        return g.getValues2(points);
    }

    int length = points.length;
    int size, i;
    byte[] b;
    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");

    try {
        // read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        // do not cache subgrids (using getValues2)
        if (!subgrid && afile.length() < 80 * 1024 * 1024) {
            try {
                afile.close();
                afile = null;
            } catch (Exception e) {
            }
            return getValues2(points);
        }

        byte[] buffer = new byte[bufferSize]; // must be a multiple of 64
        Long bufferOffset = afile.length();

        float[] ret = new float[points.length];

        // get cell numbers
        long[][] cells = new long[points.length][2];
        for (int j = 0; j < points.length; j++) {
            if (Double.isNaN(points[j][0]) || Double.isNaN(points[j][1])) {
                cells[j][0] = -1;
                cells[j][1] = j;
            } else {
                cells[j][0] = getcellnumber(points[j][0], points[j][1]);
                cells[j][1] = j;
            }
        }
        java.util.Arrays.sort(cells, new Comparator<long[]>() {
            @Override
            public int compare(long[] o1, long[] o2) {
                if (o1[0] == o2[0]) {
                    return o1[1] > o2[1] ? 1 : -1;
                } else {
                    return o1[0] > o2[0] ? 1 : -1;
                }
            }
        });

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    ret[(int) cells[i][1]] = getByte(afile, buffer, bufferOffset, cells[i][0] * size);
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    ret[(int) cells[i][1]] = getByte(afile, buffer, bufferOffset, cells[i][0] * size);
                    if (ret[(int) cells[i][1]] < 0) {
                        ret[(int) cells[i][1]] += 256;
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                    } else {
                        ret[(int) cells[i][1]] = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = ((0xFF & b[3]) << 24) | ((0xFF & b[2]) << 16)
                                + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                    } else {
                        ret[(int) cells[i][1]] = ((0xFF & b[0]) << 24) | ((0xFF & b[1]) << 16)
                                + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                                + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                                + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                                + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                    } else {
                        ret[(int) cells[i][1]] = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                                + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                                + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                                + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[(int) cells[i][1]] = bb.getFloat();
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[(int) cells[i][1]] = (float) bb.getDouble();
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
            // should not happen; catch anyway...
            for (i = 0; i < length; i++) {
                ret[i] = Float.NaN;
            }
        }

        // replace not-a-number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
        return ret;
    } catch (Exception e) {
        logger.error("error getting grid file values", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    return null;
}
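Where many random cell reads are needed, an alternative to the seek()-plus-read()-plus-wrap() loop above is to map the file once and use the absolute getDouble(int index) overload, which does not move the position. A hypothetical sketch (the file name and little-endian assumption are illustrative, and the int index restricts this approach to files under 2 GB):

import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;

public class MappedGridReader {
    public static void main(String[] args) throws Exception {
        try (RandomAccessFile raf = new RandomAccessFile("grid.gri", "r");
             FileChannel channel = raf.getChannel()) {
            ByteBuffer map = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
            map.order(ByteOrder.LITTLE_ENDIAN);

            long cell = 12345; // cell number, e.g. from getcellnumber(...)
            // Absolute read: decodes 8 bytes at the given byte index
            // without touching the buffer's position.
            double v = map.getDouble((int) (cell * 8));
            System.out.println(v);
        }
    }
}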
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType1(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType1(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    // how to read each recordType:
    // 1. set up the following objects before reading bytes
    //    a. the working byte array
    //    b. the storage object
    // the length of this field: 172 bytes = 60 + 4 + 12 + 4 + 8 + 84
    // this field consists of 6 distinct blocks

    byte[] recordType1 = new byte[LENGTH_RECORDTYPE1];
    // int caseWeightVariableOBSIndex = 0;

    try {
        int nbytes = stream.read(recordType1, 0, LENGTH_RECORDTYPE1);

        //printHexDump(recordType1, "recordType1");

        if (nbytes == 0) {
            throw new IOException("reading recordType1: no byte was read");
        }

        // 1.1 60-byte String that tells the platform/version of SPSS that
        // wrote this file

        int offset_start = 0;
        int offset_end = LENGTH_SPSS_PRODUCT_INFO; // 60 bytes

        String productInfo = new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII");

        dbgLog.fine("productInfo:\n" + productInfo + "\n");
        dataTable.setOriginalFormatVersion(productInfo);

        // try to parse out the SPSS version that created this data file:

        String spssVersionTag = null;

        String regexpVersionNumber = ".*Release ([0-9]*)";
        Pattern versionTagPattern = Pattern.compile(regexpVersionNumber);
        Matcher matcher = versionTagPattern.matcher(productInfo);
        if (matcher.find()) {
            spssVersionTag = matcher.group(1);
            dbgLog.fine("SPSS Version Number: " + spssVersionTag);
        }

        // TODO:
        // try a more elaborate regex (like the one for the "new-style"
        // productInfo line, below), to select the version number, the
        // minor version number and the platform (windows vs. mac) separately.
        // would be cleaner to save just that, rather than the entire
        // productInfo tag.
        // -- L.A. 4.0 beta

        if (spssVersionTag == null || spssVersionTag.equals("")) {
            // Later versions of SPSS have different formatting of the
            // productInfo line:
            regexpVersionNumber = ".* IBM SPSS STATISTICS.* ([^ ]*) ([0-9][0-9]*)([^ ]*)";
            versionTagPattern = Pattern.compile(regexpVersionNumber);
            matcher = versionTagPattern.matcher(productInfo);
            if (matcher.find()) {
                String spssPlatformTag = matcher.group(1);
                spssVersionTag = matcher.group(2);
                String spssVersionTagMinor = matcher.group(3);

                dbgLog.fine("SPSS Version Number (new style): " + spssVersionTag);
                dbgLog.fine("SPSS Version/Platform Identification (new style): " + spssPlatformTag + " "
                        + spssVersionTag + spssVersionTagMinor);
                dataTable.setOriginalFormatVersion(spssVersionTag + spssVersionTagMinor + " " + spssPlatformTag);
            }
        }

        if (spssVersionTag != null && !spssVersionTag.equals("")) {
            spssVersionNumber = Integer.valueOf(spssVersionTag).intValue();

            /*
             * Starting with SPSS version 16, the default encoding is UTF-8.
             * But we are only going to use it if the user did not explicitly
             * specify the encoding on the addfiles page. Then we'd want to
             * stick with whatever they entered.
             * (also, it appears that (starting with the same version 16?)
             * it is actually possible to define the locale/character set
             * in the file - section 7, sub-type 20; TODO: decide which
             * one takes precedence, if we have the encoding defined both
             * in the file and through the UI. -- L.A. 4.0 beta)
             */
            if (spssVersionNumber > 15) {
                if (getDataLanguageEncoding() == null) {
                    //defaultCharSet = "windows-1252"; // temporary! -- L.A.
                    defaultCharSet = "UTF-8";
                }
            }
        }

        // TODO:
        // decide if we want to save the [determined/guessed] character set
        // somewhere in the dataset object.
        // this may be relevant in cases when accented/non-latin characters
        // get ingested incorrectly;
        // -- L.A. 4.0 beta

        // 1.2) 4-byte file-layout-code (byte-order)

        offset_start = offset_end;
        offset_end += LENGTH_FILE_LAYOUT_CODE; // 4 bytes

        ByteBuffer bb_fileLayout_code = ByteBuffer.wrap(recordType1, offset_start, LENGTH_FILE_LAYOUT_CODE);

        ByteBuffer byteOrderTest = bb_fileLayout_code.duplicate();
        // interpret the 4 bytes as an int

        int int2test = byteOrderTest.getInt();

        if (int2test == 2 || int2test == 3) {
            dbgLog.fine("integer == " + int2test + ": the byte-order of the writer is the same "
                    + "as the counterpart of Java: Big Endian");
        } else {
            // Because Java's byte-order is always big-endian,
            // this (!=2) means this sav file was written on a little-endian machine;
            // non-string, multi-byte blocks must be byte-reversed

            bb_fileLayout_code.order(ByteOrder.LITTLE_ENDIAN);

            int2test = bb_fileLayout_code.getInt();

            if (int2test == 2 || int2test == 3) {
                dbgLog.fine("The sav file was saved on a little endian machine");
                dbgLog.fine("Reversal of the bytes is necessary to decode " + "multi-byte, non-string blocks");
                isLittleEndian = true;
            } else {
                throw new IOException("reading recordType1: unknown file layout code=" + int2test);
            }
        }

        dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());

        // 1.3 4-byte Number_Of_OBS_Units_Per_Case
        // (= how many RT2 records => how many variables)

        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE; // 4 bytes

        ByteBuffer bb_OBS_units_per_case = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE);

        if (isLittleEndian) {
            bb_OBS_units_per_case.order(ByteOrder.LITTLE_ENDIAN);
        }

        OBSUnitsPerCase = bb_OBS_units_per_case.getInt();

        dbgLog.fine("RT1: OBSUnitsPerCase=" + OBSUnitsPerCase);

        // 1.4 4-byte Compression_Switch

        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_SWITCH; // 4 bytes

        ByteBuffer bb_compression_switch = ByteBuffer.wrap(recordType1, offset_start, LENGTH_COMPRESSION_SWITCH);

        if (isLittleEndian) {
            bb_compression_switch.order(ByteOrder.LITTLE_ENDIAN);
        }

        int compression_switch = bb_compression_switch.getInt();
        if (compression_switch == 0) {
            // data section is not compressed
            isDataSectionCompressed = false;
            dbgLog.fine("data section is not compressed");
        } else {
            dbgLog.fine("data section is compressed:" + compression_switch);
        }

        // 1.5 4-byte Case-Weight Variable Index
        // warning: this variable index starts from 1, not 0

        offset_start = offset_end;
        offset_end += LENGTH_CASE_WEIGHT_VARIABLE_INDEX; // 4 bytes

        ByteBuffer bb_Case_Weight_Variable_Index = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_CASE_WEIGHT_VARIABLE_INDEX);

        if (isLittleEndian) {
            bb_Case_Weight_Variable_Index.order(ByteOrder.LITTLE_ENDIAN);
        }

        caseWeightVariableOBSIndex = bb_Case_Weight_Variable_Index.getInt();

        /// caseWeightVariableOBSIndex will be used later on to locate
        /// the weight variable; so we'll be able to mark the corresponding
        /// variables properly.
        // TODO: make sure case weight variables are properly handled!
        // -- L.A. 4.0 beta
        ///smd.getFileInformation().put("caseWeightVariableOBSIndex", caseWeightVariableOBSIndex);

        // 1.6 4-byte Number of Cases

        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_CASES; // 4 bytes

        ByteBuffer bb_Number_Of_Cases = ByteBuffer.wrap(recordType1, offset_start, LENGTH_NUMBER_OF_CASES);

        if (isLittleEndian) {
            bb_Number_Of_Cases.order(ByteOrder.LITTLE_ENDIAN);
        }

        int numberOfCases = bb_Number_Of_Cases.getInt();

        if (numberOfCases < 0) {
            // -1 if numberOfCases is unknown
            throw new RuntimeException("number of cases is not recorded in the header");
        } else {
            dbgLog.fine("RT1: number of cases is recorded= " + numberOfCases);
            dataTable.setCaseQuantity(new Long(numberOfCases));
            ///caseQnty = numberOfCases;
            ///smd.getFileInformation().put("caseQnty", numberOfCases);
        }

        // 1.7 8-byte compression-bias [not long but double]

        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_BIAS; // 8 bytes

        ByteBuffer bb_compression_bias = ByteBuffer
                .wrap(Arrays.copyOfRange(recordType1, offset_start, offset_end));

        if (isLittleEndian) {
            bb_compression_bias.order(ByteOrder.LITTLE_ENDIAN);
        }

        Double compressionBias = bb_compression_bias.getDouble();

        // TODO:
        // check if this "compression bias" is being used anywhere?
        // doesn't seem to be!
        // -- 4.0 alpha
        if (compressionBias == 100d) {
            // 100 is expected
            dbgLog.fine("compressionBias is 100 as expected");
            ///smd.getFileInformation().put("compressionBias", 100);
        } else {
            dbgLog.fine("compression bias is not 100: " + compressionBias);
            ///smd.getFileInformation().put("compressionBias", compressionBias);
        }

        // 1.8 84-byte File Creation Information (date/time: dd MM yy hh:mm:ss +
        // 64-byte label)

        offset_start = offset_end;
        offset_end += LENGTH_FILE_CREATION_INFO; // 84 bytes

        String fileCreationInfo = getNullStrippedString(
                new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII"));

        dbgLog.fine("fileCreationInfo:\n" + fileCreationInfo + "\n");

        String fileCreationDate = fileCreationInfo.substring(0, length_file_creation_date);
        int dateEnd = length_file_creation_date + length_file_creation_time;
        String fileCreationTime = fileCreationInfo.substring(length_file_creation_date, (dateEnd));
        String fileCreationNote = fileCreationInfo.substring(dateEnd, length_file_creation_label);

        dbgLog.fine("fileDate=" + fileCreationDate);
        dbgLog.fine("fileTime=" + fileCreationTime);
        dbgLog.fine("fileNote" + fileCreationNote);

    } catch (IOException ex) {
        throw ex;
    }

    dbgLog.fine("decodeRecordType1(): end");
}
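The byte-order probe above relies on a useful ByteBuffer property: duplicate() shares the underlying bytes but has an independent position, and it starts out big-endian regardless of the parent's byte order, so the first getInt() can be retried in little-endian without disturbing the original buffer. A condensed sketch of that trick (the method name is illustrative; the expected layout-code values 2 and 3 are taken from the example above):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class LayoutCodeProbe {

    // Returns the writer's byte order, given the 4-byte layout code at
    // the start of 'header'.
    static ByteOrder detectOrder(byte[] header) throws java.io.IOException {
        ByteBuffer probe = ByteBuffer.wrap(header, 0, 4);
        // duplicate() shares the bytes but is independent of 'probe',
        // so this big-endian peek never disturbs the original buffer.
        int bigEndianReading = probe.duplicate().getInt();
        if (bigEndianReading == 2 || bigEndianReading == 3) {
            return ByteOrder.BIG_ENDIAN;
        }
        probe.order(ByteOrder.LITTLE_ENDIAN);
        int littleEndianReading = probe.getInt();
        if (littleEndianReading == 2 || littleEndianReading == 3) {
            return ByteOrder.LITTLE_ENDIAN;
        }
        throw new java.io.IOException("unknown file layout code=" + littleEndianReading);
    }
}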
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
void decodeRecordType1(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType1(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    // how to read each recordType:
    // 1. set up the following objects before reading bytes
    //    a. the working byte array
    //    b. the storage object
    // the length of this field: 172 bytes = 60 + 4 + 12 + 4 + 8 + 84
    // this field consists of 6 distinct blocks

    byte[] recordType1 = new byte[LENGTH_RECORDTYPE1];
    // int caseWeightVariableOBSIndex = 0;

    try {
        int nbytes = stream.read(recordType1, 0, LENGTH_RECORDTYPE1);

        //printHexDump(recordType1, "recordType1");

        if (nbytes == 0) {
            throw new IOException("reading recordType1: no byte was read");
        }

        // 1.1 60-byte String that tells the platform/version of SPSS that
        // wrote this file

        int offset_start = 0;
        int offset_end = LENGTH_SPSS_PRODUCT_INFO; // 60 bytes

        String productInfo = new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII");

        dbgLog.fine("productInfo:\n" + productInfo + "\n");

        // add the info to the fileInfo
        smd.getFileInformation().put("productInfo", productInfo);

        // try to parse out the SPSS version that created this data file:

        String spssVersionNumberTag = null;

        String regexpVersionNumber = ".*Release ([0-9]*)";
        Pattern patternJsession = Pattern.compile(regexpVersionNumber);
        Matcher matcher = patternJsession.matcher(productInfo);
        if (matcher.find()) {
            spssVersionNumberTag = matcher.group(1);
            dbgLog.fine("SPSS Version Number: " + spssVersionNumberTag);
        }

        if (spssVersionNumberTag != null && !spssVersionNumberTag.equals("")) {
            spssVersionNumber = Integer.valueOf(spssVersionNumberTag).intValue();

            /*
             * Starting with SPSS version 16, the default encoding is UTF-8.
             * But we are only going to use it if the user did not explicitly
             * specify the encoding on the addfiles page. Then we'd want to
             * stick with whatever they entered.
             */
            if (spssVersionNumber > 15) {
                if (getDataLanguageEncoding() == null) {
                    defaultCharSet = "UTF-8";
                }
            }
        }

        smd.getFileInformation().put("charset", defaultCharSet);

        // 1.2) 4-byte file-layout-code (byte-order)

        offset_start = offset_end;
        offset_end += LENGTH_FILE_LAYOUT_CODE; // 4 bytes

        ByteBuffer bb_fileLayout_code = ByteBuffer.wrap(recordType1, offset_start, LENGTH_FILE_LAYOUT_CODE);

        ByteBuffer byteOrderTest = bb_fileLayout_code.duplicate();
        // interpret the 4 bytes as an int

        int int2test = byteOrderTest.getInt();

        if (int2test == 2 || int2test == 3) {
            dbgLog.fine("integer == " + int2test + ": the byte-order of the writer is the same "
                    + "as the counterpart of Java: Big Endian");
        } else {
            // Because Java's byte-order is always big-endian,
            // this (!=2) means this sav file was written on a little-endian machine;
            // non-string, multi-byte blocks must be byte-reversed

            bb_fileLayout_code.order(ByteOrder.LITTLE_ENDIAN);

            int2test = bb_fileLayout_code.getInt();

            if (int2test == 2 || int2test == 3) {
                dbgLog.fine("The sav file was saved on a little endian machine");
                dbgLog.fine("Reversal of the bytes is necessary to decode " + "multi-byte, non-string blocks");
                isLittleEndian = true;
            } else {
                throw new IOException("reading recordType1: unknown file layout code=" + int2test);
            }
        }

        dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());

        smd.getFileInformation().put("OSByteOrder", ByteOrder.nativeOrder().toString());
        smd.getFileInformation().put("byteOrder", int2test);

        // 1.3 4-byte Number_Of_OBS_Units_Per_Case
        // (= how many RT2 records => how many variables)

        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE; // 4 bytes

        ByteBuffer bb_OBS_units_per_case = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE);

        if (isLittleEndian) {
            bb_OBS_units_per_case.order(ByteOrder.LITTLE_ENDIAN);
        }

        OBSUnitsPerCase = bb_OBS_units_per_case.getInt();

        dbgLog.fine("RT1: OBSUnitsPerCase=" + OBSUnitsPerCase);

        smd.getFileInformation().put("OBSUnitsPerCase", OBSUnitsPerCase);

        // 1.4 4-byte Compression_Switch

        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_SWITCH; // 4 bytes

        ByteBuffer bb_compression_switch = ByteBuffer.wrap(recordType1, offset_start, LENGTH_COMPRESSION_SWITCH);

        if (isLittleEndian) {
            bb_compression_switch.order(ByteOrder.LITTLE_ENDIAN);
        }

        int compression_switch = bb_compression_switch.getInt();
        if (compression_switch == 0) {
            // data section is not compressed
            isDataSectionCompressed = false;
            dbgLog.fine("data section is not compressed");
        } else {
            dbgLog.fine("data section is compressed:" + compression_switch);
        }

        smd.getFileInformation().put("compressedData", compression_switch);

        // 1.5 4-byte Case-Weight Variable Index
        // warning: this variable index starts from 1, not 0

        offset_start = offset_end;
        offset_end += LENGTH_CASE_WEIGHT_VARIABLE_INDEX; // 4 bytes

        ByteBuffer bb_Case_Weight_Variable_Index = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_CASE_WEIGHT_VARIABLE_INDEX);

        if (isLittleEndian) {
            bb_Case_Weight_Variable_Index.order(ByteOrder.LITTLE_ENDIAN);
        }

        caseWeightVariableOBSIndex = bb_Case_Weight_Variable_Index.getInt();

        smd.getFileInformation().put("caseWeightVariableOBSIndex", caseWeightVariableOBSIndex);

        // 1.6 4-byte Number of Cases

        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_CASES; // 4 bytes

        ByteBuffer bb_Number_Of_Cases = ByteBuffer.wrap(recordType1, offset_start, LENGTH_NUMBER_OF_CASES);

        if (isLittleEndian) {
            bb_Number_Of_Cases.order(ByteOrder.LITTLE_ENDIAN);
        }

        int numberOfCases = bb_Number_Of_Cases.getInt();

        if (numberOfCases < 0) {
            // -1 if numberOfCases is unknown
            throw new RuntimeException("number of cases is not recorded in the header");
        } else {
            dbgLog.fine("RT1: number of cases is recorded= " + numberOfCases);
            caseQnty = numberOfCases;
            smd.getFileInformation().put("caseQnty", numberOfCases);
        }

        // 1.7 8-byte compression-bias [not long but double]

        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_BIAS; // 8 bytes

        ByteBuffer bb_compression_bias = ByteBuffer
                .wrap(Arrays.copyOfRange(recordType1, offset_start, offset_end));

        if (isLittleEndian) {
            bb_compression_bias.order(ByteOrder.LITTLE_ENDIAN);
        }

        Double compressionBias = bb_compression_bias.getDouble();

        if (compressionBias == 100d) {
            // 100 is expected
            dbgLog.fine("compressionBias is 100 as expected");
            smd.getFileInformation().put("compressionBias", 100);
        } else {
            dbgLog.fine("compression bias is not 100: " + compressionBias);
            smd.getFileInformation().put("compressionBias", compressionBias);
        }

        // 1.8 84-byte File Creation Information (date/time: dd MM yy hh:mm:ss +
        // 64-byte label)

        offset_start = offset_end;
        offset_end += LENGTH_FILE_CREATION_INFO; // 84 bytes

        String fileCreationInfo = getNullStrippedString(
                new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII"));

        dbgLog.fine("fileCreationInfo:\n" + fileCreationInfo + "\n");

        String fileCreationDate = fileCreationInfo.substring(0, length_file_creation_date);
        int dateEnd = length_file_creation_date + length_file_creation_time;
        String fileCreationTime = fileCreationInfo.substring(length_file_creation_date, (dateEnd));
        String fileCreationNote = fileCreationInfo.substring(dateEnd, length_file_creation_label);

        dbgLog.fine("fileDate=" + fileCreationDate);
        dbgLog.fine("fileTime=" + fileCreationTime);
        dbgLog.fine("fileNote" + fileCreationNote);

        smd.getFileInformation().put("fileDate", fileCreationDate);
        smd.getFileInformation().put("fileTime", fileCreationTime);
        smd.getFileInformation().put("fileNote", fileCreationNote);
        smd.getFileInformation().put("varFormat_schema", "SPSS");

        // add the info to the fileInfo
        smd.getFileInformation().put("mimeType", MIME_TYPE[0]);
        smd.getFileInformation().put("fileFormat", MIME_TYPE[0]);

        smd.setValueLabelMappingTable(valueVariableMappingTable);

    } catch (IOException ex) {
        //ex.printStackTrace();
        throw ex;
    }

    dbgLog.fine("***** decodeRecordType1(): end *****");
}
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
private void parseRT7SubTypefield(BufferedInputStream stream) throws IOException {
    int length_unit_length = 4;
    int length_number_of_units = 4;
    int storage_size = length_unit_length + length_number_of_units;

    int[] headerSection = new int[2];

    byte[] byteStorage = new byte[storage_size];

    try {
        int nbytes = stream.read(byteStorage);
        // TODO: check against nbytes

        //printHexDump(byteStorage, "RT7:storage");

        ByteBuffer bb_data_type = ByteBuffer.wrap(byteStorage, 0, length_unit_length);
        if (isLittleEndian) {
            bb_data_type.order(ByteOrder.LITTLE_ENDIAN);
        }
        int unitLength = bb_data_type.getInt();
        dbgLog.fine("parseRT7 SubTypefield: unitLength=" + unitLength);

        ByteBuffer bb_number_of_units = ByteBuffer.wrap(byteStorage, length_unit_length, length_number_of_units);
        if (isLittleEndian) {
            bb_number_of_units.order(ByteOrder.LITTLE_ENDIAN);
        }
        int numberOfUnits = bb_number_of_units.getInt();
        dbgLog.fine("parseRT7 SubTypefield: numberOfUnits=" + numberOfUnits);

        headerSection[0] = unitLength;
        headerSection[1] = numberOfUnits;

        for (int i = 0; i < numberOfUnits; i++) {
            byte[] work = new byte[unitLength];

            int nb = stream.read(work);
            dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));

            ByteBuffer bb_field = ByteBuffer.wrap(work);
            if (isLittleEndian) {
                bb_field.order(ByteOrder.LITTLE_ENDIAN);
            }
            dbgLog.fine("RT7ST: raw bytes in Hex:" + new String(Hex.encodeHex(bb_field.array())));

            if (unitLength == 4) {
                int fieldData = bb_field.getInt();
                dbgLog.fine("RT7ST: " + i + "-th fieldData=" + fieldData);
                dbgLog.fine("RT7ST: fieldData in Hex=" + Integer.toHexString(fieldData));
            } else if (unitLength == 8) {
                double fieldData = bb_field.getDouble();
                dbgLog.finer("RT7ST: " + i + "-th fieldData=" + fieldData);
                dbgLog.finer("RT7ST: fieldData in Hex=" + Double.toHexString(fieldData));
            }
            dbgLog.finer("");
        }
    } catch (IOException ex) {
        //ex.printStackTrace();
        throw ex;
    }
}
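The loop above picks the typed getter by unit width: 4-byte units decode with getInt(), 8-byte units with getDouble(). A trimmed sketch of that dispatch (class and method names are illustrative):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class UnitDecoder {

    // 4-byte units decode with getInt(), 8-byte units with getDouble(),
    // as in the RT7 sub-type loop above.
    static Number decode(byte[] unit, boolean littleEndian) {
        ByteBuffer bb = ByteBuffer.wrap(unit);
        if (littleEndian) {
            bb.order(ByteOrder.LITTLE_ENDIAN);
        }
        if (unit.length == 8) {
            return bb.getDouble();
        }
        return bb.getInt();
    }
}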
From source file:au.org.ala.layers.intersect.Grid.java
public float[] getGrid() {
    int maxArrayLength = Integer.MAX_VALUE - 10;

    if (grid_data != null) {
        return grid_data;
    }

    Grid loadedAlready = getLoadedGrid(filename);
    if (loadedAlready != null && loadedAlready.grid_data != null) {
        return loadedAlready.grid_data;
    }

    int length = nrows * ncols;
    float[] ret = new float[length];

    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");
    try {
        // read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        byte[] b = new byte[(int) Math.min(afile.length(), maxArrayLength)];

        int i = 0;
        int max = 0;
        int len;
        while ((len = afile.read(b)) > 0) {
            ByteBuffer bb = ByteBuffer.wrap(b);
            if (byteorderLSB) {
                bb.order(ByteOrder.LITTLE_ENDIAN);
            }

            if (datatype.equalsIgnoreCase("UBYTE")) {
                max += len;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.get();
                    if (ret[i] < 0) {
                        ret[i] += 256;
                    }
                }
            } else if (datatype.equalsIgnoreCase("BYTE")) {
                max += len;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.get();
                }
            } else if (datatype.equalsIgnoreCase("SHORT")) {
                max += len / 2;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.getShort();
                }
            } else if (datatype.equalsIgnoreCase("INT")) {
                max += len / 4;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.getInt();
                }
            } else if (datatype.equalsIgnoreCase("LONG")) {
                max += len / 8;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.getLong();
                }
            } else if (datatype.equalsIgnoreCase("FLOAT")) {
                max += len / 4;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.getFloat();
                }
            } else if (datatype.equalsIgnoreCase("DOUBLE")) {
                max += len / 8;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = (float) bb.getDouble();
                }
            } else {
                // should not happen; catch anyway...
                max += len / 4;
                for (; i < max; i++) {
                    ret[i] = Float.NaN;
                }
            }
        }

        // replace not-a-number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
    } catch (Exception e) {
        logger.error("An error has occurred - probably a file error", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    grid_data = ret;
    return ret;
}
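When a whole chunk of DOUBLE cells has to be decoded, as in the branch above, a DoubleBuffer view is an alternative to calling getDouble() per element: asDoubleBuffer() interprets the remaining bytes in eight-byte groups, using the byte order in effect when the view is created. A small sketch (values are illustrative):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.DoubleBuffer;

public class BulkDoubleDemo {
    public static void main(String[] args) {
        // Pack three little-endian doubles, as a DOUBLE grid chunk might be laid out.
        ByteBuffer src = ByteBuffer.allocate(24).order(ByteOrder.LITTLE_ENDIAN);
        src.putDouble(1.0).putDouble(2.5).putDouble(Double.NaN);
        src.flip();

        // The view decodes the remaining bytes in 8-byte groups with the
        // parent's byte order, replacing a per-element getDouble() loop.
        DoubleBuffer doubles = src.asDoubleBuffer();
        double[] chunk = new double[doubles.remaining()];
        doubles.get(chunk);

        for (double d : chunk) {
            System.out.println(d); // 1.0, 2.5, NaN
        }
    }
}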
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****"); if (stream == null) { throw new IllegalArgumentException("decodeRecordTypeDataCompressed: stream == null!"); }/*from w w w . j a v a 2s . c o m*/ PrintWriter pwout = createOutputWriter(stream); int varQnty = dataTable.getVarQuantity().intValue(); int caseQnty = dataTable.getCaseQuantity().intValue(); dbgLog.fine("varQnty: " + varQnty); dateFormatList = new String[varQnty]; boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0 ? true : false; dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock); int ii = 0; int OBS = LENGTH_SAV_OBS_BLOCK; int nOBS = OBSUnitsPerCase; dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase); int caseIndex = 0; dbgLog.fine("printFormatTable:\n" + printFormatTable); variableFormatTypeList = new String[varQnty]; for (int i = 0; i < varQnty; i++) { variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE .get(printFormatTable.get(variableNameList.get(i))); dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]); formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]); } dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList)); dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable); // TODO: // Make sure the date formats are actually preserved! // (this is something that was collected in the code below and passed // to the UNF calculator). // -- L.A. 4.0 alpha List<String> casewiseRecordForTabFile = new ArrayList<String>(); try { // this compression is applied only to non-float data, i.e. integer; // 8-byte float datum is kept in tact boolean hasReachedEOF = false; OBSERVATION: while (true) { dbgLog.fine("SAV Reader: compressed: ii=" + ii + "-th iteration"); byte[] octate = new byte[LENGTH_SAV_OBS_BLOCK]; int nbytes = stream.read(octate); // processCompressedOBSblock () // (this means process a block of 8 compressed OBS // values -- should result in 64 bytes of data total) for (int i = 0; i < LENGTH_SAV_OBS_BLOCK; i++) { dbgLog.finer("i=" + i + "-th iteration"); int octate_i = octate[i]; //dbgLog.fine("octate="+octate_i); if (octate_i < 0) { octate_i += 256; } int byteCode = octate_i;//octate_i & 0xF; //out.println("byeCode="+byteCode); // processCompressedOBS switch (byteCode) { case 252: // end of the file dbgLog.fine("SAV Reader: compressed: end of file mark [FC] was found"); hasReachedEOF = true; break; case 253: // FD: uncompressed data follows after this octate // long string datum or float datum // read the following octate byte[] uncompressedByte = new byte[LENGTH_SAV_OBS_BLOCK]; int ucbytes = stream.read(uncompressedByte); int typeIndex = (ii * OBS + i) % nOBS; if ((OBSwiseTypelList.get(typeIndex) > 0) || (OBSwiseTypelList.get(typeIndex) == -1)) { // code= >0 |-1: string or its conitiguous block // decode as a string object String strdatum = new String( Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK), defaultCharSet); //out.println("str_datum="+strdatum+"<-"); // add this non-missing-value string datum casewiseRecordForTabFile.add(strdatum); //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile); } else if (OBSwiseTypelList.get(typeIndex) == -2) { String strdatum = new String( Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK - 1), defaultCharSet); casewiseRecordForTabFile.add(strdatum); 
//out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile); } else if (OBSwiseTypelList.get(typeIndex) == 0) { // code= 0: numeric ByteBuffer bb_double = ByteBuffer.wrap(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK); if (isLittleEndian) { bb_double.order(ByteOrder.LITTLE_ENDIAN); } Double ddatum = bb_double.getDouble(); // out.println("ddatum="+ddatum); // add this non-missing-value numeric datum casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum)); dbgLog.fine("SAV Reader: compressed: added value to dataLine: " + ddatum); } else { dbgLog.fine("SAV Reader: out-of-range exception"); throw new IOException("out-of-range value was found"); } /* // EOF-check after reading this octate if (stream.available() == 0){ hasReachedEOF = true; dbgLog.fine( "SAV Reader: *** After reading an uncompressed octate," + " reached the end of the file at "+ii +"th iteration and i="+i+"th octate position [0-start] *****"); } */ break; case 254: // FE: used as the missing value for string variables // an empty case in a string variable also takes this value // string variable does not accept space-only data // cf: uncompressed case // 20 20 20 20 20 20 20 20 // add the string missing value // out.println("254: String missing data"); casewiseRecordForTabFile.add(" "); // add "." here? // Note that technically this byte flag (254/xFE) means // that *eight* white space characters should be // written to the output stream. This caused me // a great amount of confusion, because it appeared // to me that there was a mismatch between the number // of bytes advertised in the variable metadata and // the number of bytes actually found in the data // section of a compressed SAV file; this is because // these 8 bytes "come out of nowhere"; they are not // written in the data section, but this flag specifies // that they should be added to the output. // Also, as I pointed out above, we are only writing // out one whitespace character, not 8 as instructed. // This appears to be legit; these blocks of 8 spaces // seem to be only used for padding, and all such // multiple padding spaces are stripped anyway during // the post-processing. 
                        break;

                    case 255:
                        // FF: system missing value for numeric variables
                        // cf. the uncompressed case (sysmis):
                        // FF FF FF FF FF FF eF FF (little endian)

                        // add the numeric missing value
                        dbgLog.fine("SAV Reader: compressed: Missing Value, numeric");
                        casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
                        break;

                    case 0:
                        // 00: do nothing
                        dbgLog.fine("SAV Reader: compressed: doing nothing (zero); ");
                        break;

                    default:
                        //out.println("byte code(default)="+ byteCode);
                        if ((byteCode > 0) && (byteCode < 252)) {
                            // the datum is compressed; add this numeric datum,
                            // uncompressed by subtracting the bias of 100
                            //Integer unCompressed = Integer.valueOf(byteCode - 100);
                            Double unCompressed = Double.valueOf(byteCode - 100);
                            dbgLog.fine("SAV Reader: compressed: default case: " + unCompressed);
                            casewiseRecordForTabFile.add(doubleNumberFormatter.format(unCompressed));
                            // out.println("uncompressed="+unCompressed);
                            // out.println("dataline="+casewiseRecordForTabFile);
                        }
                } // end of switch
                // out.println("end of switch");

                // The-end-of-a-case(row) processing.
                // The line that follows, and the code around it,
                // is really confusing:
                int varCounter = (ii * OBS + i + 1) % nOBS;
                // While both OBS and LENGTH_SAV_OBS_BLOCK = 8
                // (OBS was initialized as OBS = LENGTH_SAV_OBS_BLOCK),
                // the two values mean different things:
                // LENGTH_SAV_OBS_BLOCK is the number of bytes in one OBS,
                // and OBS is the number of OBS blocks that we process
                // at a time; i.e., we process 8 chunks of 8 bytes at a time.
                // This is how data is organized inside a SAV file:
                // 8 bytes of compression flags, followed by 8x8 or fewer
                // (depending on the flags) bytes of compressed data.
                // I should rename this OBS variable something more
                // meaningful.
                //
                // Also, the "varCounter" variable name is entirely
                // misleading -- it counts not variables, but OBS blocks.

                dbgLog.fine("SAV Reader: compressed: OBS counter=" + varCounter + "(ii=" + ii + ")");

                if ((ii * OBS + i + 1) % nOBS == 0) {
                    //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                    // out.println("all variables in a case are parsed == nOBS");
                    // out.println("hasStringVarContinuousBlock="+hasStringVarContinuousBlock);

                    // check whether a string variable's continuous block exists;
                    // if so, the blocks must be joined
                    if (hasStringVarContinuousBlock) {
                        // string variable continuous-block concatenation processing
                        //out.println("concatenating process starts");
                        //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                        //out.println("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());

                        StringBuilder sb = new StringBuilder("");
                        int firstPosition = 0;
                        Set<Integer> removeJset = new HashSet<Integer>();

                        for (int j = 0; j < nOBS; j++) {
                            dbgLog.fine("RTD: j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                            if ((OBSwiseTypelList.get(j) == -1) || (OBSwiseTypelList.get(j) == -2)) {
                                // A continued string variable was found at the
                                // j-th position; look back at j-1.
                                firstPosition = j - 1;
                                int lastJ = j;
                                String concatenated = null;

                                removeJset.add(j);
                                sb.append(casewiseRecordForTabFile.get(j - 1));
                                sb.append(casewiseRecordForTabFile.get(j));

                                for (int jc = 1;; jc++) {
                                    if ((j + jc == nOBS)
                                            || ((OBSwiseTypelList.get(j + jc) != -1)
                                                    && (OBSwiseTypelList.get(j + jc) != -2))) {
                                        // j is the end unit of this string variable
                                        concatenated = sb.toString();
                                        sb.setLength(0);
                                        lastJ = j + jc;
                                        break;
                                    } else {
                                        sb.append(casewiseRecordForTabFile.get(j + jc));
                                        removeJset.add(j + jc);
                                    }
                                }

                                casewiseRecordForTabFile.set(j - 1, concatenated);
                                //out.println(j-1+"th concatenated="+concatenated);
                                j = lastJ - 1;
                            } // end-of-if: continuous-OBS only
                        } // end of loop-j

                        //out.println("removeJset="+removeJset);

                        // a new list that stores a new case with concatenated string data
                        List<String> newDataLine = new ArrayList<String>();

                        for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                            //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");
                            if (!removeJset.contains(jl)) {
                                // if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileString)) {
                                //     out.println("NA-S jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                // } else if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileNumeric)) {
                                //     out.println("NA-N jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                // } else if (casewiseRecordForTabFile.get(jl) == null) {
                                //     out.println("null case jl="+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                // } else if (casewiseRecordForTabFile.get(jl).equals("NaN")) {
                                //     out.println("NaN jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                // } else if (casewiseRecordForTabFile.get(jl).equals("")) {
                                //     out.println("blank jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                // } else if (casewiseRecordForTabFile.get(jl).equals(" ")) {
                                //     out.println("space jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                // }
                                newDataLine.add(casewiseRecordForTabFile.get(jl));
                            } else {
                                // out.println("Excluded: jl="+jl+"-th datum=["+casewiseRecordForTabFile.get(jl)+"]");
                            }
                        } // end of loop-jl

                        //out.println("new casewiseRecordForTabFile="+newDataLine);
                        //out.println("new casewiseRecordForTabFile(size)="+newDataLine.size());

                        casewiseRecordForTabFile = newDataLine;
                    } // end-if: stringContinuousVar-exist case

                    // caseIndex starts from 1, not 0
                    caseIndex = (ii * OBS + i + 1) / nOBS;

                    for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {
                        dbgLog.fine("k=" + k + "-th variableTypelList=" + variableTypelList.get(k));

                        if (variableTypelList.get(k) > 0) {
                            // Strip the string variables of their
                            // whitespace padding:
                            // [ snipped ]
                            // The block of code where String values were
                            // substring()-ed to the length specified in the
                            // variable metadata has been removed; doing that
                            // was not enough, since a string can still be
                            // space-padded inside its advertised capacity
                            // (note that extended variables can have many
                            // kilobytes of such padding in them!). Plus it was
                            // completely redundant, since we are stripping all
                            // the trailing white spaces with
                            // StringUtils.stripEnd() below:

                            String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k).toString(), null);

                            // TODO: clean this up. For now, just make sure that
                            // strings contain at least one blank space.
                            if (paddRemoved.equals("")) {
                                paddRemoved = " ";
                            }

                            //casewiseRecordForTabFile.set(k, "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");
                            casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));

                            // end of String var case
                        } // end of variable-type check

                        if (casewiseRecordForTabFile.get(k) != null
                                && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {

                            String variableFormatType = variableFormatTypeList[k];
                            dbgLog.finer("k=" + k + "th printFormatTable format="
                                    + printFormatTable.get(variableNameList.get(k)));

                            int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                            if (variableFormatType.equals("date")) {
                                dbgLog.finer("date case");

                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                        - SPSS_DATE_OFFSET;

                                String newDatum = sdf_ymd.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);

                                /* saving date format */
                                dbgLog.finer("saving dateFormat[k] = " + sdf_ymd.toPattern());
                                casewiseRecordForTabFile.set(k, newDatum);
                                dateFormatList[k] = sdf_ymd.toPattern();
                                //formatCategoryTable.put(variableNameList.get(k), "date");

                            } else if (variableFormatType.equals("time")) {
                                dbgLog.finer("time case: DTIME or DATETIME or TIME");
                                //formatCategoryTable.put(variableNameList.get(k), "time");

                                if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {
                                    // We're not even going to try to handle "DTIME"
                                    // values as times/dates in Dataverse; this is a weird
                                    // format that nobody uses outside of SPSS.
                                    // (But we do need to remember to treat the resulting
                                    // values as character strings, not numerics!)

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                                * 1000L - SPSS_DATE_BIAS;
                                        String newDatum = sdf_dhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                        dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                        StringBuilder sb_time = new StringBuilder(sdf_dhms.format(new Date(dateDatum)));
                                        dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {
                                    // TODO:
                                    // (for both datetime and "dateless" time)
                                    // keep the longest of the matching formats -- i.e., if there are *some*
                                    // values in the vector that have thousandths of a second, that should be
                                    // part of the saved format!
                                    // -- L.A. Aug. 12 2014

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                                * 1000L - SPSS_DATE_OFFSET;
                                        String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        dateFormatList[k] = sdf_ymdhms.toPattern();
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                        //dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                        StringBuilder sb_time = new StringBuilder(sdf_ymdhms.format(new Date(dateDatum)));
                                        //dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        dateFormatList[k] = sdf_ymdhms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                                    // TODO:
                                    // double-check that we are handling "dateless" time correctly... -- L.A. Aug. 2014

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L;
                                        String newDatum = sdf_hms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);

                                        if (dateFormatList[k] == null) {
                                            dateFormatList[k] = sdf_hms.toPattern();
                                        }
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                        //dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                        StringBuilder sb_time = new StringBuilder(sdf_hms.format(new Date(dateDatum)));
                                        //dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());

                                        String format_hmsS = sdf_hms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                        if (dateFormatList[k] == null
                                                || (format_hmsS.length() > dateFormatList[k].length())) {
                                            dateFormatList[k] = format_hmsS;
                                        }
                                    }
                                }
                            } else if (variableFormatType.equals("other")) {
                                dbgLog.finer("other non-date/time case:=" + i);

                                if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                                    // day of the week
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST
                                            .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.WEEKDAY_LIST
                                            .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));

                                } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                                    // month
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST
                                            .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.MONTH_LIST
                                            .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                }
                            }
                        } // end: date-time-datum check
                    } // end: loop-k (2nd: variable-wise check)

                    // write to tab file
                    if (casewiseRecordForTabFile.size() > 0) {
                        pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
                    }

                    // numeric contents check
                    for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                        if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                                || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                                || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
                            // TODO:
                            // figure out if any special handling is still needed here in 4.0.
                            // -- L.A. - Aug. 2014
                        } else {
                            if (variableTypelList.get(l) <= 0) {
                                if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                                    decimalVariableSet.add(l);
                                }
                            }
                        }
                    }

                    // reset the case-wise working objects
                    casewiseRecordForTabFile.clear();

                    if (caseQnty > 0) {
                        if (caseIndex == caseQnty) {
                            hasReachedEOF = true;
                        }
                    }

                    if (hasReachedEOF) {
                        break;
                    }
                } // end of the-end-of-a-case(row) processing
            } // loop-i (OBS unit)

            if ((hasReachedEOF) || (stream.available() == 0)) {
                // reached the end of this file; do exit-processing
                dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");
                break OBSERVATION;
            }

            ii++;
        } // while loop

        pwout.close();
    } catch (IOException ex) {
        throw ex;
    }

    dbgLog.fine("<<<<<<");
    dbgLog.fine("formatCategoryTable = " + formatCategoryTable);
    dbgLog.fine(">>>>>>");

    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("decodeRecordTypeDataCompressed(): end");
}