List of usage examples for java.nio ByteBuffer order
ByteBuffer.order(ByteOrder) sets the byte order (endianness) the buffer uses when reading or writing multi-byte values such as short, int, long, float, and double; the no-argument ByteBuffer.order() returns the buffer's current byte order. Newly created buffers default to ByteOrder.BIG_ENDIAN.
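Before the full examples below, here is a minimal, self-contained sketch (not taken from any of the listed source files) showing how the order set with order() changes the value read back from the same bytes:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class ByteOrderDemo {
    public static void main(String[] args) {
        // The same four bytes decode to different int values
        // depending on the buffer's byte order.
        byte[] raw = {0x00, 0x00, 0x01, 0x02};

        ByteBuffer big = ByteBuffer.wrap(raw); // defaults to BIG_ENDIAN
        ByteBuffer little = ByteBuffer.wrap(raw).order(ByteOrder.LITTLE_ENDIAN);

        System.out.println(big.order() + " -> " + big.getInt());       // prints 258
        System.out.println(little.order() + " -> " + little.getInt()); // prints 33619968
    }
}

Note that order() only affects how subsequent multi-byte reads and writes are interpreted; it does not rearrange bytes already stored in the buffer.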
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java
private void decodeHeader(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeHeader(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    dbgLog.fine("reading the header segment 1: 4 byte\n");
    byte[] magic_number = new byte[DTA_MAGIC_NUMBER_LENGTH];
    int nbytes = stream.read(magic_number, 0, DTA_MAGIC_NUMBER_LENGTH);
    if (nbytes == 0) {
        throw new IOException();
    }

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("hex dump: 1st 4bytes =>" + new String(Hex.encodeHex(magic_number)) + "<-");

    if (magic_number[2] != 1) {
        dbgLog.fine("3rd byte is not 1: given file is not stata-dta type");
        throw new IllegalArgumentException("given file is not stata-dta type");
    } else if ((magic_number[1] != 1) && (magic_number[1] != 2)) {
        dbgLog.fine("2nd byte is neither 1 nor 2: this file is not stata-dta type");
        throw new IllegalArgumentException("given file is not stata-dta type");
    } else if (!STATA_RELEASE_NUMBER.containsKey((int) magic_number[0])) {
        dbgLog.fine("1st byte (" + magic_number[0] + ") is not within the ingestable range [rel. 3-10]:"
                + "we cannot ingest this Stata file.");
        throw new IllegalArgumentException("given file is not stata-dta type");
    } else {
        releaseNumber = (int) magic_number[0];
        smd.getFileInformation().put("releaseNumber", releaseNumber);
        smd.getFileInformation().put("byteOrder", (int) magic_number[1]);
        smd.getFileInformation().put("OSByteOrder", ByteOrder.nativeOrder().toString());
        smd.getFileInformation().put("mimeType", MIME_TYPE[0]);
        smd.getFileInformation().put("fileFormat", MIME_TYPE[0]);
        init();

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("this file is stata-dta type: " + STATA_RELEASE_NUMBER.get((int) magic_number[0])
                    + "(Number=" + magic_number[0] + ")");
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("Endian(file)(Big: 1; Little:2)=" + magic_number[1]);

        if ((int) magic_number[1] == 2) {
            isLittleEndian = true;
            dbgLog.fine("Reversal of the bytes is necessary to decode " + "multi-byte fields");
        }

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());
    }

    dbgLog.fine("reading the remaining header segment 2: 60 or 109-byte");
    byte[] header = new byte[headerLength];
    nbytes = stream.read(header, 0, headerLength);
    //printHexDump(header, "header:\n");

    // 1. number of variables: short (2 bytes)
    ByteBuffer bbnvar = ByteBuffer.wrap(header, 0, NVAR_FIELD_LENGTH);
    ByteBuffer dupnvar = bbnvar.duplicate();
    short short_nvar = dupnvar.getShort();

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("get original short view(nvar)=" + short_nvar);
    if (isLittleEndian) {
        bbnvar.order(ByteOrder.LITTLE_ENDIAN);
    }
    short shrt_nvar = bbnvar.getShort();
    smd.getFileInformation().put("varQnty", new Integer(shrt_nvar));

    // setup variableTypeList
    int nvar = shrt_nvar;
    variableTypelList = new String[nvar];

    // 2. number of observations: int (4 bytes)
    ByteBuffer nobs = ByteBuffer.wrap(header, NVAR_FIELD_LENGTH, NOBS_FIELD_LENGTH);
    ByteBuffer dupnobs = nobs.duplicate();
    int int_dupnobs = dupnobs.getInt();
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("raw nobs=" + int_dupnobs);
    if (isLittleEndian) {
        nobs.order(ByteOrder.LITTLE_ENDIAN);
    }
    int int_nobs = nobs.getInt();
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("reversed nobs=" + int_nobs);
    smd.getFileInformation().put("caseQnty", new Integer(int_nobs));

    // 3. data_label: 32 or 81 bytes
    int dl_offset = NVAR_FIELD_LENGTH + NOBS_FIELD_LENGTH;
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("dl_offset=" + dl_offset);
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data_label_length=" + dataLabelLength);

    String data_label = new String(Arrays.copyOfRange(header, dl_offset, (dl_offset + dataLabelLength)),
            "ISO-8859-1");

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data_label_length=" + data_label.length());
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("location of the null character=" + data_label.indexOf(0));

    String dataLabel = getNullStrippedString(data_label);
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data_label_length=" + dataLabel.length());
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data_label=[" + dataLabel + "]");
    smd.getFileInformation().put("dataLabel", dataLabel);

    // 4. time_stamp: ASCII String (18 bytes)
    // added after release 4
    if (releaseNumber > 104) {
        int ts_offset = dl_offset + dataLabelLength;
        String time_stamp = new String(Arrays.copyOfRange(header, ts_offset, ts_offset + TIME_STAMP_LENGTH),
                "ISO-8859-1");
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("time_stamp_length=" + time_stamp.length());
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("location of the null character=" + time_stamp.indexOf(0));

        String timeStamp = getNullStrippedString(time_stamp);
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("timeStamp_length=" + timeStamp.length());
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("timeStamp=[" + timeStamp + "]");

        smd.getFileInformation().put("timeStamp", timeStamp);
        smd.getFileInformation().put("fileDate", timeStamp);
        smd.getFileInformation().put("fileTime", timeStamp);
        smd.getFileInformation().put("varFormat_schema", "STATA");
    }

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("smd dump:" + smd.toString());
        dbgLog.fine("***** decodeHeader(): end *****");
    }
}
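The core ByteBuffer.order() pattern in the header decoding above, reduced to a short standalone sketch: wrap the relevant slice of the header bytes, switch the view to LITTLE_ENDIAN when the file's byte-order flag says so, then read the multi-byte fields. The constants and the tiny fake header here are simplified assumptions for illustration, not the actual DTAFileReader fields:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class StataHeaderSketch {
    // Simplified stand-ins for the reader's field lengths (assumption).
    private static final int NVAR_FIELD_LENGTH = 2;
    private static final int NOBS_FIELD_LENGTH = 4;

    public static void main(String[] args) {
        // Pretend header body: nvar = 3 (2 bytes), nobs = 10 (4 bytes),
        // written little-endian; flag value 2 means "little-endian" in the DTA format.
        byte byteOrderFlag = 2;
        byte[] header = {0x03, 0x00, 0x0A, 0x00, 0x00, 0x00};

        boolean isLittleEndian = (byteOrderFlag == 2);

        ByteBuffer nvarBuf = ByteBuffer.wrap(header, 0, NVAR_FIELD_LENGTH);
        ByteBuffer nobsBuf = ByteBuffer.wrap(header, NVAR_FIELD_LENGTH, NOBS_FIELD_LENGTH);
        if (isLittleEndian) {
            // Switch both views to the byte order declared in the file header.
            nvarBuf.order(ByteOrder.LITTLE_ENDIAN);
            nobsBuf.order(ByteOrder.LITTLE_ENDIAN);
        }

        short nvar = nvarBuf.getShort(); // 3
        int nobs = nobsBuf.getInt();     // 10
        System.out.println("variables=" + nvar + ", observations=" + nobs);
    }
}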
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java
private void decodeData(BufferedInputStream stream) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }/*from www .j a va 2s. com*/ int nvar = (Integer) smd.getFileInformation().get("varQnty"); int nobs = (Integer) smd.getFileInformation().get("caseQnty"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("data diminsion[rxc]=(" + nobs + "," + nvar + ")"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("bytes per row=" + bytes_per_row + " bytes"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("variableTypelList=" + Arrays.deepToString(variableTypelList)); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("StringVariableTable=" + StringVariableTable); FileOutputStream fileOutTab = null; PrintWriter pwout = null; // create a File object to save the tab-delimited data file File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab"); String tabDelimitedDataFileName = tabDelimitedDataFile.getAbsolutePath(); // save the temp file name in the metadata object smd.getFileInformation().put("tabDelimitedDataFileLocation", tabDelimitedDataFileName); fileOutTab = new FileOutputStream(tabDelimitedDataFile); pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); // data storage // Object[][] dataTable = new Object[nobs][nvar]; // for later variable-wise calculations of statistics // dataTable2 sotres cut-out data columnwise Object[][] dataTable2 = new Object[nvar][nobs]; String[][] dateFormat = new String[nvar][nobs]; for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; Object[] dataRow = new Object[nvar]; int nbytes = stream.read(dataRowBytes, 0, bytes_per_row); if (nbytes == 0) { String errorMessage = "reading data: no data were read at(" + i + "th row)"; throw new IOException(errorMessage); } // decoding each row int byte_offset = 0; for (int columnCounter = 0; columnCounter < variableTypelList.length; columnCounter++) { Integer varType = variableTypeMap.get(variableTypelList[columnCounter]); String variableFormat = variableFormats[columnCounter]; boolean isDateTimeDatum = isDateTimeDatumList[columnCounter]; switch (varType != null ? 
varType : 256) { case -5: // Byte case // note: 1 byte signed byte byte_datum = dataRowBytes[byte_offset]; if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum); if (byte_datum >= BYTE_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum); dataRow[columnCounter] = MissingValueForTextDataFileNumeric; dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF } else { dataRow[columnCounter] = byte_datum; dataTable2[columnCounter][i] = byte_datum; } byte_offset++; break; case -4: // Stata-int (=java's short: 2byte) case // note: 2-byte signed int, not java's int ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2); if (isLittleEndian) { int_buffer.order(ByteOrder.LITTLE_ENDIAN); } short short_datum = int_buffer.getShort(); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum); if (short_datum >= INT_MISSIG_VALUE) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = short_datum; dataRow[columnCounter] = short_datum; } } byte_offset += 2; break; case -3: // stata-Long (= java's int: 4 byte) case // note: 4-byte singed, not java's long dbgLog.fine("DATreader: stata long"); ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if (isLittleEndian) { long_buffer.order(ByteOrder.LITTLE_ENDIAN); } int int_datum = long_buffer.getInt(); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long =" + int_datum); if (int_datum >= LONG_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long missing value=" + int_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = int_datum; dataRow[columnCounter] = int_datum; } } byte_offset += 4; break; case -2: // float case // note: 4-byte ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if 
(isLittleEndian) { float_buffer.order(ByteOrder.LITTLE_ENDIAN); } float float_datum = float_buffer.getFloat(); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum); if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value=" + float_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = float_datum; dataRow[columnCounter] = float_datum; } } byte_offset += 4; break; case -1: // double case // note: 8-byte ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8); if (isLittleEndian) { double_buffer.order(ByteOrder.LITTLE_ENDIAN); } double double_datum = double_buffer.getDouble(); if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) { dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value=" + double_datum); if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = double_datum; dataRow[columnCounter] = doubleNumberFormatter.format(double_datum); } } byte_offset += 8; break; case 0: // String case int strVarLength = StringVariableTable.get(columnCounter); String raw_datum = new String( Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)), "ISO-8859-1"); String string_datum = getNullStrippedString(raw_datum); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum); if (string_datum.equals("")) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value=" + string_datum); dataRow[columnCounter] = MissingValueForTextDataFileString; dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF } else { String escapedString = string_datum.replaceAll("\"", Matcher.quoteReplacement("\\\"")); /* * Fixing the bug we've had in the Stata reader for * a longest time: new lines and tabs need to * be escaped too - otherwise it breaks our * TAB file structure! -- L.A. 
*/ escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t")); escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n")); escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r")); // the escaped version of the string will be // stored in the tab file: dataRow[columnCounter] = "\"" + escapedString + "\""; // but note that the "raw" version of it is // used for the UNF: dataTable2[columnCounter][i] = string_datum; } byte_offset += strVarLength; break; default: dbgLog.fine("unknown variable type found"); String errorMessage = "unknow variable Type found at data section"; throw new InvalidObjectException(errorMessage); } // switch } // for-columnCounter // dump the row of data to the external file pwout.println(StringUtils.join(dataRow, "\t")); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};"); } // for- i (row) pwout.close(); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer("\ndataTable2(variable-wise):\n"); dbgLog.finer(Arrays.deepToString(dataTable2)); dbgLog.finer("\ndateFormat(variable-wise):\n"); dbgLog.finer(Arrays.deepToString(dateFormat)); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("variableTypelList:\n" + Arrays.deepToString(variableTypelList)); dbgLog.fine("variableTypelListFinal:\n" + Arrays.deepToString(variableTypelListFinal)); } String[] unfValues = new String[nvar]; for (int j = 0; j < nvar; j++) { String variableType_j = variableTypelListFinal[j]; unfValues[j] = getUNF(dataTable2[j], dateFormat[j], variableType_j, unfVersionNumber, j); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(j + "th unf value" + unfValues[j]); } if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("unf set:\n" + Arrays.deepToString(unfValues)); fileUnfValue = UNF5Util.calculateUNF(unfValues); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("file-unf=" + fileUnfValue); stataDataSection.setUnf(unfValues); stataDataSection.setFileUnf(fileUnfValue); smd.setVariableUNF(unfValues); smd.getFileInformation().put("fileUNF", fileUnfValue); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("unf values:\n" + unfValues); stataDataSection.setData(dataTable2); // close the stream dbgLog.fine("***** decodeData(): end *****\n\n"); }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java
private void decodeData(BufferedInputStream stream) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }//from ww w.jav a2 s .c o m //int nvar = (Integer)smd.getFileInformation().get("varQnty"); int nvar = dataTable.getVarQuantity().intValue(); //int nobs = (Integer)smd.getFileInformation().get("caseQnty"); int nobs = dataTable.getCaseQuantity().intValue(); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")"); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("bytes per row=" + bytes_per_row + " bytes"); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("variableTypes=" + Arrays.deepToString(variableTypes)); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("StringLengthTable=" + StringLengthTable); } // create a File object to save the tab-delimited data file FileOutputStream fileOutTab = null; PrintWriter pwout = null; File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab"); // save the temp tab-delimited file in the return ingest object: ingesteddata.setTabDelimitedFile(tabDelimitedDataFile); fileOutTab = new FileOutputStream(tabDelimitedDataFile); pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); /* Should we lose this dateFormat thing in 4.0? * the UNF should be calculatable on the app side solely from the data * stored in the tab file and the type information stored the dataVariable * object. * furthermore, the very idea of storing a format entry not just for * every variable, but for every value/observation is a bit strange. * TODO: review and confirm that, in the 3.* implementation, every * entry in dateFormat[nvar][*] is indeed the same - except for the * missing value entries. -- L.A. 4.0 (OK, I got rid of the dateFormat; instead I kinda sorta assume that the format is the same for every value in a column, save for the missing values... like this: dataTable.getDataVariables().get(columnCounter).setFormatSchemaName(ddt.format); BUT, this needs to be reviewed/confirmed etc! */ //String[][] dateFormat = new String[nvar][nobs]; for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; Object[] dataRow = new Object[nvar]; int nbytes = stream.read(dataRowBytes, 0, bytes_per_row); if (nbytes == 0) { String errorMessage = "reading data: no data were read at(" + i + "th row)"; throw new IOException(errorMessage); } // decoding each row int byte_offset = 0; for (int columnCounter = 0; columnCounter < variableTypes.length; columnCounter++) { Integer varType = variableTypeMap.get(variableTypes[columnCounter]); // 4.0 Check if this is a time/date variable: boolean isDateTimeDatum = false; String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory(); if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) { isDateTimeDatum = true; } String variableFormat = dateVariableFormats[columnCounter]; switch (varType != null ? 
varType : 256) { case -5: // Byte case // note: 1 byte signed byte byte_datum = dataRowBytes[byte_offset]; if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum); } if (byte_datum >= BYTE_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { dataRow[columnCounter] = byte_datum; } byte_offset++; break; case -4: // Stata-int (=java's short: 2byte) case // note: 2-byte signed int, not java's int ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2); if (isLittleEndian) { int_buffer.order(ByteOrder.LITTLE_ENDIAN); } short short_datum = int_buffer.getShort(); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum); } if (short_datum >= INT_MISSIG_VALUE) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum)); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); } dataRow[columnCounter] = ddt.decodedDateTime; //dateFormat[columnCounter][i] = ddt.format; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = short_datum; } } byte_offset += 2; break; case -3: // stata-Long (= java's int: 4 byte) case // note: 4-byte singed, not java's long //dbgLog.fine("DATreader: stata long"); ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if (isLittleEndian) { long_buffer.order(ByteOrder.LITTLE_ENDIAN); } int int_datum = long_buffer.getInt(); if (dbgLog.isLoggable(Level.FINE)) { //dbgLog.fine(i + "-th row " + columnCounter // + "=th column stata long =" + int_datum); } if (int_datum >= LONG_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINE)) { //dbgLog.fine(i + "-th row " + columnCounter // + "=th column stata long missing value=" + int_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum)); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); } dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = int_datum; } } byte_offset += 4; break; case -2: // float case // note: 4-byte ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if (isLittleEndian) { float_buffer.order(ByteOrder.LITTLE_ENDIAN); } float float_datum = float_buffer.getFloat(); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum); } if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value=" + float_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum)); if 
(dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); } dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = float_datum; // This may be temporary - but for now (as in, while I'm testing // 4.0 ingest against 3.* ingest, I need to be able to tell if a // floating point value was a single, or double float in the // original STATA file: -- L.A. Jul. 2014 dataTable.getDataVariables().get(columnCounter).setFormat("float"); } } byte_offset += 4; break; case -1: // double case // note: 8-byte ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8); if (isLittleEndian) { double_buffer.order(ByteOrder.LITTLE_ENDIAN); } double double_datum = double_buffer.getDouble(); if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value=" + double_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum)); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); } dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = doubleNumberFormatter.format(double_datum); } } byte_offset += 8; break; case 0: // String case int strVarLength = StringLengthTable.get(columnCounter); String raw_datum = new String( Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)), "ISO-8859-1"); // TODO: // is it the right thing to do, to default to "ISO-8859-1"? // (it may be; since there's no mechanism for specifying // alternative encodings in Stata, this may be their default; // it just needs to be verified. -- L.A. Jul. 2014) String string_datum = getNullStrippedString(raw_datum); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum); } if (string_datum.isEmpty()) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value=" + string_datum); } // TODO: /* Is this really a missing value case? * Or is it an honest empty string? * Is there such a thing as a missing value for a String in Stata? * -- L.A. 4.0 */ dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { /* * Some special characters, like new lines and tabs need to * be escaped - otherwise they will break our TAB file * structure! * But before we escape anything, all the back slashes * already in the string need to be escaped themselves. 
*/ String escapedString = string_datum.replace("\\", "\\\\"); // escape quotes: escapedString = escapedString.replaceAll("\"", Matcher.quoteReplacement("\\\"")); // escape tabs and new lines: escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t")); escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n")); escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r")); // the escaped version of the string is stored in the tab file // enclosed in double-quotes; this is in order to be able // to differentiate between an empty string (tab-delimited empty string in // double quotes) and a missing value (tab-delimited empty string). // Although the question still remains - is it even possible // to store an empty string, that's not a missing value, in Stata? // - see the comment in the missing value case above. -- L.A. 4.0 dataRow[columnCounter] = "\"" + escapedString + "\""; } byte_offset += strVarLength; break; default: dbgLog.fine("unknown variable type found"); String errorMessage = "unknow variable Type found at data section"; throw new InvalidObjectException(errorMessage); } // switch } // for-columnCounter // Dump the row of data to the tab-delimited file we are producing: pwout.println(StringUtils.join(dataRow, "\t")); if (dbgLog.isLoggable(Level.FINE)) { //dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};"); } } // for- i (row) pwout.close(); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes)); } dbgLog.fine("DTA Ingest: decodeData(): end."); }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java
private void decodeHeader(BufferedInputStream stream) throws IOException { dbgLog.fine("***** decodeHeader(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }/*from www . j a va2 s . co m*/ dbgLog.fine("reading the header segument 1: 4 byte\n"); byte[] magic_number = new byte[DTA_MAGIC_NUMBER_LENGTH]; int nbytes = stream.read(magic_number, 0, DTA_MAGIC_NUMBER_LENGTH); if (nbytes == 0) { throw new IOException(); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("hex dump: 1st 4bytes =>" + new String(Hex.encodeHex(magic_number)) + "<-"); } if (magic_number[2] != 1) { dbgLog.fine("3rd byte is not 1: given file is not stata-dta type"); throw new IllegalArgumentException("The file is not in a STATA format that we can read or support."); } else if ((magic_number[1] != 1) && (magic_number[1] != 2)) { dbgLog.fine("2nd byte is neither 0 nor 1: this file is not stata-dta type"); throw new IllegalArgumentException("given file is not stata-dta type"); } else if (!STATA_RELEASE_NUMBER.containsKey((int) magic_number[0])) { dbgLog.fine("1st byte (" + magic_number[0] + ") is not within the ingestable range [rel. 3-10]:" + "we cannot ingest this Stata file."); throw new IllegalArgumentException("given file is not stata-dta type"); } else { releaseNumber = magic_number[0]; init(); dataTable.setOriginalFileFormat(MIME_TYPE[0]); /* * releaseNumber: * for storing in the datatable, we are converting the numeric Stata * release number into a more user friendly "version number"; * e.g., "release number 115" = "Stata v. 12" * -- L.A. 4.0 */ dataTable.setOriginalFormatVersion(STATA_RELEASE_NUMBER.get(releaseNumber)); dataTable.setUnf("UNF:6:FILEFILEFILEFILE"); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("this file is stata-dta type: " + STATA_RELEASE_NUMBER.get(releaseNumber) + " (that means Stata version " + releaseNumber + ")"); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("Endian(file)(Big: 1; Little:2)=" + magic_number[1]); } /* * byte order: defined in the second byte of the "magic number": */ if (magic_number[1] == 2) { isLittleEndian = true; dbgLog.fine("Reversal of the bytes is necessary to decode " + "multi-byte fields"); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString()); } } dbgLog.fine("reading the remaining header segument 2: 60 or 109-byte"); byte[] header = new byte[headerLength]; nbytes = stream.read(header, 0, headerLength); // 1. 
number of variables: short (2 bytes) ByteBuffer bbnvar = ByteBuffer.wrap(header, 0, NVAR_FIELD_LENGTH); ByteBuffer dupnvar = bbnvar.duplicate(); short short_nvar = dupnvar.getShort(); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("get original short view(nvar)=" + short_nvar); } if (isLittleEndian) { bbnvar.order(ByteOrder.LITTLE_ENDIAN); } short shrt_nvar = bbnvar.getShort(); dataTable.setVarQuantity(new Long(shrt_nvar)); int nvar = shrt_nvar; if (dbgLog.isLoggable(Level.INFO)) { dbgLog.info("number of variables(nvar)=" + nvar); } // 4.0 Initialize dataverse variable objects: List<DataVariable> variableList = new ArrayList<>(); for (int i = 0; i < nvar; i++) { DataVariable dv = new DataVariable(); dv.setInvalidRanges(new ArrayList<>()); dv.setSummaryStatistics(new ArrayList<>()); dv.setUnf("UNF:6:XXX"); dv.setCategories(new ArrayList<>()); variableList.add(dv); dv.setFileOrder(i); dv.setDataTable(dataTable); } dataTable.setDataVariables(variableList); // setup variableTypeList variableTypes = new String[nvar]; // and the date/time format list: dateVariableFormats = new String[nvar]; // 2. number of observations: int (4 bytes) ByteBuffer nobs = ByteBuffer.wrap(header, NVAR_FIELD_LENGTH, NOBS_FIELD_LENGTH); ByteBuffer dupnobs = nobs.duplicate(); int int_dupnobs = dupnobs.getInt(); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("raw nobs=" + int_dupnobs); } if (isLittleEndian) { nobs.order(ByteOrder.LITTLE_ENDIAN); } int int_nobs = nobs.getInt(); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("reversed nobs=" + int_nobs); } // smd.getFileInformation().put("caseQnty", new Integer(int_nobs)); dataTable.setCaseQuantity(new Long(int_nobs)); /* the "data label" - note that we are not using this label for anything (wonder what it is though? can we use it somewhere?) but we still need to extract it from the byte stream, since the offsets of the objects stored further up are calculated relative to it. -- L.A., 4.0 */ // 3. data_label: 32 or 81 bytes int dl_offset = NVAR_FIELD_LENGTH + NOBS_FIELD_LENGTH; if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("dl_offset=" + dl_offset); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("data_label_length=" + dataLabelLength); } String data_label = new String(Arrays.copyOfRange(header, dl_offset, (dl_offset + dataLabelLength)), "ISO-8859-1"); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("data_label_length=" + data_label.length()); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("loation of the null character=" + data_label.indexOf(0)); } String dataLabel = getNullStrippedString(data_label); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("data_label_length=" + dataLabel.length()); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("data_label=[" + dataLabel + "]"); } // smd.getFileInformation().put("dataLabel", dataLabel); /* end of "data label" */ // 4. time_stamp: ASCII String (18 bytes) // added after release 4 if (releaseNumber > 104) { int ts_offset = dl_offset + dataLabelLength; String time_stamp = new String(Arrays.copyOfRange(header, ts_offset, ts_offset + TIME_STAMP_LENGTH), "ISO-8859-1"); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("time_stamp_length=" + time_stamp.length()); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("loation of the null character=" + time_stamp.indexOf(0)); } String timeStamp = getNullStrippedString(time_stamp); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("timeStamp_length=" + timeStamp.length()); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("timeStamp=[" + timeStamp + "]"); } } }
From source file:nodomain.freeyourgadget.gadgetbridge.service.devices.pebble.PebbleProtocol.java
private GBDeviceEvent decodeVoiceControl(ByteBuffer buf) {
    buf.order(ByteOrder.LITTLE_ENDIAN);
    byte command = buf.get();
    int flags = buf.getInt();
    byte session_type = buf.get(); // 0x01 dictation, 0x02 command
    short session_id = buf.getShort();
    // attributes
    byte count = buf.get();
    byte type = buf.get();
    short length = buf.getShort();
    byte[] version = new byte[20];
    buf.get(version); // it's a string like "1.2rc1"
    int sample_rate = buf.getInt();
    short bit_rate = buf.getShort();
    byte bitstream_version = buf.get();
    short frame_size = buf.getShort();

    GBDeviceEventSendBytes sendBytes = new GBDeviceEventSendBytes();
    if (command == 0x01) { // session setup
        sendBytes.encodedBytes = null;
    } else if (command == 0x02) { // dictation result
        sendBytes.encodedBytes = null;
    }
    return sendBytes;
}
From source file:nodomain.freeyourgadget.gadgetbridge.service.devices.pebble.PebbleProtocol.java
private UUID getUUID(ByteBuffer buf) {
    ByteOrder byteOrder = buf.order();
    buf.order(ByteOrder.BIG_ENDIAN);
    long uuid_high = buf.getLong();
    long uuid_low = buf.getLong();
    buf.order(byteOrder);
    return new UUID(uuid_high, uuid_low);
}
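The example above saves the buffer's current order, forces BIG_ENDIAN for the 16 UUID bytes, and then restores the saved order. A generic version of that save/read/restore pattern might look like the sketch below; readLongAs is a hypothetical helper for illustration, not part of PebbleProtocol:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

final class ByteOrderUtil {
    // Hypothetical helper: read one long in a temporary byte order,
    // then restore whatever order the caller had configured.
    static long readLongAs(ByteBuffer buf, ByteOrder temporaryOrder) {
        ByteOrder saved = buf.order();
        try {
            buf.order(temporaryOrder);
            return buf.getLong();
        } finally {
            buf.order(saved);
        }
    }
}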
From source file:nodomain.freeyourgadget.gadgetbridge.service.devices.pebble.PebbleProtocol.java
private byte[] encodeDatalog(byte handle, byte reply) {
    ByteBuffer buf = ByteBuffer.allocate(LENGTH_PREFIX + 2);
    buf.order(ByteOrder.BIG_ENDIAN);
    buf.putShort((short) 2);
    buf.putShort(ENDPOINT_DATALOG);
    buf.put(reply);
    buf.put(handle);
    return buf.array();
}
From source file:nodomain.freeyourgadget.gadgetbridge.service.devices.pebble.PebbleProtocol.java
byte[] encodeActivateHealth(boolean activate) {
    byte[] blob;
    if (activate) {
        ByteBuffer buf = ByteBuffer.allocate(9);
        buf.order(ByteOrder.LITTLE_ENDIAN);
        ActivityUser activityUser = new ActivityUser();
        Integer heightMm = activityUser.getHeightCm() * 10;
        buf.putShort(heightMm.shortValue());
        Integer weigthDag = activityUser.getWeightKg() * 100;
        buf.putShort(weigthDag.shortValue());
        buf.put((byte) 0x01); // activate tracking
        buf.put((byte) 0x00); // activity Insights
        buf.put((byte) 0x00); // sleep Insights
        buf.put((byte) activityUser.getAge());
        buf.put((byte) activityUser.getGender());
        blob = buf.array();
    } else {
        blob = new byte[] { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
    }
    return encodeBlobdb("activityPreferences", BLOBDB_INSERT, BLOBDB_PREFERENCES, blob);
}
From source file:nodomain.freeyourgadget.gadgetbridge.service.devices.pebble.PebbleProtocol.java
byte[] encodeInstallMetadata(UUID uuid, String appName, short appVersion, short sdkVersion, int flags,
        int iconId) {
    final short METADATA_LENGTH = 126;

    byte[] name_buf = new byte[96];
    System.arraycopy(appName.getBytes(), 0, name_buf, 0, appName.getBytes().length);
    ByteBuffer buf = ByteBuffer.allocate(METADATA_LENGTH);

    buf.order(ByteOrder.BIG_ENDIAN);
    buf.putLong(uuid.getMostSignificantBits()); // watchapp uuid
    buf.putLong(uuid.getLeastSignificantBits());
    buf.order(ByteOrder.LITTLE_ENDIAN);
    buf.putInt(flags);
    buf.putInt(iconId);
    buf.putShort(appVersion);
    buf.putShort(sdkVersion);
    buf.put((byte) 0); // app_face_bgcolor
    buf.put((byte) 0); // app_face_template_id
    buf.put(name_buf); // 96 bytes

    return encodeBlobdb(uuid, BLOBDB_INSERT, BLOBDB_APP, buf.array());
}
From source file:nodomain.freeyourgadget.gadgetbridge.service.devices.pebble.PebbleProtocol.java
@Override
public byte[] encodeAppReorder(UUID[] uuids) {
    int length = 2 + uuids.length * LENGTH_UUID;
    ByteBuffer buf = ByteBuffer.allocate(LENGTH_PREFIX + length);
    buf.order(ByteOrder.BIG_ENDIAN);
    buf.putShort((short) length);
    buf.putShort(ENDPOINT_APPREORDER);
    buf.put((byte) 0x01);
    buf.put((byte) uuids.length);
    for (UUID uuid : uuids) {
        buf.putLong(uuid.getMostSignificantBits());
        buf.putLong(uuid.getLeastSignificantBits());
    }
    return buf.array();
}