Usage examples for java.nio.ByteBuffer.getShort(), collected from open-source projects.
public abstract short getShort();
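Before the project examples, here is a minimal, self-contained sketch of the relative and absolute getShort variants and the effect of byte order; the class name GetShortDemo and the sample bytes are illustrative only, not taken from any of the projects below.

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class GetShortDemo {
    public static void main(String[] args) {
        // Two bytes that encode the value 0x0102.
        ByteBuffer buf = ByteBuffer.wrap(new byte[] { 0x01, 0x02 });

        // Relative get: reads two bytes at the current position
        // (big-endian by default) and advances the position by 2.
        short bigEndian = buf.getShort();       // 0x0102 = 258

        // Absolute get: reads at the given index without moving the position;
        // with the order switched, the same bytes decode differently.
        buf.order(ByteOrder.LITTLE_ENDIAN);
        short littleEndian = buf.getShort(0);   // 0x0201 = 513

        System.out.println(bigEndian + " " + littleEndian);
    }
}

Most of the examples below follow the same pattern: wrap a byte[] that was read from a stream, switch the buffer to ByteOrder.LITTLE_ENDIAN when the file format requires it, then call getShort().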
From source file: edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java

private void decodeExpansionFields(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeExpansionFields(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    // Added since release 105
    // [1-byte byte_field][short(2)/int(4)_field][variable_field whose
    // length is specified by the previous short/int field]
    int int_type_expansion_field = constantTable.get("EXPANSION");
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("int_type_expansion_field=" + int_type_expansion_field);
    while (true) {
        byte[] firstByte = new byte[1];
        byte[] lengthBytes = new byte[int_type_expansion_field];
        int nbyte = stream.read(firstByte, 0, 1);
        dbgLog.fine("read 1st byte");
        int nbytes = stream.read(lengthBytes, 0, int_type_expansion_field);
        dbgLog.fine("read next integer");
        ByteBuffer bb_field_length = ByteBuffer.wrap(lengthBytes);
        if (isLittleEndian) {
            bb_field_length.order(ByteOrder.LITTLE_ENDIAN);
            dbgLog.fine("byte reversed");
        }
        int field_length;
        if (int_type_expansion_field == 2) {
            field_length = bb_field_length.getShort();
        } else {
            field_length = bb_field_length.getInt();
        }
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("field_length=" + field_length);
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("firstByte[0]=" + firstByte[0]);
        if ((field_length + (int) firstByte[0]) == 0) {
            // reached the end of this field
            break;
        } else {
            byte[] stringField = new byte[field_length];
            nbyte = stream.read(stringField, 0, field_length);
        }
    }
    dbgLog.fine("***** decodeExpansionFields(): end *****");
}
From source file: edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

private void decodeExpansionFields(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeExpansionFields(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    // Added since release 105
    // [1-byte byte_field][short(2)/int(4)_field][variable_field whose
    // length is specified by the previous short/int field]
    int int_type_expansion_field = constantTable.get("EXPANSION");
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("int_type_expansion_field=" + int_type_expansion_field);
    while (true) {
        byte[] firstByte = new byte[1];
        byte[] lengthBytes = new byte[int_type_expansion_field];
        int nbyte = stream.read(firstByte, 0, 1);
        dbgLog.fine("read 1st byte");
        int nbytes = stream.read(lengthBytes, 0, int_type_expansion_field);
        dbgLog.fine("read next integer");
        ByteBuffer bb_field_length = ByteBuffer.wrap(lengthBytes);
        if (isLittleEndian) {
            bb_field_length.order(ByteOrder.LITTLE_ENDIAN);
            dbgLog.fine("byte reversed");
        }
        int field_length;
        if (int_type_expansion_field == 2) {
            field_length = bb_field_length.getShort();
        } else {
            field_length = bb_field_length.getInt();
        }
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("field_length=" + field_length);
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("firstByte[0]=" + firstByte[0]);
        if ((field_length + firstByte[0]) == 0) {
            // reached the end of this field
            break;
        } else {
            byte[] stringField = new byte[field_length];
            nbyte = stream.read(stringField, 0, field_length);
        }
    }
    dbgLog.fine("decodeExpansionFields(): end");
}
From source file: edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java

void parseValueLabelsRelease105(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** parseValueLabelsRelease105(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    int nvar = (Integer) smd.getFileInformation().get("varQnty");
    int length_label_name = constantTable.get("NAME") + 1; // note: caution +1 as the null character, not 9 byte
    int length_value_label_header = value_label_table_length + length_label_name;
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("value_label_table_length=" + value_label_table_length);
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("length_value_label_header=" + length_value_label_header);
    int length_lable_name_field = 8;
    /*
     * Seg    field         byte   type
     * 1-1.   no of pairs      2   int (= m)
     * 1-2.   vlt_name        10   includes char+(\0) == name used in Sec2.part 5
     *        -----------------------------------
     *                        11
     * 2-1.   values         2*n   int[]
     * 2-2.   labels         8*n   char
     */
    for (int i = 0; i < nvar; i++) {
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("\n\n" + i + "th value-label table header");
        byte[] valueLabelHeader = new byte[length_value_label_header];
        // Part 1: reading the header of a value-label table if exists
        int nbytes = stream.read(valueLabelHeader, 0, length_value_label_header);
        if (nbytes == 0) {
            throw new IOException("reading value label header: no datum");
        }
        // 1.1 number of value-label pairs in this table (= m)
        ByteBuffer bb_value_label_pairs = ByteBuffer.wrap(valueLabelHeader, 0, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_pairs.order(ByteOrder.LITTLE_ENDIAN);
            //if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("value lable table lenth: byte reversed");
        }
        int no_value_label_pairs = bb_value_label_pairs.getShort();
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("no_value_label_pairs=" + no_value_label_pairs);
        // 1.2 labelName
        String rawLabelName = new String(Arrays.copyOfRange(valueLabelHeader, value_label_table_length,
                (value_label_table_length + length_label_name)), "ISO-8859-1");
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("rawLabelName(length)=" + rawLabelName.length());
        String labelName = rawLabelName.substring(0, rawLabelName.indexOf(0));
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("label name = " + labelName + "\n");
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th value-label table");
        // Part 2: reading the value-label table
        // the length of the value-label table is: 2*m + 8*m = 10*m
        int length_value_label_table = (value_label_table_length + length_lable_name_field) * no_value_label_pairs;
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("length_value_label_table=" + length_value_label_table);
        byte[] valueLabelTable_i = new byte[length_value_label_table];
        int noBytes = stream.read(valueLabelTable_i, 0, length_value_label_table);
        if (noBytes == 0) {
            throw new IOException("reading value label table: no datum");
        }
        // 2-1. 2-byte-integer array (2*m): value array (sorted)
        short[] valueList = new short[no_value_label_pairs];
        int offset_value = 0;
        for (int k = 0; k < no_value_label_pairs; k++) {
            ByteBuffer bb_value_list = ByteBuffer.wrap(valueLabelTable_i, offset_value, value_label_table_length);
            if (isLittleEndian) {
                bb_value_list.order(ByteOrder.LITTLE_ENDIAN);
            }
            valueList[k] = bb_value_list.getShort();
            offset_value += value_label_table_length;
        }
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("value_list=" + Arrays.toString(valueList) + "\n");
        // 2-2. 8-byte chars that store label data (m units of labels)
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("current offset_value=" + offset_value);
        int offset_start = offset_value;
        int offset_end = offset_value + length_lable_name_field;
        String[] labelList = new String[no_value_label_pairs];
        for (int l = 0; l < no_value_label_pairs; l++) {
            String string_l = new String(Arrays.copyOfRange(valueLabelTable_i, offset_start, offset_end), "ISO-8859-1");
            int null_position = string_l.indexOf(0);
            if (null_position != -1) {
                labelList[l] = string_l.substring(0, null_position);
            } else {
                labelList[l] = string_l;
            }
            offset_start = offset_end;
            offset_end += length_lable_name_field;
        }
        Map<String, String> tmpValueLabelTable = new LinkedHashMap<String, String>();
        for (int j = 0; j < no_value_label_pairs; j++) {
            if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(j + "-th pair:" + valueList[j] + "[" + labelList[j] + "]");
            tmpValueLabelTable.put(Integer.toString(valueList[j]), labelList[j]);
        }
        valueLabelTable.put(labelName, tmpValueLabelTable);
        if (stream.available() == 0) {
            // reached the end of this file
            // do exit-processing
            if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("***** reached the end of the file at " + i + "th value-label Table *****");
            break;
        }
    } // for-loop
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("valueLabelTable:\n" + valueLabelTable);
    smd.setValueLabelTable(valueLabelTable);
    dbgLog.fine("***** parseValueLabelsRelease105(): end *****");
}
From source file: edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java

private void decodeData(BufferedInputStream stream) throws IOException {
    dbgLog.fine("\n***** decodeData(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    int nvar = (Integer) smd.getFileInformation().get("varQnty");
    int nobs = (Integer) smd.getFileInformation().get("caseQnty");
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("data diminsion[rxc]=(" + nobs + "," + nvar + ")");
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("bytes per row=" + bytes_per_row + " bytes");
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("variableTypelList=" + Arrays.deepToString(variableTypelList));
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("StringVariableTable=" + StringVariableTable);
    FileOutputStream fileOutTab = null;
    PrintWriter pwout = null;
    // create a File object to save the tab-delimited data file
    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");
    String tabDelimitedDataFileName = tabDelimitedDataFile.getAbsolutePath();
    // save the temp file name in the metadata object
    smd.getFileInformation().put("tabDelimitedDataFileLocation", tabDelimitedDataFileName);
    fileOutTab = new FileOutputStream(tabDelimitedDataFile);
    pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);
    // data storage
    // Object[][] dataTable = new Object[nobs][nvar];
    // for later variable-wise calculations of statistics
    // dataTable2 sotres cut-out data columnwise
    Object[][] dataTable2 = new Object[nvar][nobs];
    String[][] dateFormat = new String[nvar][nobs];
    for (int i = 0; i < nobs; i++) {
        byte[] dataRowBytes = new byte[bytes_per_row];
        Object[] dataRow = new Object[nvar];
        int nbytes = stream.read(dataRowBytes, 0, bytes_per_row);
        if (nbytes == 0) {
            String errorMessage = "reading data: no data were read at(" + i + "th row)";
            throw new IOException(errorMessage);
        }
        // decoding each row
        int byte_offset = 0;
        for (int columnCounter = 0; columnCounter < variableTypelList.length; columnCounter++) {
            Integer varType = variableTypeMap.get(variableTypelList[columnCounter]);
            String variableFormat = variableFormats[columnCounter];
            boolean isDateTimeDatum = isDateTimeDatumList[columnCounter];
            switch (varType != null ? varType : 256) {
                case -5:
                    // Byte case
                    // note: 1 byte signed
                    byte byte_datum = dataRowBytes[byte_offset];
                    if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum);
                    if (byte_datum >= BYTE_MISSING_VALUE) {
                        if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum);
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                        dataTable2[columnCounter][i] = null; // use null reference to indicate missing value in data that is passed to UNF
                    } else {
                        dataRow[columnCounter] = byte_datum;
                        dataTable2[columnCounter][i] = byte_datum;
                    }
                    byte_offset++;
                    break;
                case -4:
                    // Stata-int (=java's short: 2byte) case
                    // note: 2-byte signed int, not java's int
                    ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2);
                    if (isLittleEndian) {
                        int_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    short short_datum = int_buffer.getShort();
                    if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum);
                    if (short_datum >= INT_MISSIG_VALUE) {
                        if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum);
                        dataTable2[columnCounter][i] = null; // use null reference to indicate missing value in data that is passed to UNF
                        if (isDateTimeDatum) {
                            dataRow[columnCounter] = MissingValueForTextDataFileString;
                        } else {
                            dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                        }
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum));
                            if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dateFormat[columnCounter][i] = ddt.format;
                            dataTable2[columnCounter][i] = dataRow[columnCounter];
                        } else {
                            dataTable2[columnCounter][i] = short_datum;
                            dataRow[columnCounter] = short_datum;
                        }
                    }
                    byte_offset += 2;
                    break;
                case -3:
                    // stata-Long (= java's int: 4 byte) case
                    // note: 4-byte singed, not java's long
                    dbgLog.fine("DATreader: stata long");
                    ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                    if (isLittleEndian) {
                        long_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    int int_datum = long_buffer.getInt();
                    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long =" + int_datum);
                    if (int_datum >= LONG_MISSING_VALUE) {
                        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long missing value=" + int_datum);
                        dataTable2[columnCounter][i] = null; // use null reference to indicate missing value in data that is passed to UNF
                        if (isDateTimeDatum) {
                            dataRow[columnCounter] = MissingValueForTextDataFileString;
                        } else {
                            dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                        }
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum));
                            if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dateFormat[columnCounter][i] = ddt.format;
                            dataTable2[columnCounter][i] = dataRow[columnCounter];
                        } else {
                            dataTable2[columnCounter][i] = int_datum;
                            dataRow[columnCounter] = int_datum;
                        }
                    }
                    byte_offset += 4;
                    break;
                case -2:
                    // float case
                    // note: 4-byte
                    ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                    if (isLittleEndian) {
                        float_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    float float_datum = float_buffer.getFloat();
                    if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum);
                    if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                        if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value=" + float_datum);
                        dataTable2[columnCounter][i] = null; // use null reference to indicate missing value in data that is passed to UNF
                        if (isDateTimeDatum) {
                            dataRow[columnCounter] = MissingValueForTextDataFileString;
                        } else {
                            dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                        }
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum));
                            if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dateFormat[columnCounter][i] = ddt.format;
                            dataTable2[columnCounter][i] = dataRow[columnCounter];
                        } else {
                            dataTable2[columnCounter][i] = float_datum;
                            dataRow[columnCounter] = float_datum;
                        }
                    }
                    byte_offset += 4;
                    break;
                case -1:
                    // double case
                    // note: 8-byte
                    ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8);
                    if (isLittleEndian) {
                        double_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    double double_datum = double_buffer.getDouble();
                    if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                        dataTable2[columnCounter][i] = null; // use null reference to indicate missing value in data that is passed to UNF
                        if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value=" + double_datum);
                        if (isDateTimeDatum) {
                            dataRow[columnCounter] = MissingValueForTextDataFileString;
                        } else {
                            dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                        }
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum));
                            if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dateFormat[columnCounter][i] = ddt.format;
                            dataTable2[columnCounter][i] = dataRow[columnCounter];
                        } else {
                            dataTable2[columnCounter][i] = double_datum;
                            dataRow[columnCounter] = doubleNumberFormatter.format(double_datum);
                        }
                    }
                    byte_offset += 8;
                    break;
                case 0:
                    // String case
                    int strVarLength = StringVariableTable.get(columnCounter);
                    String raw_datum = new String(Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)), "ISO-8859-1");
                    String string_datum = getNullStrippedString(raw_datum);
                    if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum);
                    if (string_datum.equals("")) {
                        if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value=" + string_datum);
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                        dataTable2[columnCounter][i] = null; // use null reference to indicate missing value in data that is passed to UNF
                    } else {
                        String escapedString = string_datum.replaceAll("\"", Matcher.quoteReplacement("\\\""));
                        /*
                         * Fixing the bug we've had in the Stata reader for
                         * a longest time: new lines and tabs need to
                         * be escaped too - otherwise it breaks our
                         * TAB file structure! -- L.A.
                         */
                        escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t"));
                        escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n"));
                        escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r"));
                        // the escaped version of the string will be
                        // stored in the tab file:
                        dataRow[columnCounter] = "\"" + escapedString + "\"";
                        // but note that the "raw" version of it is
                        // used for the UNF:
                        dataTable2[columnCounter][i] = string_datum;
                    }
                    byte_offset += strVarLength;
                    break;
                default:
                    dbgLog.fine("unknown variable type found");
                    String errorMessage = "unknow variable Type found at data section";
                    throw new InvalidObjectException(errorMessage);
            } // switch
        } // for-columnCounter
        // dump the row of data to the external file
        pwout.println(StringUtils.join(dataRow, "\t"));
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};");
    } // for- i (row)
    pwout.close();
    if (dbgLog.isLoggable(Level.FINER)) {
        dbgLog.finer("\ndataTable2(variable-wise):\n");
        dbgLog.finer(Arrays.deepToString(dataTable2));
        dbgLog.finer("\ndateFormat(variable-wise):\n");
        dbgLog.finer(Arrays.deepToString(dateFormat));
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypelList:\n" + Arrays.deepToString(variableTypelList));
        dbgLog.fine("variableTypelListFinal:\n" + Arrays.deepToString(variableTypelListFinal));
    }
    String[] unfValues = new String[nvar];
    for (int j = 0; j < nvar; j++) {
        String variableType_j = variableTypelListFinal[j];
        unfValues[j] = getUNF(dataTable2[j], dateFormat[j], variableType_j, unfVersionNumber, j);
        if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(j + "th unf value" + unfValues[j]);
    }
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("unf set:\n" + Arrays.deepToString(unfValues));
    fileUnfValue = UNF5Util.calculateUNF(unfValues);
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("file-unf=" + fileUnfValue);
    stataDataSection.setUnf(unfValues);
    stataDataSection.setFileUnf(fileUnfValue);
    smd.setVariableUNF(unfValues);
    smd.getFileInformation().put("fileUNF", fileUnfValue);
    if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("unf values:\n" + unfValues);
    stataDataSection.setData(dataTable2);
    // close the stream
    dbgLog.fine("***** decodeData(): end *****\n\n");
}
From source file: edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

void parseValueLabelsRelease105(BufferedInputStream stream) throws IOException {
    dbgLog.fine("parseValueLabelsRelease105(): start");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    int nvar = dataTable.getVarQuantity().intValue();
    int length_label_name = constantTable.get("NAME") + 1; // note: caution +1 as the null character, not 9 byte
    int length_value_label_header = value_label_table_length + length_label_name;
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("value_label_table_length=" + value_label_table_length);
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("length_value_label_header=" + length_value_label_header);
    }
    int length_lable_name_field = 8;
    /*
     * Seg    field         byte   type
     * 1-1.   no of pairs      2   int (= m)
     * 1-2.   vlt_name        10   includes char+(\0) == name used in Sec2.part 5
     *        -----------------------------------
     *                        11
     * 2-1.   values         2*n   int[]
     * 2-2.   labels         8*n   char
     */
    // This map will hold a temporary lookup table for all the categorical
    // value-label groups we are going to find here:
    // These groups have unique names, and a group *may be shared* between
    // multiple variables. In the method decodeDescriptorValueLabel above
    // we have populated a lookup table where variables are linked to the
    // corresponding value-label groups by name. Thus we must fully populate
    // the full map of all the variable groups, then go through the list
    // of variables and create the dataverse variable categories from
    // them. -- L.A. 4.0
    Map<String, Map<String, String>> tempValueLabelTable = new LinkedHashMap<>();
    for (int i = 0; i < nvar; i++) {
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("\n\n" + i + "th value-label table header");
        }
        byte[] valueLabelHeader = new byte[length_value_label_header];
        // Part 1: reading the header of a value-label table if exists
        int nbytes = stream.read(valueLabelHeader, 0, length_value_label_header);
        if (nbytes == 0) {
            throw new IOException("reading value label header: no datum");
        }
        // 1.1 number of value-label pairs in this table (= m)
        ByteBuffer bb_value_label_pairs = ByteBuffer.wrap(valueLabelHeader, 0, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_pairs.order(ByteOrder.LITTLE_ENDIAN);
            //if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("value lable table lenth: byte reversed");
        }
        int no_value_label_pairs = bb_value_label_pairs.getShort();
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("no_value_label_pairs=" + no_value_label_pairs);
        }
        // 1.2 labelName
        String rawLabelName = new String(Arrays.copyOfRange(valueLabelHeader, value_label_table_length,
                (value_label_table_length + length_label_name)), "ISO-8859-1");
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("rawLabelName(length)=" + rawLabelName.length());
        }
        String labelName = rawLabelName.substring(0, rawLabelName.indexOf(0));
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("label name = " + labelName + "\n");
        }
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine(i + "-th value-label table");
        }
        // Part 2: reading the value-label table
        // the length of the value-label table is: 2*m + 8*m = 10*m
        int length_value_label_table = (value_label_table_length + length_lable_name_field) * no_value_label_pairs;
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("length_value_label_table=" + length_value_label_table);
        }
        byte[] valueLabelTable_i = new byte[length_value_label_table];
        int noBytes = stream.read(valueLabelTable_i, 0, length_value_label_table);
        if (noBytes == 0) {
            throw new IOException("reading value label table: no datum");
        }
        // 2-1. 2-byte-integer array (2*m): value array (sorted)
        short[] valueList = new short[no_value_label_pairs];
        int offset_value = 0;
        for (int k = 0; k < no_value_label_pairs; k++) {
            ByteBuffer bb_value_list = ByteBuffer.wrap(valueLabelTable_i, offset_value, value_label_table_length);
            if (isLittleEndian) {
                bb_value_list.order(ByteOrder.LITTLE_ENDIAN);
            }
            valueList[k] = bb_value_list.getShort();
            offset_value += value_label_table_length;
        }
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("value_list=" + Arrays.toString(valueList) + "\n");
        }
        // 2-2. 8-byte chars that store label data (m units of labels)
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("current offset_value=" + offset_value);
        }
        int offset_start = offset_value;
        int offset_end = offset_value + length_lable_name_field;
        String[] labelList = new String[no_value_label_pairs];
        for (int l = 0; l < no_value_label_pairs; l++) {
            String string_l = new String(Arrays.copyOfRange(valueLabelTable_i, offset_start, offset_end), "ISO-8859-1");
            int null_position = string_l.indexOf(0);
            if (null_position != -1) {
                labelList[l] = string_l.substring(0, null_position);
            } else {
                labelList[l] = string_l;
            }
            offset_start = offset_end;
            offset_end += length_lable_name_field;
        }
        // Finally, we've reached the actual value-label pairs. We'll go
        // through them and put them on the temporary lookup map:
        tempValueLabelTable.put(labelName, new LinkedHashMap<>());
        for (int j = 0; j < no_value_label_pairs; j++) {
            if (dbgLog.isLoggable(Level.FINE)) {
                dbgLog.fine(j + "-th pair:" + valueList[j] + "[" + labelList[j] + "]");
            }
            // TODO: do we need any null/empty string checks here? -- L.A. 4.0
            tempValueLabelTable.get(labelName).put(Integer.toString(valueList[j]), labelList[j]);
        }
        if (stream.available() == 0) {
            // reached the end of the file
            if (dbgLog.isLoggable(Level.FINE)) {
                dbgLog.fine("reached the end of file at " + i + "th value-label Table.");
            }
            break;
        }
    } // for nvar loop
    // And now we can go through the list of variables, see if any have
    // value-label groups linked, then build dataverse VariableCategory
    // objects for them, using the values stored in the temporary map
    // we've just built:
    for (int i = 0; i < nvar; i++) {
        if (valueLabelsLookupTable[i] != null) {
            if (tempValueLabelTable.get(valueLabelsLookupTable[i]) != null) {
                // What if it is null? -- is it a legit condition, that
                // a variable was advertised as having categorical values,
                // but no such cat value group exists under this name?
                // -- L.A.
                for (String value : tempValueLabelTable.get(valueLabelsLookupTable[i]).keySet()) {
                    VariableCategory cat = new VariableCategory();
                    cat.setValue(value);
                    cat.setLabel(tempValueLabelTable.get(valueLabelsLookupTable[i]).get(value));
                    /* cross-link the variable and category to each other: */
                    cat.setDataVariable(dataTable.getDataVariables().get(i));
                    dataTable.getDataVariables().get(i).getCategories().add(cat);
                }
            }
        }
    }
    dbgLog.fine("parseValueLabelsRelease105(): end");
}
From source file: com.healthmarketscience.jackcess.Column.java

/**
 * Deserialize a raw byte value for this column into an Object
 * @param data The raw byte value
 * @param order Byte order in which the raw value is stored
 * @return The deserialized Object
 * @usage _advanced_method_
 */
public Object read(byte[] data, ByteOrder order) throws IOException {
    ByteBuffer buffer = ByteBuffer.wrap(data);
    buffer.order(order);
    if (_type == DataType.BOOLEAN) {
        throw new IOException("Tried to read a boolean from data instead of null mask.");
    } else if (_type == DataType.BYTE) {
        return Byte.valueOf(buffer.get());
    } else if (_type == DataType.INT) {
        return Short.valueOf(buffer.getShort());
    } else if (_type == DataType.LONG) {
        return Integer.valueOf(buffer.getInt());
    } else if (_type == DataType.DOUBLE) {
        return Double.valueOf(buffer.getDouble());
    } else if (_type == DataType.FLOAT) {
        return Float.valueOf(buffer.getFloat());
    } else if (_type == DataType.SHORT_DATE_TIME) {
        return readDateValue(buffer);
    } else if (_type == DataType.BINARY) {
        return data;
    } else if (_type == DataType.TEXT) {
        return decodeTextValue(data);
    } else if (_type == DataType.MONEY) {
        return readCurrencyValue(buffer);
    } else if (_type == DataType.OLE) {
        if (data.length > 0) {
            return readLongValue(data);
        }
        return null;
    } else if (_type == DataType.MEMO) {
        if (data.length > 0) {
            return readLongStringValue(data);
        }
        return null;
    } else if (_type == DataType.NUMERIC) {
        return readNumericValue(buffer);
    } else if (_type == DataType.GUID) {
        return readGUIDValue(buffer, order);
    } else if ((_type == DataType.UNKNOWN_0D) || (_type == DataType.UNKNOWN_11)) {
        // treat like "binary" data
        return data;
    } else if (_type == DataType.COMPLEX_TYPE) {
        return new ComplexValueForeignKey(this, buffer.getInt());
    } else if (_type.isUnsupported()) {
        return rawDataWrapper(data);
    } else {
        throw new IOException("Unrecognized data type: " + _type);
    }
}
From source file: edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

private void decodeData(BufferedInputStream stream) throws IOException {
    dbgLog.fine("\n***** decodeData(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    //int nvar = (Integer)smd.getFileInformation().get("varQnty");
    int nvar = dataTable.getVarQuantity().intValue();
    //int nobs = (Integer)smd.getFileInformation().get("caseQnty");
    int nobs = dataTable.getCaseQuantity().intValue();
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")");
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("bytes per row=" + bytes_per_row + " bytes");
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypes=" + Arrays.deepToString(variableTypes));
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("StringLengthTable=" + StringLengthTable);
    }
    // create a File object to save the tab-delimited data file
    FileOutputStream fileOutTab = null;
    PrintWriter pwout = null;
    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");
    // save the temp tab-delimited file in the return ingest object:
    ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);
    fileOutTab = new FileOutputStream(tabDelimitedDataFile);
    pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);
    /* Should we lose this dateFormat thing in 4.0?
     * the UNF should be calculatable on the app side solely from the data
     * stored in the tab file and the type information stored the dataVariable
     * object.
     * furthermore, the very idea of storing a format entry not just for
     * every variable, but for every value/observation is a bit strange.
     * TODO: review and confirm that, in the 3.* implementation, every
     * entry in dateFormat[nvar][*] is indeed the same - except for the
     * missing value entries. -- L.A. 4.0
     (OK, I got rid of the dateFormat; instead I kinda sorta assume that
     the format is the same for every value in a column, save for the
     missing values... like this:
     dataTable.getDataVariables().get(columnCounter).setFormatSchemaName(ddt.format);
     BUT, this needs to be reviewed/confirmed etc!
     */
    //String[][] dateFormat = new String[nvar][nobs];
    for (int i = 0; i < nobs; i++) {
        byte[] dataRowBytes = new byte[bytes_per_row];
        Object[] dataRow = new Object[nvar];
        int nbytes = stream.read(dataRowBytes, 0, bytes_per_row);
        if (nbytes == 0) {
            String errorMessage = "reading data: no data were read at(" + i + "th row)";
            throw new IOException(errorMessage);
        }
        // decoding each row
        int byte_offset = 0;
        for (int columnCounter = 0; columnCounter < variableTypes.length; columnCounter++) {
            Integer varType = variableTypeMap.get(variableTypes[columnCounter]);
            // 4.0 Check if this is a time/date variable:
            boolean isDateTimeDatum = false;
            String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory();
            if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) {
                isDateTimeDatum = true;
            }
            String variableFormat = dateVariableFormats[columnCounter];
            switch (varType != null ? varType : 256) {
                case -5:
                    // Byte case
                    // note: 1 byte signed
                    byte byte_datum = dataRowBytes[byte_offset];
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum);
                    }
                    if (byte_datum >= BYTE_MISSING_VALUE) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        dataRow[columnCounter] = byte_datum;
                    }
                    byte_offset++;
                    break;
                case -4:
                    // Stata-int (=java's short: 2byte) case
                    // note: 2-byte signed int, not java's int
                    ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2);
                    if (isLittleEndian) {
                        int_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    short short_datum = int_buffer.getShort();
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum);
                    }
                    if (short_datum >= INT_MISSIG_VALUE) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum));
                            if (dbgLog.isLoggable(Level.FINER)) {
                                dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                            }
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            //dateFormat[columnCounter][i] = ddt.format;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = short_datum;
                        }
                    }
                    byte_offset += 2;
                    break;
                case -3:
                    // stata-Long (= java's int: 4 byte) case
                    // note: 4-byte singed, not java's long
                    //dbgLog.fine("DATreader: stata long");
                    ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                    if (isLittleEndian) {
                        long_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    int int_datum = long_buffer.getInt();
                    if (dbgLog.isLoggable(Level.FINE)) {
                        //dbgLog.fine(i + "-th row " + columnCounter
                        //        + "=th column stata long =" + int_datum);
                    }
                    if (int_datum >= LONG_MISSING_VALUE) {
                        if (dbgLog.isLoggable(Level.FINE)) {
                            //dbgLog.fine(i + "-th row " + columnCounter
                            //        + "=th column stata long missing value=" + int_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum));
                            if (dbgLog.isLoggable(Level.FINER)) {
                                dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                            }
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = int_datum;
                        }
                    }
                    byte_offset += 4;
                    break;
                case -2:
                    // float case
                    // note: 4-byte
                    ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                    if (isLittleEndian) {
                        float_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    float float_datum = float_buffer.getFloat();
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum);
                    }
                    if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value=" + float_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum));
                            if (dbgLog.isLoggable(Level.FINER)) {
                                dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                            }
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = float_datum;
                            // This may be temporary - but for now (as in, while I'm testing
                            // 4.0 ingest against 3.* ingest, I need to be able to tell if a
                            // floating point value was a single, or double float in the
                            // original STATA file: -- L.A. Jul. 2014
                            dataTable.getDataVariables().get(columnCounter).setFormat("float");
                        }
                    }
                    byte_offset += 4;
                    break;
                case -1:
                    // double case
                    // note: 8-byte
                    ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8);
                    if (isLittleEndian) {
                        double_buffer.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    double double_datum = double_buffer.getDouble();
                    if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value=" + double_datum);
                        }
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        if (isDateTimeDatum) {
                            DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum));
                            if (dbgLog.isLoggable(Level.FINER)) {
                                dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                            }
                            dataRow[columnCounter] = ddt.decodedDateTime;
                            dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                        } else {
                            dataRow[columnCounter] = doubleNumberFormatter.format(double_datum);
                        }
                    }
                    byte_offset += 8;
                    break;
                case 0:
                    // String case
                    int strVarLength = StringLengthTable.get(columnCounter);
                    String raw_datum = new String(Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)), "ISO-8859-1");
                    // TODO:
                    // is it the right thing to do, to default to "ISO-8859-1"?
                    // (it may be; since there's no mechanism for specifying
                    // alternative encodings in Stata, this may be their default;
                    // it just needs to be verified. -- L.A. Jul. 2014)
                    String string_datum = getNullStrippedString(raw_datum);
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum);
                    }
                    if (string_datum.isEmpty()) {
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value=" + string_datum);
                        }
                        // TODO:
                        /* Is this really a missing value case?
                         * Or is it an honest empty string?
                         * Is there such a thing as a missing value for a String in Stata?
                         * -- L.A. 4.0
                         */
                        dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                    } else {
                        /*
                         * Some special characters, like new lines and tabs need to
                         * be escaped - otherwise they will break our TAB file
                         * structure!
                         * But before we escape anything, all the back slashes
                         * already in the string need to be escaped themselves.
                         */
                        String escapedString = string_datum.replace("\\", "\\\\");
                        // escape quotes:
                        escapedString = escapedString.replaceAll("\"", Matcher.quoteReplacement("\\\""));
                        // escape tabs and new lines:
                        escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t"));
                        escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n"));
                        escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r"));
                        // the escaped version of the string is stored in the tab file
                        // enclosed in double-quotes; this is in order to be able
                        // to differentiate between an empty string (tab-delimited empty string in
                        // double quotes) and a missing value (tab-delimited empty string).
                        // Although the question still remains - is it even possible
                        // to store an empty string, that's not a missing value, in Stata?
                        // - see the comment in the missing value case above. -- L.A. 4.0
                        dataRow[columnCounter] = "\"" + escapedString + "\"";
                    }
                    byte_offset += strVarLength;
                    break;
                default:
                    dbgLog.fine("unknown variable type found");
                    String errorMessage = "unknow variable Type found at data section";
                    throw new InvalidObjectException(errorMessage);
            } // switch
        } // for-columnCounter
        // Dump the row of data to the tab-delimited file we are producing:
        pwout.println(StringUtils.join(dataRow, "\t"));
        if (dbgLog.isLoggable(Level.FINE)) {
            //dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};");
        }
    } // for- i (row)
    pwout.close();
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes));
    }
    dbgLog.fine("DTA Ingest: decodeData(): end.");
}
From source file: com.healthmarketscience.jackcess.impl.IndexData.java

/**
 * Read the rest of the index info from a tableBuffer
 * @param tableBuffer table definition buffer to read from initial info
 * @param availableColumns Columns that this index may use
 */
public void read(ByteBuffer tableBuffer, List<ColumnImpl> availableColumns) throws IOException {
    ByteUtil.forward(tableBuffer, getFormat().SKIP_BEFORE_INDEX); // Forward past Unknown
    for (int i = 0; i < MAX_COLUMNS; i++) {
        short columnNumber = tableBuffer.getShort();
        byte colFlags = tableBuffer.get();
        if (columnNumber != COLUMN_UNUSED) {
            // find the desired column by column number (which is not necessarily
            // the same as the column index)
            ColumnImpl idxCol = null;
            for (ColumnImpl col : availableColumns) {
                if (col.getColumnNumber() == columnNumber) {
                    idxCol = col;
                    break;
                }
            }
            if (idxCol == null) {
                throw new IOException("Could not find column with number " + columnNumber + " for index");
            }
            _columns.add(newColumnDescriptor(idxCol, colFlags));
        }
    }
    _ownedPages = UsageMap.read(getTable().getDatabase(), tableBuffer, false);
    _rootPageNumber = tableBuffer.getInt();
    ByteUtil.forward(tableBuffer, getFormat().SKIP_BEFORE_INDEX_FLAGS); // Forward past Unknown
    _indexFlags = tableBuffer.get();
    ByteUtil.forward(tableBuffer, getFormat().SKIP_AFTER_INDEX_FLAGS); // Forward past other stuff
}
From source file: nodomain.freeyourgadget.gadgetbridge.service.devices.pebble.PebbleProtocol.java

private void decodeAppLogs(ByteBuffer buf) {
    UUID uuid = getUUID(buf);
    int timestamp = buf.getInt();
    int logLevel = buf.get() & 0xff;
    int messageLength = buf.get() & 0xff;
    int lineNumber = buf.getShort() & 0xffff;
    String fileName = getFixedString(buf, 16);
    String message = getFixedString(buf, messageLength);
    LOG.debug("APP_LOGS (" + logLevel + ") from uuid " + uuid.toString() + " in " + fileName + ":" + lineNumber + " " + message);
}
From source file: nodomain.freeyourgadget.gadgetbridge.service.devices.pebble.PebbleProtocol.java

private GBDeviceEvent decodeVoiceControl(ByteBuffer buf) {
    buf.order(ByteOrder.LITTLE_ENDIAN);
    byte command = buf.get();
    int flags = buf.getInt();
    byte session_type = buf.get(); // 0x01 dictation 0x02 command
    short session_id = buf.getShort();
    // attributes
    byte count = buf.get();
    byte type = buf.get();
    short length = buf.getShort();
    byte[] version = new byte[20];
    buf.get(version); // it's a string like "1.2rc1"
    int sample_rate = buf.getInt();
    short bit_rate = buf.getShort();
    byte bitstream_version = buf.get();
    short frame_size = buf.getShort();

    GBDeviceEventSendBytes sendBytes = new GBDeviceEventSendBytes();
    if (command == 0x01) { // session setup
        sendBytes.encodedBytes = null;
    } else if (command == 0x02) { // dictation result
        sendBytes.encodedBytes = null;
    }
    return sendBytes;
}