List of usage examples for java.nio.ByteBuffer.duplicate()
public abstract ByteBuffer duplicate();
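Before the examples: duplicate() creates a second buffer that shares the original buffer's content, but with its own position, limit, and mark (initially copied from the original). One caveat worth remembering, since several examples below work around it: a duplicate does not inherit the original's byte order; like any newly created ByteBuffer it starts out BIG_ENDIAN. A minimal self-contained sketch (class and variable names are ours, not from any of the sources below):

import java.nio.ByteBuffer;

public class DuplicateDemo {
    public static void main(String[] args) {
        ByteBuffer original = ByteBuffer.wrap(new byte[] { 1, 2, 3, 4 });
        original.position(2);                      // advance the original

        ByteBuffer dup = original.duplicate();     // shares content; copies position/limit/mark
        dup.position(0);                           // rewinding the duplicate...
        System.out.println(dup.get());             // reads 1
        System.out.println(original.position());   // ...leaves the original at 2

        dup.put(0, (byte) 9);                      // the content IS shared:
        System.out.println(original.get(0));       // prints 9
    }
}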
From source file: com.ery.ertc.estorm.util.Bytes.java

/**
 * Returns a new byte array, copied from the given {@code buf}, from the
 * index 0 (inclusive) to the limit (exclusive), regardless of the current
 * position. The position and the other index parameters are not changed.
 *
 * @param buf
 *            a byte buffer
 * @return the byte array
 * @see #getBytes(ByteBuffer)
 */
public static byte[] toBytes(ByteBuffer buf) {
    ByteBuffer dup = buf.duplicate();
    dup.position(0);
    return readBytes(dup);
}
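The duplicate is what lets toBytes() rewind to index 0 and drain the buffer without disturbing the caller's position. The readBytes() helper is not shown in this excerpt; a self-contained sketch of the same idea, with the copy inlined (method name is ours), might look like:

import java.nio.ByteBuffer;

// Hypothetical stand-in for Bytes.toBytes(): copies [0, limit) without
// touching the caller's position, using a duplicate as a scratch view.
static byte[] toBytesSketch(ByteBuffer buf) {
    ByteBuffer dup = buf.duplicate(); // independent position/limit
    dup.position(0);
    byte[] out = new byte[dup.remaining()];
    dup.get(out);                     // advances only the duplicate
    return out;
}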
From source file: edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java

private void decodeHeader(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeHeader(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    dbgLog.fine("reading the header segment 1: 4 bytes\n");
    byte[] magic_number = new byte[DTA_MAGIC_NUMBER_LENGTH];
    int nbytes = stream.read(magic_number, 0, DTA_MAGIC_NUMBER_LENGTH);

    if (nbytes == 0) {
        throw new IOException();
    }

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("hex dump: 1st 4 bytes =>" + new String(Hex.encodeHex(magic_number)) + "<-");

    if (magic_number[2] != 1) {
        dbgLog.fine("3rd byte is not 1: given file is not stata-dta type");
        throw new IllegalArgumentException("given file is not stata-dta type");
    } else if ((magic_number[1] != 1) && (magic_number[1] != 2)) {
        dbgLog.fine("2nd byte is neither 1 nor 2: this file is not stata-dta type");
        throw new IllegalArgumentException("given file is not stata-dta type");
    } else if (!STATA_RELEASE_NUMBER.containsKey((int) magic_number[0])) {
        dbgLog.fine("1st byte (" + magic_number[0] + ") is not within the ingestable range [rel. 3-10]:"
                + "we cannot ingest this Stata file.");
        throw new IllegalArgumentException("given file is not stata-dta type");
    } else {
        releaseNumber = (int) magic_number[0];
        smd.getFileInformation().put("releaseNumber", releaseNumber);
        smd.getFileInformation().put("byteOrder", (int) magic_number[1]);
        smd.getFileInformation().put("OSByteOrder", ByteOrder.nativeOrder().toString());
        smd.getFileInformation().put("mimeType", MIME_TYPE[0]);
        smd.getFileInformation().put("fileFormat", MIME_TYPE[0]);
        init();

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("this file is stata-dta type: " + STATA_RELEASE_NUMBER.get((int) magic_number[0])
                    + "(Number=" + magic_number[0] + ")");
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("Endian(file)(Big: 1; Little:2)=" + magic_number[1]);

        if ((int) magic_number[1] == 2) {
            isLittleEndian = true;
            dbgLog.fine("Reversal of the bytes is necessary to decode " + "multi-byte fields");
        }
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());
    }

    dbgLog.fine("reading the remaining header segment 2: 60 or 109 bytes");
    byte[] header = new byte[headerLength];
    nbytes = stream.read(header, 0, headerLength);
    //printHexDump(header, "header:\n");

    // 1. number of variables: short (2 bytes)
    ByteBuffer bbnvar = ByteBuffer.wrap(header, 0, NVAR_FIELD_LENGTH);
    ByteBuffer dupnvar = bbnvar.duplicate();
    short short_nvar = dupnvar.getShort();

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("get original short view(nvar)=" + short_nvar);
    if (isLittleEndian) {
        bbnvar.order(ByteOrder.LITTLE_ENDIAN);
    }
    short shrt_nvar = bbnvar.getShort();
    smd.getFileInformation().put("varQnty", new Integer(shrt_nvar));

    // set up variableTypeList
    int nvar = shrt_nvar;
    variableTypelList = new String[nvar];

    // 2. number of observations: int (4 bytes)
    ByteBuffer nobs = ByteBuffer.wrap(header, NVAR_FIELD_LENGTH, NOBS_FIELD_LENGTH);
    ByteBuffer dupnobs = nobs.duplicate();
    int int_dupnobs = dupnobs.getInt();
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("raw nobs=" + int_dupnobs);
    if (isLittleEndian) {
        nobs.order(ByteOrder.LITTLE_ENDIAN);
    }
    int int_nobs = nobs.getInt();
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("reversed nobs=" + int_nobs);
    smd.getFileInformation().put("caseQnty", new Integer(int_nobs));

    // 3. data_label: 32 or 81 bytes
    int dl_offset = NVAR_FIELD_LENGTH + NOBS_FIELD_LENGTH;
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("dl_offset=" + dl_offset);
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data_label_length=" + dataLabelLength);

    String data_label = new String(Arrays.copyOfRange(header, dl_offset, (dl_offset + dataLabelLength)),
            "ISO-8859-1");
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data_label_length=" + data_label.length());
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("location of the null character=" + data_label.indexOf(0));

    String dataLabel = getNullStrippedString(data_label);
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data_label_length=" + dataLabel.length());
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data_label=[" + dataLabel + "]");
    smd.getFileInformation().put("dataLabel", dataLabel);

    // 4. time_stamp: ASCII String (18 bytes) -- added after release 4
    if (releaseNumber > 104) {
        int ts_offset = dl_offset + dataLabelLength;
        String time_stamp = new String(Arrays.copyOfRange(header, ts_offset, ts_offset + TIME_STAMP_LENGTH),
                "ISO-8859-1");
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("time_stamp_length=" + time_stamp.length());
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("location of the null character=" + time_stamp.indexOf(0));

        String timeStamp = getNullStrippedString(time_stamp);
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("timeStamp_length=" + timeStamp.length());
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine("timeStamp=[" + timeStamp + "]");

        smd.getFileInformation().put("timeStamp", timeStamp);
        smd.getFileInformation().put("fileDate", timeStamp);
        smd.getFileInformation().put("fileTime", timeStamp);
        smd.getFileInformation().put("varFormat_schema", "STATA");
    }

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("smd dump:" + smd.toString());
        dbgLog.fine("***** decodeHeader(): end *****");
    }
}
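Both Stata readers rely on the same peek-then-reorder idiom: wrap a slice of the header, duplicate() it to log the raw big-endian interpretation, then switch the original wrapper to LITTLE_ENDIAN and read the real value. Because the duplicate has its own position, the original wrapper is still at the start of the slice for the second read. A condensed sketch of just that idiom (method and parameter names are illustrative, not from the source):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

// Peek-then-reorder: log the raw big-endian reading without disturbing
// the buffer that will be read with the file's actual byte order.
// Assumes header has at least 2 bytes.
static short readNvar(byte[] header, boolean isLittleEndian) {
    ByteBuffer bbnvar = ByteBuffer.wrap(header, 0, 2);
    short rawView = bbnvar.duplicate().getShort(); // advances only the duplicate
    System.out.println("raw big-endian view=" + rawView);
    if (isLittleEndian) {
        bbnvar.order(ByteOrder.LITTLE_ENDIAN);     // the duplicate is unaffected...
    }
    return bbnvar.getShort();                      // ...and bbnvar is still at position 0
}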
From source file: edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

private void decodeHeader(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeHeader(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    dbgLog.fine("reading the header segment 1: 4 bytes\n");
    byte[] magic_number = new byte[DTA_MAGIC_NUMBER_LENGTH];
    int nbytes = stream.read(magic_number, 0, DTA_MAGIC_NUMBER_LENGTH);

    if (nbytes == 0) {
        throw new IOException();
    }

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("hex dump: 1st 4 bytes =>" + new String(Hex.encodeHex(magic_number)) + "<-");
    }

    if (magic_number[2] != 1) {
        dbgLog.fine("3rd byte is not 1: given file is not stata-dta type");
        throw new IllegalArgumentException("The file is not in a STATA format that we can read or support.");
    } else if ((magic_number[1] != 1) && (magic_number[1] != 2)) {
        dbgLog.fine("2nd byte is neither 1 nor 2: this file is not stata-dta type");
        throw new IllegalArgumentException("given file is not stata-dta type");
    } else if (!STATA_RELEASE_NUMBER.containsKey((int) magic_number[0])) {
        dbgLog.fine("1st byte (" + magic_number[0] + ") is not within the ingestable range [rel. 3-10]:"
                + "we cannot ingest this Stata file.");
        throw new IllegalArgumentException("given file is not stata-dta type");
    } else {
        releaseNumber = magic_number[0];
        init();

        dataTable.setOriginalFileFormat(MIME_TYPE[0]);
        /*
         * releaseNumber:
         * for storing in the datatable, we are converting the numeric Stata
         * release number into a more user friendly "version number";
         * e.g., "release number 115" = "Stata v. 12"
         * -- L.A. 4.0
         */
        dataTable.setOriginalFormatVersion(STATA_RELEASE_NUMBER.get(releaseNumber));
        dataTable.setUnf("UNF:6:FILEFILEFILEFILE");

        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("this file is stata-dta type: " + STATA_RELEASE_NUMBER.get(releaseNumber)
                    + " (that means Stata version " + releaseNumber + ")");
        }
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("Endian(file)(Big: 1; Little:2)=" + magic_number[1]);
        }

        /*
         * byte order: defined in the second byte of the "magic number":
         */
        if (magic_number[1] == 2) {
            isLittleEndian = true;
            dbgLog.fine("Reversal of the bytes is necessary to decode " + "multi-byte fields");
        }
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());
        }
    }

    dbgLog.fine("reading the remaining header segment 2: 60 or 109 bytes");
    byte[] header = new byte[headerLength];
    nbytes = stream.read(header, 0, headerLength);

    // 1. number of variables: short (2 bytes)
    ByteBuffer bbnvar = ByteBuffer.wrap(header, 0, NVAR_FIELD_LENGTH);
    ByteBuffer dupnvar = bbnvar.duplicate();
    short short_nvar = dupnvar.getShort();

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("get original short view(nvar)=" + short_nvar);
    }
    if (isLittleEndian) {
        bbnvar.order(ByteOrder.LITTLE_ENDIAN);
    }
    short shrt_nvar = bbnvar.getShort();
    dataTable.setVarQuantity(new Long(shrt_nvar));
    int nvar = shrt_nvar;

    if (dbgLog.isLoggable(Level.INFO)) {
        dbgLog.info("number of variables(nvar)=" + nvar);
    }

    // 4.0 Initialize dataverse variable objects:
    List<DataVariable> variableList = new ArrayList<>();
    for (int i = 0; i < nvar; i++) {
        DataVariable dv = new DataVariable();
        dv.setInvalidRanges(new ArrayList<>());
        dv.setSummaryStatistics(new ArrayList<>());
        dv.setUnf("UNF:6:XXX");
        dv.setCategories(new ArrayList<>());
        variableList.add(dv);
        dv.setFileOrder(i);
        dv.setDataTable(dataTable);
    }
    dataTable.setDataVariables(variableList);

    // set up variableTypeList
    variableTypes = new String[nvar];
    // and the date/time format list:
    dateVariableFormats = new String[nvar];

    // 2. number of observations: int (4 bytes)
    ByteBuffer nobs = ByteBuffer.wrap(header, NVAR_FIELD_LENGTH, NOBS_FIELD_LENGTH);
    ByteBuffer dupnobs = nobs.duplicate();
    int int_dupnobs = dupnobs.getInt();
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("raw nobs=" + int_dupnobs);
    }
    if (isLittleEndian) {
        nobs.order(ByteOrder.LITTLE_ENDIAN);
    }
    int int_nobs = nobs.getInt();
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("reversed nobs=" + int_nobs);
    }

    // smd.getFileInformation().put("caseQnty", new Integer(int_nobs));
    dataTable.setCaseQuantity(new Long(int_nobs));

    /*
     * the "data label" - note that we are not using this label for anything
     * (wonder what it is though? can we use it somewhere?)
     * but we still need to extract it from the byte stream,
     * since the offsets of the objects stored further up are calculated
     * relative to it. -- L.A., 4.0
     */
    // 3. data_label: 32 or 81 bytes
    int dl_offset = NVAR_FIELD_LENGTH + NOBS_FIELD_LENGTH;
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("dl_offset=" + dl_offset);
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("data_label_length=" + dataLabelLength);
    }

    String data_label = new String(Arrays.copyOfRange(header, dl_offset, (dl_offset + dataLabelLength)),
            "ISO-8859-1");
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("data_label_length=" + data_label.length());
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("location of the null character=" + data_label.indexOf(0));
    }

    String dataLabel = getNullStrippedString(data_label);
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("data_label_length=" + dataLabel.length());
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("data_label=[" + dataLabel + "]");
    }
    // smd.getFileInformation().put("dataLabel", dataLabel);
    /* end of "data label" */

    // 4. time_stamp: ASCII String (18 bytes) -- added after release 4
    if (releaseNumber > 104) {
        int ts_offset = dl_offset + dataLabelLength;
        String time_stamp = new String(Arrays.copyOfRange(header, ts_offset, ts_offset + TIME_STAMP_LENGTH),
                "ISO-8859-1");
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("time_stamp_length=" + time_stamp.length());
        }
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("location of the null character=" + time_stamp.indexOf(0));
        }

        String timeStamp = getNullStrippedString(time_stamp);
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("timeStamp_length=" + timeStamp.length());
        }
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("timeStamp=[" + timeStamp + "]");
        }
    }
}
From source file: edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordType1(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType1(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    // how to read each recordType
    // 1. set up the following objects before reading bytes
    //    a. the working byte array
    //    b. the storage object
    // the length of this field: 172 bytes = 60 + 4 + 12 + 4 + 8 + 84
    // this field consists of 6 distinct blocks

    byte[] recordType1 = new byte[LENGTH_RECORDTYPE1];
    // int caseWeightVariableOBSIndex = 0;

    try {
        int nbytes = stream.read(recordType1, 0, LENGTH_RECORDTYPE1);
        //printHexDump(recordType1, "recordType1");

        if (nbytes == 0) {
            throw new IOException("reading recordType1: no byte was read");
        }

        // 1.1 60-byte String that tells the platform/version of SPSS that
        // wrote this file

        int offset_start = 0;
        int offset_end = LENGTH_SPSS_PRODUCT_INFO; // 60 bytes

        String productInfo = new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII");

        dbgLog.fine("productInfo:\n" + productInfo + "\n");
        dataTable.setOriginalFormatVersion(productInfo);

        // try to parse out the SPSS version that created this data file:

        String spssVersionTag = null;

        String regexpVersionNumber = ".*Release ([0-9]*)";
        Pattern versionTagPattern = Pattern.compile(regexpVersionNumber);
        Matcher matcher = versionTagPattern.matcher(productInfo);
        if (matcher.find()) {
            spssVersionTag = matcher.group(1);
            dbgLog.fine("SPSS Version Number: " + spssVersionTag);
        }

        // TODO:
        // try a more elaborate regex (like the one for the "new-style"
        // productInfo line, below), to select the version number, the
        // minor version number and the platform (windows vs. mac) separately.
        // would be cleaner to save just that, rather than the entire
        // productInfo tag.
        // -- L.A. 4.0 beta

        if (spssVersionTag == null || spssVersionTag.equals("")) {
            // Later versions of SPSS have different formatting of the
            // productInfo line:
            regexpVersionNumber = ".* IBM SPSS STATISTICS.* ([^ ]*) ([0-9][0-9]*)([^ ]*)";
            versionTagPattern = Pattern.compile(regexpVersionNumber);
            matcher = versionTagPattern.matcher(productInfo);
            if (matcher.find()) {
                String spssPlatformTag = matcher.group(1);
                spssVersionTag = matcher.group(2);
                String spssVersionTagMinor = matcher.group(3);

                dbgLog.fine("SPSS Version Number (new style): " + spssVersionTag);
                dbgLog.fine("SPSS Version/Platform Identification (new style:) " + spssPlatformTag + " "
                        + spssVersionTag + spssVersionTagMinor);
                dataTable.setOriginalFormatVersion(spssVersionTag + spssVersionTagMinor + " " + spssPlatformTag);
            }
        }

        if (spssVersionTag != null && !spssVersionTag.equals("")) {
            spssVersionNumber = Integer.valueOf(spssVersionTag).intValue();

            /*
             * Starting with SPSS version 16, the default encoding is UTF-8.
             * But we are only going to use it if the user did not explicitly
             * specify the encoding on the addfiles page. Then we'd want
             * to stick with whatever they entered.
             * (also, it appears that (starting with the same version 16?)
             * it is actually possible to define the locale/character set
             * in the file - section 7, sub-type 20; TODO: decide which
             * one takes precedence, if we have the encoding defined both
             * in the file and through the UI. -- L.A. 4.0 beta)
             */
            if (spssVersionNumber > 15) {
                if (getDataLanguageEncoding() == null) {
                    //defaultCharSet = "windows-1252"; // temporary! -- L.A. "UTF-8";
                    defaultCharSet = "UTF-8";
                }
            }
        }

        // TODO:
        // decide if we want to save the [determined/guessed] character set
        // somewhere in the dataset object.
        // this may be relevant in cases when accented/non-latin characters
        // get ingested incorrectly;
        // -- L.A. 4.0 beta

        // 1.2 4-byte file-layout-code (byte-order)
        offset_start = offset_end;
        offset_end += LENGTH_FILE_LAYOUT_CODE; // 4 bytes

        ByteBuffer bb_fileLayout_code = ByteBuffer.wrap(recordType1, offset_start, LENGTH_FILE_LAYOUT_CODE);
        ByteBuffer byteOrderTest = bb_fileLayout_code.duplicate();
        // interpret the 4 bytes as an int
        int int2test = byteOrderTest.getInt();

        if (int2test == 2 || int2test == 3) {
            dbgLog.fine("integer == " + int2test + ": the byte-order of the writer is the same "
                    + "as the counterpart of Java: Big Endian");
        } else {
            // Because Java's byte-order is always big endian,
            // this (!=2) means this sav file was written on a little-endian machine;
            // non-string, multi-byte blocks must be byte-reversed
            bb_fileLayout_code.order(ByteOrder.LITTLE_ENDIAN);

            int2test = bb_fileLayout_code.getInt();

            if (int2test == 2 || int2test == 3) {
                dbgLog.fine("The sav file was saved on a little endian machine");
                dbgLog.fine("Reversal of the bytes is necessary to decode " + "multi-byte, non-string blocks");
                isLittleEndian = true;
            } else {
                throw new IOException("reading recordType1:unknown file layout code=" + int2test);
            }
        }

        dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());

        // 1.3 4-byte Number_Of_OBS_Units_Per_Case
        // (= how many RT2 records => how many variables)
        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE; // 4 bytes

        ByteBuffer bb_OBS_units_per_case = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE);
        if (isLittleEndian) {
            bb_OBS_units_per_case.order(ByteOrder.LITTLE_ENDIAN);
        }
        OBSUnitsPerCase = bb_OBS_units_per_case.getInt();
        dbgLog.fine("RT1: OBSUnitsPerCase=" + OBSUnitsPerCase);

        // 1.4 4-byte Compression_Switch
        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_SWITCH; // 4 bytes

        ByteBuffer bb_compression_switch = ByteBuffer.wrap(recordType1, offset_start, LENGTH_COMPRESSION_SWITCH);
        if (isLittleEndian) {
            bb_compression_switch.order(ByteOrder.LITTLE_ENDIAN);
        }
        int compression_switch = bb_compression_switch.getInt();
        if (compression_switch == 0) {
            // data section is not compressed
            isDataSectionCompressed = false;
            dbgLog.fine("data section is not compressed");
        } else {
            dbgLog.fine("data section is compressed:" + compression_switch);
        }

        // 1.5 4-byte Case-Weight Variable Index
        // warning: this variable index starts from 1, not 0
        offset_start = offset_end;
        offset_end += LENGTH_CASE_WEIGHT_VARIABLE_INDEX; // 4 bytes

        ByteBuffer bb_Case_Weight_Variable_Index = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_CASE_WEIGHT_VARIABLE_INDEX);
        if (isLittleEndian) {
            bb_Case_Weight_Variable_Index.order(ByteOrder.LITTLE_ENDIAN);
        }
        caseWeightVariableOBSIndex = bb_Case_Weight_Variable_Index.getInt();

        /// caseWeightVariableOBSIndex will be used later on to locate
        /// the weight variable; so we'll be able to mark the corresponding
        /// variables properly.
        // TODO: make sure case weight variables are properly handled!
        // -- L.A. 4.0 beta
        ///smd.getFileInformation().put("caseWeightVariableOBSIndex", caseWeightVariableOBSIndex);

        // 1.6 4-byte Number of Cases
        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_CASES; // 4 bytes

        ByteBuffer bb_Number_Of_Cases = ByteBuffer.wrap(recordType1, offset_start, LENGTH_NUMBER_OF_CASES);
        if (isLittleEndian) {
            bb_Number_Of_Cases.order(ByteOrder.LITTLE_ENDIAN);
        }
        int numberOfCases = bb_Number_Of_Cases.getInt();

        if (numberOfCases < 0) {
            // -1 if numberOfCases is unknown
            throw new RuntimeException("number of cases is not recorded in the header");
        } else {
            dbgLog.fine("RT1: number of cases is recorded= " + numberOfCases);
            dataTable.setCaseQuantity(new Long(numberOfCases));
            ///caseQnty = numberOfCases;
            ///smd.getFileInformation().put("caseQnty", numberOfCases);
        }

        // 1.7 8-byte compression-bias [not long but double]
        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_BIAS; // 8 bytes

        ByteBuffer bb_compression_bias = ByteBuffer
                .wrap(Arrays.copyOfRange(recordType1, offset_start, offset_end));
        if (isLittleEndian) {
            bb_compression_bias.order(ByteOrder.LITTLE_ENDIAN);
        }
        Double compressionBias = bb_compression_bias.getDouble();

        // TODO:
        // check if this "compression bias" is being used anywhere?
        // doesn't seem to be!
        // -- 4.0 alpha
        if (compressionBias == 100d) {
            // 100 is expected
            dbgLog.fine("compressionBias is 100 as expected");
            ///smd.getFileInformation().put("compressionBias", 100);
        } else {
            dbgLog.fine("compression bias is not 100: " + compressionBias);
            ///smd.getFileInformation().put("compressionBias", compressionBias);
        }

        // 1.8 84-byte File Creation Information (date/time: dd MM yyhh:mm:ss +
        // 64-byte label)
        offset_start = offset_end;
        offset_end += LENGTH_FILE_CREATION_INFO; // 84 bytes

        String fileCreationInfo = getNullStrippedString(
                new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII"));

        dbgLog.fine("fileCreationInfo:\n" + fileCreationInfo + "\n");

        String fileCreationDate = fileCreationInfo.substring(0, length_file_creation_date);
        int dateEnd = length_file_creation_date + length_file_creation_time;
        String fileCreationTime = fileCreationInfo.substring(length_file_creation_date, (dateEnd));
        String fileCreationNote = fileCreationInfo.substring(dateEnd, length_file_creation_label);

        dbgLog.fine("fileDate=" + fileCreationDate);
        dbgLog.fine("fileTime=" + fileCreationTime);
        dbgLog.fine("fileNote" + fileCreationNote);

    } catch (IOException ex) {
        throw ex;
    }

    dbgLog.fine("decodeRecordType1(): end");
}
From source file: edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

void decodeRecordType1(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType1(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    // how to read each recordType
    // 1. set up the following objects before reading bytes
    //    a. the working byte array
    //    b. the storage object
    // the length of this field: 172 bytes = 60 + 4 + 12 + 4 + 8 + 84
    // this field consists of 6 distinct blocks

    byte[] recordType1 = new byte[LENGTH_RECORDTYPE1];
    // int caseWeightVariableOBSIndex = 0;

    try {
        int nbytes = stream.read(recordType1, 0, LENGTH_RECORDTYPE1);
        //printHexDump(recordType1, "recordType1");

        if (nbytes == 0) {
            throw new IOException("reading recordType1: no byte was read");
        }

        // 1.1 60-byte String that tells the platform/version of SPSS that
        // wrote this file

        int offset_start = 0;
        int offset_end = LENGTH_SPSS_PRODUCT_INFO; // 60 bytes

        String productInfo = new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII");

        dbgLog.fine("productInfo:\n" + productInfo + "\n");

        // add the info to the fileInfo
        smd.getFileInformation().put("productInfo", productInfo);

        // try to parse out the SPSS version that created this data file:

        String spssVersionNumberTag = null;

        String regexpVersionNumber = ".*Release ([0-9]*)";
        Pattern patternJsession = Pattern.compile(regexpVersionNumber);
        Matcher matcher = patternJsession.matcher(productInfo);
        if (matcher.find()) {
            spssVersionNumberTag = matcher.group(1);
            dbgLog.fine("SPSS Version Number: " + spssVersionNumberTag);
        }

        if (spssVersionNumberTag != null && !spssVersionNumberTag.equals("")) {
            spssVersionNumber = Integer.valueOf(spssVersionNumberTag).intValue();

            /*
             * Starting with SPSS version 16, the default encoding is UTF-8.
             * But we are only going to use it if the user did not explicitly
             * specify the encoding on the addfiles page. Then we'd want
             * to stick with whatever they entered.
             */
            if (spssVersionNumber > 15) {
                if (getDataLanguageEncoding() == null) {
                    defaultCharSet = "UTF-8";
                }
            }
        }

        smd.getFileInformation().put("charset", defaultCharSet);

        // 1.2 4-byte file-layout-code (byte-order)
        offset_start = offset_end;
        offset_end += LENGTH_FILE_LAYOUT_CODE; // 4 bytes

        ByteBuffer bb_fileLayout_code = ByteBuffer.wrap(recordType1, offset_start, LENGTH_FILE_LAYOUT_CODE);
        ByteBuffer byteOrderTest = bb_fileLayout_code.duplicate();
        // interpret the 4 bytes as an int
        int int2test = byteOrderTest.getInt();

        if (int2test == 2 || int2test == 3) {
            dbgLog.fine("integer == " + int2test + ": the byte-order of the writer is the same "
                    + "as the counterpart of Java: Big Endian");
        } else {
            // Because Java's byte-order is always big endian,
            // this (!=2) means this sav file was written on a little-endian machine;
            // non-string, multi-byte blocks must be byte-reversed
            bb_fileLayout_code.order(ByteOrder.LITTLE_ENDIAN);

            int2test = bb_fileLayout_code.getInt();

            if (int2test == 2 || int2test == 3) {
                dbgLog.fine("The sav file was saved on a little endian machine");
                dbgLog.fine("Reversal of the bytes is necessary to decode " + "multi-byte, non-string blocks");
                isLittleEndian = true;
            } else {
                throw new IOException("reading recordType1:unknown file layout code=" + int2test);
            }
        }

        dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());
        smd.getFileInformation().put("OSByteOrder", ByteOrder.nativeOrder().toString());
        smd.getFileInformation().put("byteOrder", int2test);

        // 1.3 4-byte Number_Of_OBS_Units_Per_Case
        // (= how many RT2 records => how many variables)
        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE; // 4 bytes

        ByteBuffer bb_OBS_units_per_case = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE);
        if (isLittleEndian) {
            bb_OBS_units_per_case.order(ByteOrder.LITTLE_ENDIAN);
        }
        OBSUnitsPerCase = bb_OBS_units_per_case.getInt();
        dbgLog.fine("RT1: OBSUnitsPerCase=" + OBSUnitsPerCase);
        smd.getFileInformation().put("OBSUnitsPerCase", OBSUnitsPerCase);

        // 1.4 4-byte Compression_Switch
        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_SWITCH; // 4 bytes

        ByteBuffer bb_compression_switch = ByteBuffer.wrap(recordType1, offset_start, LENGTH_COMPRESSION_SWITCH);
        if (isLittleEndian) {
            bb_compression_switch.order(ByteOrder.LITTLE_ENDIAN);
        }
        int compression_switch = bb_compression_switch.getInt();
        if (compression_switch == 0) {
            // data section is not compressed
            isDataSectionCompressed = false;
            dbgLog.fine("data section is not compressed");
        } else {
            dbgLog.fine("data section is compressed:" + compression_switch);
        }
        smd.getFileInformation().put("compressedData", compression_switch);

        // 1.5 4-byte Case-Weight Variable Index
        // warning: this variable index starts from 1, not 0
        offset_start = offset_end;
        offset_end += LENGTH_CASE_WEIGHT_VARIABLE_INDEX; // 4 bytes

        ByteBuffer bb_Case_Weight_Variable_Index = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_CASE_WEIGHT_VARIABLE_INDEX);
        if (isLittleEndian) {
            bb_Case_Weight_Variable_Index.order(ByteOrder.LITTLE_ENDIAN);
        }
        caseWeightVariableOBSIndex = bb_Case_Weight_Variable_Index.getInt();
        smd.getFileInformation().put("caseWeightVariableOBSIndex", caseWeightVariableOBSIndex);

        // 1.6 4-byte Number of Cases
        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_CASES; // 4 bytes

        ByteBuffer bb_Number_Of_Cases = ByteBuffer.wrap(recordType1, offset_start, LENGTH_NUMBER_OF_CASES);
        if (isLittleEndian) {
            bb_Number_Of_Cases.order(ByteOrder.LITTLE_ENDIAN);
        }
        int numberOfCases = bb_Number_Of_Cases.getInt();

        if (numberOfCases < 0) {
            // -1 if numberOfCases is unknown
            throw new RuntimeException("number of cases is not recorded in the header");
        } else {
            dbgLog.fine("RT1: number of cases is recorded= " + numberOfCases);
            caseQnty = numberOfCases;
            smd.getFileInformation().put("caseQnty", numberOfCases);
        }

        // 1.7 8-byte compression-bias [not long but double]
        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_BIAS; // 8 bytes

        ByteBuffer bb_compression_bias = ByteBuffer
                .wrap(Arrays.copyOfRange(recordType1, offset_start, offset_end));
        if (isLittleEndian) {
            bb_compression_bias.order(ByteOrder.LITTLE_ENDIAN);
        }
        Double compressionBias = bb_compression_bias.getDouble();

        if (compressionBias == 100d) {
            // 100 is expected
            dbgLog.fine("compressionBias is 100 as expected");
            smd.getFileInformation().put("compressionBias", 100);
        } else {
            dbgLog.fine("compression bias is not 100: " + compressionBias);
            smd.getFileInformation().put("compressionBias", compressionBias);
        }

        // 1.8 84-byte File Creation Information (date/time: dd MM yyhh:mm:ss +
        // 64-byte label)
        offset_start = offset_end;
        offset_end += LENGTH_FILE_CREATION_INFO; // 84 bytes

        String fileCreationInfo = getNullStrippedString(
                new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII"));

        dbgLog.fine("fileCreationInfo:\n" + fileCreationInfo + "\n");

        String fileCreationDate = fileCreationInfo.substring(0, length_file_creation_date);
        int dateEnd = length_file_creation_date + length_file_creation_time;
        String fileCreationTime = fileCreationInfo.substring(length_file_creation_date, (dateEnd));
        String fileCreationNote = fileCreationInfo.substring(dateEnd, length_file_creation_label);

        dbgLog.fine("fileDate=" + fileCreationDate);
        dbgLog.fine("fileTime=" + fileCreationTime);
        dbgLog.fine("fileNote" + fileCreationNote);

        smd.getFileInformation().put("fileDate", fileCreationDate);
        smd.getFileInformation().put("fileTime", fileCreationTime);
        smd.getFileInformation().put("fileNote", fileCreationNote);
        smd.getFileInformation().put("varFormat_schema", "SPSS");

        // add the info to the fileInfo
        smd.getFileInformation().put("mimeType", MIME_TYPE[0]);
        smd.getFileInformation().put("fileFormat", MIME_TYPE[0]);

        smd.setValueLabelMappingTable(valueVariableMappingTable);

    } catch (IOException ex) {
        //ex.printStackTrace();
        throw ex;
    }

    dbgLog.fine("***** decodeRecordType1(): end *****");
}
From source file: com.linkedin.databus.core.DbusEventBuffer.java

/**
 * Batch interface to write events within a range out into a WritableByteChannel
 * @param range
 * @param writeChannel
 * @param encoding
 * @return number of bytes written
 */
public int batchWrite(Range range, WritableByteChannel writeChannel, Encoding encoding) {
    long startOffset = range.start;
    long endOffset = range.end;
    assert (_bufferPositionParser.bufferIndex(startOffset) == _bufferPositionParser.bufferIndex(endOffset));

    ByteBuffer buf = _buffers[_bufferPositionParser.bufferIndex(startOffset)];
    int endBufferOffset = _bufferPositionParser.bufferOffset(endOffset);
    int startBufferOffset = _bufferPositionParser.bufferOffset(startOffset);
    int bytesWritten = 0;

    switch (encoding) {
    case BINARY: {
        ByteBuffer writeBuf = buf.duplicate().order(_eventFactory.getByteOrder());
        writeBuf.position(startBufferOffset);
        writeBuf.limit(endBufferOffset);
        try {
            bytesWritten = writeChannel.write(writeBuf);
        } catch (IOException e1) {
            LOG.error("batchWrite error: " + e1.getMessage(), e1);
            throw new RuntimeException(e1);
        }
        break;
    }
    case JSON:
    case JSON_PLAIN_VALUE: {
        DbusEventInternalReadable e = _eventFactory.createReadOnlyDbusEventFromBuffer(buf, startBufferOffset);
        int currentBufferOffset = startBufferOffset;
        while (currentBufferOffset != endBufferOffset) {
            e = e.reset(buf, currentBufferOffset);
            e.writeTo(writeChannel, encoding);
            currentBufferOffset += e.size();
        }
    }
    }
    return (bytesWritten);
}
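Note the buf.duplicate().order(...) chain in the BINARY branch: a duplicate starts out BIG_ENDIAN regardless of the source buffer's byte order, so the order must be reapplied. Setting position and limit on the duplicate then carves out the write range without disturbing other readers of the shared buffer. A minimal sketch of this pattern (method and parameter names are hypothetical):

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.WritableByteChannel;

// Write bytes [start, end) of a shared buffer to a channel without
// touching the shared buffer's own position, limit, or order.
static int writeRange(ByteBuffer shared, WritableByteChannel ch,
                      int start, int end, ByteOrder order) throws IOException {
    ByteBuffer view = shared.duplicate().order(order); // duplicate() resets the order to
                                                       // BIG_ENDIAN, so set it explicitly
    view.position(start);
    view.limit(end);
    return ch.write(view); // advances only the duplicate's position
}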
From source file: edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordType7(BufferedInputStream stream) throws IOException {
    dbgLog.fine("decodeRecordType7(): start");
    int counter = 0;
    int[] headerSection = new int[2];

    // the variables below may no longer be needed;
    // but they may be useful for debugging/logging purposes.

    /// // RecordType 7
    /// // Subtype 3
    /// List<Integer> releaseMachineSpecificInfo = new ArrayList<Integer>();
    /// List<String> releaseMachineSpecificInfoHex = new ArrayList<String>();
    /// // Subtype 4
    /// Map<String, Double> OBSTypeValue = new LinkedHashMap<String, Double>();
    /// Map<String, String> OBSTypeHexValue = new LinkedHashMap<String, String>();
    // Subtype 11
    /// List<Integer> measurementLevel = new ArrayList<Integer>();
    /// List<Integer> columnWidth = new ArrayList<Integer>();
    /// List<Integer> alignment = new ArrayList<Integer>();

    while (true) {
        try {
            if (stream == null) {
                throw new IllegalArgumentException("RT7: stream == null!");
            }
            // first check the 4-byte header value
            //if (stream.markSupported()){
            stream.mark(1000);
            //}
            // 7.0 check the first 4 bytes
            byte[] headerCodeRt7 = new byte[LENGTH_RECORD_TYPE7_CODE];

            int nbytes_rt7 = stream.read(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE);
            // to-do check against nbytes
            //printHexDump(headerCodeRt7, "RT7 header test");
            ByteBuffer bb_header_code_rt7 = ByteBuffer.wrap(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE);
            if (isLittleEndian) {
                bb_header_code_rt7.order(ByteOrder.LITTLE_ENDIAN);
            }

            int intRT7test = bb_header_code_rt7.getInt();
            dbgLog.fine("RT7: header test value=" + intRT7test);
            if (intRT7test != 7) {
                //if (stream.markSupported()){
                //out.print("iteration="+safteyCounter);
                //dbgLog.fine("iteration="+safteyCounter);
                dbgLog.fine("intRT7test failed=" + intRT7test);
                dbgLog.fine("counter=" + counter);
                stream.reset();
                return;
                //}
            }
            // 7.1 check 4-byte integer Sub-Type Code
            byte[] length_sub_type_code = new byte[LENGTH_RT7_SUB_TYPE_CODE];
            int nbytes_rt7_1 = stream.read(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE);
            // to-do check against nbytes
            //printHexDump(length_how_many_line_bytes, "RT7 how_many_line_bytes");
            ByteBuffer bb_sub_type_code = ByteBuffer.wrap(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE);
            if (isLittleEndian) {
                bb_sub_type_code.order(ByteOrder.LITTLE_ENDIAN);
            }
            int subTypeCode = bb_sub_type_code.getInt();
            dbgLog.fine("RT7: subTypeCode=" + subTypeCode);

            switch (subTypeCode) {
            case 3:
                // 3: Release and Machine-Specific Integer Information
                //parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData");
                        byte[] work = new byte[unitLength];
                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        String dataInHex = new String(Hex.encodeHex(bb_field.array()));
                        /// releaseMachineSpecificInfoHex.add(dataInHex);
                        dbgLog.finer("raw bytes in Hex:" + dataInHex);
                        if (unitLength == 4) {
                            int fieldData = bb_field.getInt();
                            dbgLog.finer("fieldData(int)=" + fieldData);
                            dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData));
                            /// releaseMachineSpecificInfo.add(fieldData);
                        }
                    }
                    /// dbgLog.fine("releaseMachineSpecificInfo="+releaseMachineSpecificInfo);
                    /// dbgLog.fine("releaseMachineSpecificInfoHex="+releaseMachineSpecificInfoHex);
                } else {
                    // throw new IOException
                }
                dbgLog.fine("***** end of subType 3 ***** \n");
                break;
            case 4:
                // Release and Machine-Specific OBS-Type Information
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData:" + RecordType7SubType4Fields.get(i));
                        byte[] work = new byte[unitLength];
                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        dbgLog.finer("byte order=" + bb_field.order().toString());
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        ByteBuffer bb_field_dup = bb_field.duplicate();
                        OBSTypeHexValue.put(RecordType7SubType4Fields.get(i),
                                new String(Hex.encodeHex(bb_field.array())));
                        // dbgLog.finer("raw bytes in Hex:"+
                        //     OBSTypeHexValue.get(RecordType7SubType4Fields.get(i)));
                        if (unitLength == 8) {
                            double fieldData = bb_field.getDouble();
                            /// OBSTypeValue.put(RecordType7SubType4Fields.get(i), fieldData);
                            dbgLog.finer("fieldData(double)=" + fieldData);
                            OBSTypeHexValue.put(RecordType7SubType4Fields.get(i), Double.toHexString(fieldData));
                            dbgLog.fine("fieldData in Hex=" + Double.toHexString(fieldData));
                        }
                    }
                    /// dbgLog.fine("OBSTypeValue="+OBSTypeValue);
                    /// dbgLog.fine("OBSTypeHexValue="+OBSTypeHexValue);
                } else {
                    // throw new IOException
                }
                dbgLog.fine("***** end of subType 4 ***** \n");
                break;
            case 5:
                // Variable Sets Information
                parseRT7SubTypefield(stream);
                break;
            case 6:
                // Trends date information
                parseRT7SubTypefield(stream);
                break;
            case 7:
                // Multiple response groups
                parseRT7SubTypefield(stream);
                break;
            case 8:
                // Windows Data Entry data
                parseRT7SubTypefield(stream);
                break;
            case 9:
                //
                parseRT7SubTypefield(stream);
                break;
            case 10:
                // TextSmart data
                parseRT7SubTypefield(stream);
                break;
            case 11:
                // Msmt level, col width, & alignment
                //parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData");
                        byte[] work = new byte[unitLength];
                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(bb_field.array())));
                        if (unitLength == 4) {
                            int fieldData = bb_field.getInt();
                            dbgLog.finer("fieldData(int)=" + fieldData);
                            dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData));

                            int remainder = i % 3;
                            dbgLog.finer("remainder=" + remainder);
                            if (remainder == 0) {
                                /// measurementLevel.add(fieldData);
                            } else if (remainder == 1) {
                                /// columnWidth.add(fieldData);
                            } else if (remainder == 2) {
                                /// alignment.add(fieldData);
                            }
                        }
                    }
                } else {
                    // throw new IOException
                }
                /// dbgLog.fine("measurementLevel="+measurementLevel);
                /// dbgLog.fine("columnWidth="+columnWidth);
                /// dbgLog.fine("alignment="+alignment);
                dbgLog.fine("end of subType 11\n");
                break;
            case 12:
                // Windows Data Entry GUID
                parseRT7SubTypefield(stream);
                break;
            case 13:
                // Extended variable names
                // parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7: numberOfUnits=" + numberOfUnits);
                    byte[] work = new byte[unitLength * numberOfUnits];
                    int nbtyes13 = stream.read(work);
                    String[] variableShortLongNamePairs = new String(work, "US-ASCII").split("\t");

                    for (int i = 0; i < variableShortLongNamePairs.length; i++) {
                        dbgLog.fine("RT7: " + i + "-th pair" + variableShortLongNamePairs[i]);
                        String[] pair = variableShortLongNamePairs[i].split("=");
                        shortToLongVariableNameTable.put(pair[0], pair[1]);
                    }

                    dbgLog.fine("RT7: shortToLongVariableNameTable" + shortToLongVariableNameTable);
                    // We are saving the short-to-long name map; at the
                    // end of ingest, we'll go through the data variables and
                    // change the names accordingly.
                    // smd.setShortToLongVarialbeNameTable(shortToLongVarialbeNameTable);
                } else {
                    // throw new IOException
                }
                break;
            case 14:
                // Extended strings
                //parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7.14: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7.14: numberOfUnits=" + numberOfUnits);
                    byte[] work = new byte[unitLength * numberOfUnits];
                    int nbtyes13 = stream.read(work);
                    String[] extendedVariablesSizePairs = new String(work, defaultCharSet).split("\000\t");

                    for (int i = 0; i < extendedVariablesSizePairs.length; i++) {
                        dbgLog.fine("RT7.14: " + i + "-th pair" + extendedVariablesSizePairs[i]);
                        if (extendedVariablesSizePairs[i].indexOf("=") > 0) {
                            String[] pair = extendedVariablesSizePairs[i].split("=");
                            extendedVariablesSizeTable.put(pair[0], Integer.valueOf(pair[1]));
                        }
                    }

                    dbgLog.fine("RT7.14: extendedVariablesSizeTable" + extendedVariablesSizeTable);
                } else {
                    // throw new IOException
                }
                break;
            case 15:
                // Clementine Metadata
                parseRT7SubTypefield(stream);
                break;
            case 16:
                // 64-bit N of cases
                parseRT7SubTypefield(stream);
                break;
            case 17:
                // File level attributes
                parseRT7SubTypefield(stream);
                break;
            case 18:
                // Variable attributes
                parseRT7SubTypefield(stream);
                break;
            case 19:
                // Extended multiple response groups
                parseRT7SubTypefield(stream);
                break;
            case 20:
                // Character encoding, aka code page.
                // Must be a version 16+ feature (?).
                // Starting v.16, the default character encoding for SAV
                // files is UTF-8; but then it is possible to specify an
                // alternative encoding here.
                // A typical use case would be people setting it to "ISO-Latin"
                // or "windows-1252", or a similar 8-bit encoding to store
                // text with standard Western European accents.
                // -- L.A.
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7-20: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7-20: numberOfUnits=" + numberOfUnits);
                    byte[] rt7st20bytes = new byte[unitLength * numberOfUnits];
                    int nbytes20 = stream.read(rt7st20bytes);
                    String dataCharSet = new String(rt7st20bytes, "US-ASCII");

                    if (dataCharSet != null && !(dataCharSet.equals(""))) {
                        dbgLog.fine("RT7-20: data charset: " + dataCharSet);
                        defaultCharSet = dataCharSet;
                    }
                }
                /*else {
                    // TODO:
                    // decide if the exception should actually be thrown here!
                    // -- L.A. 4.0 beta
                    // throw new IOException
                }*/
                break;
            case 21:
                // Value labels for long strings
                parseRT7SubTypefield(stream);
                break;
            case 22:
                // Missing values for long strings
                parseRT7SubTypefield(stream);
                break;
            default:
                parseRT7SubTypefield(stream);
            }
        } catch (IOException ex) {
            //ex.printStackTrace();
            throw ex;
        }

        counter++;
        if (counter > 20) {
            break;
        }
    }
    dbgLog.fine("RT7: counter=" + counter);
    dbgLog.fine("RT7: decodeRecordType7(): end");
}
From source file: edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

void decodeRecordType7(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType7(): start *****");
    int counter = 0;
    int[] headerSection = new int[2];

    // the variables below may no longer be needed;
    // but they may be useful for debugging/logging purposes.

    /// // RecordType 7
    /// // Subtype 3
    /// List<Integer> releaseMachineSpecificInfo = new ArrayList<Integer>();
    /// List<String> releaseMachineSpecificInfoHex = new ArrayList<String>();
    /// // Subtype 4
    /// Map<String, Double> OBSTypeValue = new LinkedHashMap<String, Double>();
    /// Map<String, String> OBSTypeHexValue = new LinkedHashMap<String, String>();
    // Subtype 11
    /// List<Integer> measurementLevel = new ArrayList<Integer>();
    /// List<Integer> columnWidth = new ArrayList<Integer>();
    /// List<Integer> alignment = new ArrayList<Integer>();

    Map<String, String> shortToLongVarialbeNameTable = new LinkedHashMap<String, String>();

    while (true) {
        try {
            if (stream == null) {
                throw new IllegalArgumentException("RT7: stream == null!");
            }
            // first check the 4-byte header value
            //if (stream.markSupported()){
            stream.mark(1000);
            //}
            // 7.0 check the first 4 bytes
            byte[] headerCodeRt7 = new byte[LENGTH_RECORD_TYPE7_CODE];

            int nbytes_rt7 = stream.read(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE);
            // to-do check against nbytes
            //printHexDump(headerCodeRt7, "RT7 header test");
            ByteBuffer bb_header_code_rt7 = ByteBuffer.wrap(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE);
            if (isLittleEndian) {
                bb_header_code_rt7.order(ByteOrder.LITTLE_ENDIAN);
            }

            int intRT7test = bb_header_code_rt7.getInt();
            dbgLog.fine("RT7: header test value=" + intRT7test);
            if (intRT7test != 7) {
                //if (stream.markSupported()){
                //out.print("iteration="+safteyCounter);
                //dbgLog.fine("iteration="+safteyCounter);
                dbgLog.fine("intRT7test failed=" + intRT7test);
                dbgLog.fine("counter=" + counter);
                stream.reset();
                return;
                //}
            }
            // 7.1 check 4-byte integer Sub-Type Code
            byte[] length_sub_type_code = new byte[LENGTH_RT7_SUB_TYPE_CODE];
            int nbytes_rt7_1 = stream.read(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE);
            // to-do check against nbytes
            //printHexDump(length_how_many_line_bytes, "RT7 how_many_line_bytes");
            ByteBuffer bb_sub_type_code = ByteBuffer.wrap(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE);
            if (isLittleEndian) {
                bb_sub_type_code.order(ByteOrder.LITTLE_ENDIAN);
            }
            int subTypeCode = bb_sub_type_code.getInt();
            dbgLog.fine("RT7: subTypeCode=" + subTypeCode);

            switch (subTypeCode) {
            case 3:
                // 3: Release and Machine-Specific Integer Information
                //parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData");
                        byte[] work = new byte[unitLength];
                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        String dataInHex = new String(Hex.encodeHex(bb_field.array()));
                        /// releaseMachineSpecificInfoHex.add(dataInHex);
                        dbgLog.finer("raw bytes in Hex:" + dataInHex);
                        if (unitLength == 4) {
                            int fieldData = bb_field.getInt();
                            dbgLog.finer("fieldData(int)=" + fieldData);
                            dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData));
                            /// releaseMachineSpecificInfo.add(fieldData);
                        }
                    }
                    /// dbgLog.fine("releaseMachineSpecificInfo="+releaseMachineSpecificInfo);
                    /// dbgLog.fine("releaseMachineSpecificInfoHex="+releaseMachineSpecificInfoHex);
                } else {
                    // throw new IOException
                }
                dbgLog.fine("***** end of subType 3 ***** \n");
                break;
            case 4:
                // Release and Machine-Specific OBS-Type Information
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData:" + RecordType7SubType4Fields.get(i));
                        byte[] work = new byte[unitLength];
                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        dbgLog.finer("byte order=" + bb_field.order().toString());
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        ByteBuffer bb_field_dup = bb_field.duplicate();
                        OBSTypeHexValue.put(RecordType7SubType4Fields.get(i),
                                new String(Hex.encodeHex(bb_field.array())));
                        // dbgLog.finer("raw bytes in Hex:"+
                        //     OBSTypeHexValue.get(RecordType7SubType4Fields.get(i)));
                        if (unitLength == 8) {
                            double fieldData = bb_field.getDouble();
                            /// OBSTypeValue.put(RecordType7SubType4Fields.get(i), fieldData);
                            dbgLog.finer("fieldData(double)=" + fieldData);
                            OBSTypeHexValue.put(RecordType7SubType4Fields.get(i), Double.toHexString(fieldData));
                            dbgLog.fine("fieldData in Hex=" + Double.toHexString(fieldData));
                        }
                    }
                    /// dbgLog.fine("OBSTypeValue="+OBSTypeValue);
                    /// dbgLog.fine("OBSTypeHexValue="+OBSTypeHexValue);
                } else {
                    // throw new IOException
                }
                dbgLog.fine("***** end of subType 4 ***** \n");
                break;
            case 5:
                // Variable Sets Information
                parseRT7SubTypefield(stream);
                break;
            case 6:
                // Trends date information
                parseRT7SubTypefield(stream);
                break;
            case 7:
                // Multiple response groups
                parseRT7SubTypefield(stream);
                break;
            case 8:
                // Windows Data Entry data
                parseRT7SubTypefield(stream);
                break;
            case 9:
                //
                parseRT7SubTypefield(stream);
                break;
            case 10:
                // TextSmart data
                parseRT7SubTypefield(stream);
                break;
            case 11:
                // Msmt level, col width, & alignment
                //parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData");
                        byte[] work = new byte[unitLength];
                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(bb_field.array())));
                        if (unitLength == 4) {
                            int fieldData = bb_field.getInt();
                            dbgLog.finer("fieldData(int)=" + fieldData);
                            dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData));

                            int remainder = i % 3;
                            dbgLog.finer("remainder=" + remainder);
                            if (remainder == 0) {
                                /// measurementLevel.add(fieldData);
                            } else if (remainder == 1) {
                                /// columnWidth.add(fieldData);
                            } else if (remainder == 2) {
                                /// alignment.add(fieldData);
                            }
                        }
                    }
                } else {
                    // throw new IOException
                }
                /// dbgLog.fine("measurementLevel="+measurementLevel);
                /// dbgLog.fine("columnWidth="+columnWidth);
                /// dbgLog.fine("alignment="+alignment);
                dbgLog.fine("***** end of subType 11 ***** \n");
                break;
            case 12:
                // Windows Data Entry GUID
                parseRT7SubTypefield(stream);
                break;
            case 13:
                // Extended variable names
                // parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7: numberOfUnits=" + numberOfUnits);
                    byte[] work = new byte[unitLength * numberOfUnits];
                    int nbtyes13 = stream.read(work);
                    String[] variableShortLongNamePairs = new String(work, "US-ASCII").split("\t");

                    for (int i = 0; i < variableShortLongNamePairs.length; i++) {
                        dbgLog.fine("RT7: " + i + "-th pair" + variableShortLongNamePairs[i]);
                        String[] pair = variableShortLongNamePairs[i].split("=");
                        shortToLongVarialbeNameTable.put(pair[0], pair[1]);
                    }

                    dbgLog.fine("RT7: shortToLongVarialbeNameTable" + shortToLongVarialbeNameTable);
                    smd.setShortToLongVarialbeNameTable(shortToLongVarialbeNameTable);
                } else {
                    // throw new IOException
                }
                break;
            case 14:
                // Extended strings
                //parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7.14: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7.14: numberOfUnits=" + numberOfUnits);
                    byte[] work = new byte[unitLength * numberOfUnits];
                    int nbtyes13 = stream.read(work);
                    String[] extendedVariablesSizePairs = new String(work, defaultCharSet).split("\000\t");

                    for (int i = 0; i < extendedVariablesSizePairs.length; i++) {
                        dbgLog.fine("RT7.14: " + i + "-th pair" + extendedVariablesSizePairs[i]);
                        if (extendedVariablesSizePairs[i].indexOf("=") > 0) {
                            String[] pair = extendedVariablesSizePairs[i].split("=");
                            extendedVariablesSizeTable.put(pair[0], Integer.valueOf(pair[1]));
                        }
                    }

                    dbgLog.fine("RT7.14: extendedVariablesSizeTable" + extendedVariablesSizeTable);
                } else {
                    // throw new IOException
                }
                break;
            case 15:
                // Clementine Metadata
                parseRT7SubTypefield(stream);
                break;
            case 16:
                // 64-bit N of cases
                parseRT7SubTypefield(stream);
                break;
            case 17:
                // File level attributes
                parseRT7SubTypefield(stream);
                break;
            case 18:
                // Variable attributes
                parseRT7SubTypefield(stream);
                break;
            case 19:
                // Extended multiple response groups
                parseRT7SubTypefield(stream);
                break;
            case 20:
                // Encoding, aka code page
                parseRT7SubTypefield(stream);
                /* TODO: This needs to be researched;
                 * Is this field really used, ever?
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null){
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7-20: unitLength="+unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7-20: numberOfUnits="+numberOfUnits);
                    byte[] rt7st20bytes = new byte[unitLength*numberOfUnits];
                    int nbytes20 = stream.read(rt7st20bytes);

                    String dataCharSet = new String(rt7st20bytes,"US-ASCII");

                    if (dataCharSet != null && !(dataCharSet.equals(""))) {
                        dbgLog.fine("RT7-20: data charset: "+ dataCharSet);
                        defaultCharSet = dataCharSet;
                    }
                } else {
                    // throw new IOException
                }
                * */
                break;
            case 21:
                // Value labels for long strings
                parseRT7SubTypefield(stream);
                break;
            case 22:
                // Missing values for long strings
                parseRT7SubTypefield(stream);
                break;
            default:
                parseRT7SubTypefield(stream);
            }
        } catch (IOException ex) {
            //ex.printStackTrace();
            throw ex;
        }

        counter++;
        if (counter > 20) {
            break;
        }
    }
    dbgLog.fine("RT7: counter=" + counter);
    dbgLog.fine("RT7: ***** decodeRecordType7(): end *****");
}
From source file: org.apache.cassandra.db.index.sasi.plan.Expression.java

private boolean validateStringValue(ByteBuffer columnValue, ByteBuffer requestedValue) {
    analyzer.reset(columnValue.duplicate());
    while (analyzer.hasNext()) {
        ByteBuffer term = analyzer.next();
        if (ByteBufferUtil.contains(term, requestedValue))
            return true;
    }
    return false;
}
From source file: org.apache.cassandra.index.sasi.plan.Expression.java

private boolean validateStringValue(ByteBuffer columnValue, ByteBuffer requestedValue) {
    analyzer.reset(columnValue.duplicate());
    while (analyzer.hasNext()) {
        ByteBuffer term = analyzer.next();

        boolean isMatch = false;
        switch (operation) {
        case EQ:
        case MATCH:
        // Operation.isSatisfiedBy handles conclusion on !=,
        // here we just need to make sure that term matched it
        case NOT_EQ:
            isMatch = validator.compare(term, requestedValue) == 0;
            break;
        case PREFIX:
            isMatch = ByteBufferUtil.startsWith(term, requestedValue);
            break;
        case SUFFIX:
            isMatch = ByteBufferUtil.endsWith(term, requestedValue);
            break;
        case CONTAINS:
            isMatch = ByteBufferUtil.contains(term, requestedValue);
            break;
        }

        if (isMatch)
            return true;
    }
    return false;
}
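In both Cassandra versions, columnValue.duplicate() acts as a defensive copy: the analyzer consumes the buffer it is given by advancing its position, and duplicating first guarantees the caller's columnValue cursor is left untouched. A condensed sketch of the idiom (the byte-consuming loop below stands in for the analyzer, which is not shown in these excerpts):

import java.nio.ByteBuffer;

// Hand a buffer to a consuming routine without letting it move our cursor.
static int countBytes(ByteBuffer value) {
    ByteBuffer scratch = value.duplicate(); // same bytes, private position/limit
    int n = 0;
    while (scratch.hasRemaining()) {
        scratch.get();                      // stand-in for analyzer-style consumption
        n++;
    }
    // value.position() is unchanged here; only the duplicate was advanced
    return n;
}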