List of usage examples for java.nio.ByteBuffer.getInt()
public abstract int getInt();
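Before the real-world examples, here is a minimal, self-contained sketch (not taken from any of the source files below) showing the relative and absolute forms of getInt() and the byte-order handling that most of the examples depend on:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class GetIntDemo {
    public static void main(String[] args) {
        // Wrap 8 bytes holding two big-endian ints: 1 and 2.
        ByteBuffer bb = ByteBuffer.wrap(new byte[] { 0, 0, 0, 1, 0, 0, 0, 2 });

        int first = bb.getInt();   // relative read: advances position by 4
        int second = bb.getInt(4); // absolute read: position is unchanged
        System.out.println(first + " " + second); // 1 2

        // ByteBuffer defaults to BIG_ENDIAN; data written on a
        // little-endian machine must be read with the matching order.
        bb.rewind();
        bb.order(ByteOrder.LITTLE_ENDIAN);
        System.out.println(bb.getInt()); // 16777216 (0x01000000)
    }
}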
From source file:au.org.ala.layers.intersect.Grid.java
public void getClassInfo(Map<Float, float[]> info) {
    long length = ((long) nrows) * ((long) ncols);

    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");

    try { //read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        byte[] b = new byte[65536];
        long i = 0;
        long max = 0;
        long len;
        float v;
        float ndv = (float) nodatavalue;

        while ((len = afile.read(b)) > 0) {
            ByteBuffer bb = ByteBuffer.wrap(b);
            if (byteorderLSB) {
                bb.order(ByteOrder.LITTLE_ENDIAN);
            }

            if (datatype.equalsIgnoreCase("UBYTE")) {
                max += len;
                max = Math.min(max, length);
                for (; i < max; i++) {
                    v = bb.get();
                    if (v < 0) v += 256;
                    if (v != ndv) updatesStats(info, i, v * rescale);
                }
            } else if (datatype.equalsIgnoreCase("BYTE")) {
                max += len;
                max = Math.min(max, length);
                for (; i < max; i++) {
                    v = bb.get();
                    if (v != ndv) updatesStats(info, i, v * rescale);
                }
            } else if (datatype.equalsIgnoreCase("SHORT")) {
                max += len / 2;
                max = Math.min(max, length);
                for (; i < max; i++) {
                    v = bb.getShort();
                    if (v != ndv) updatesStats(info, i, v * rescale);
                }
            } else if (datatype.equalsIgnoreCase("INT")) {
                max += len / 4;
                max = Math.min(max, length);
                for (; i < max; i++) {
                    v = bb.getInt();
                    if (v != ndv) updatesStats(info, i, v * rescale);
                }
            } else if (datatype.equalsIgnoreCase("LONG")) {
                max += len / 8;
                max = Math.min(max, length);
                for (; i < max; i++) {
                    v = bb.getLong();
                    if (v != ndv) updatesStats(info, i, v * rescale);
                }
            } else if (datatype.equalsIgnoreCase("FLOAT")) {
                max += len / 4;
                max = Math.min(max, length);
                for (; i < max; i++) {
                    v = bb.getFloat();
                    if (v != ndv) updatesStats(info, i, v * rescale);
                }
            } else if (datatype.equalsIgnoreCase("DOUBLE")) {
                max += len / 8;
                max = Math.min(max, length);
                for (; i < max; i++) {
                    v = (float) bb.getDouble();
                    if (v != ndv) updatesStats(info, i, v * rescale);
                }
            } else {
                max += len / 4;
                for (; i < max; i++) {
                    // should not happen; catch anyway...
                }
            }
        }
    } catch (Exception e) {
        logger.error("An error has occurred getting grid class stats", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
}
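A side note on the UBYTE branch above: Java bytes are signed, so the example converts negative values by adding 256. A tiny standalone sketch of that idiom (hypothetical class name; Byte.toUnsignedInt is the Java 8+ equivalent):

import java.nio.ByteBuffer;

public class UnsignedByteDemo {
    public static void main(String[] args) {
        ByteBuffer bb = ByteBuffer.wrap(new byte[] { (byte) 0xFF });
        float v = bb.get();   // -1.0 : byte is signed in Java
        if (v < 0) v += 256;  // the idiom used in Grid.java
        System.out.println(v); // 255.0
        System.out.println(Byte.toUnsignedInt((byte) 0xFF)); // 255, Java 8+ equivalent
    }
}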
From source file:org.alfresco.repo.search.impl.lucene.index.IndexInfo.java
private String readString(ByteBuffer buffer, CRC32 crc32) throws UnsupportedEncodingException {
    int size = buffer.getInt();
    byte[] bytes = new byte[size];
    buffer.get(bytes);
    char[] chars = new char[size];
    for (int i = 0; i < size; i++) {
        chars[i] = (char) bytes[i];
    }
    crc32.update(bytes);
    return new String(chars);
}
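readString() consumes a length-prefixed record: getInt() reads a 4-byte count, then that many bytes follow. For context, a hedged sketch of the matching writer side, not taken from IndexInfo (the original decodes bytes by casting each to char, which matches ISO-8859-1 in the low range):

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

public class LengthPrefixedString {
    // Hypothetical writer for the format readString() consumes:
    // a 4-byte length followed by that many single-byte characters.
    static ByteBuffer writeString(String s, CRC32 crc32) {
        byte[] bytes = s.getBytes(StandardCharsets.ISO_8859_1);
        ByteBuffer buffer = ByteBuffer.allocate(4 + bytes.length);
        buffer.putInt(bytes.length); // the count that readString's getInt() recovers
        buffer.put(bytes);
        crc32.update(bytes);
        buffer.flip();
        return buffer;
    }

    public static void main(String[] args) {
        ByteBuffer buf = writeString("index-0001", new CRC32());
        int size = buf.getInt(); // 10
        byte[] data = new byte[size];
        buf.get(data);
        System.out.println(new String(data, StandardCharsets.ISO_8859_1));
    }
}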
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
private void parseRT7SubTypefield(BufferedInputStream stream) throws IOException {
    int length_unit_length = 4;
    int length_number_of_units = 4;
    int storage_size = length_unit_length + length_number_of_units;

    int[] headerSection = new int[2];

    byte[] byteStorage = new byte[storage_size];

    try {
        int nbytes = stream.read(byteStorage);
        // to-do check against nbytes

        //printHexDump(byteStorage, "RT7:storage");

        ByteBuffer bb_data_type = ByteBuffer.wrap(byteStorage, 0, length_unit_length);
        if (isLittleEndian) {
            bb_data_type.order(ByteOrder.LITTLE_ENDIAN);
        }
        int unitLength = bb_data_type.getInt();
        dbgLog.fine("parseRT7 SubTypefield: unitLength=" + unitLength);

        ByteBuffer bb_number_of_units = ByteBuffer.wrap(byteStorage, length_unit_length,
                length_number_of_units);
        if (isLittleEndian) {
            bb_number_of_units.order(ByteOrder.LITTLE_ENDIAN);
        }
        int numberOfUnits = bb_number_of_units.getInt();
        dbgLog.fine("parseRT7 SubTypefield: numberOfUnits=" + numberOfUnits);

        headerSection[0] = unitLength;
        headerSection[1] = numberOfUnits;

        for (int i = 0; i < numberOfUnits; i++) {
            byte[] work = new byte[unitLength];

            int nb = stream.read(work);
            dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));

            ByteBuffer bb_field = ByteBuffer.wrap(work);
            if (isLittleEndian) {
                bb_field.order(ByteOrder.LITTLE_ENDIAN);
            }
            dbgLog.fine("RT7ST: raw bytes in Hex:" + new String(Hex.encodeHex(bb_field.array())));

            if (unitLength == 4) {
                int fieldData = bb_field.getInt();
                dbgLog.fine("RT7ST: " + i + "-th fieldData=" + fieldData);
                dbgLog.fine("RT7ST: fieldData in Hex=" + Integer.toHexString(fieldData));
            } else if (unitLength == 8) {
                double fieldData = bb_field.getDouble();
                dbgLog.finer("RT7ST: " + i + "-th fieldData=" + fieldData);
                dbgLog.finer("RT7ST: fieldData in Hex=" + Double.toHexString(fieldData));
            }
            dbgLog.finer("");
        }
    } catch (IOException ex) {
        //ex.printStackTrace();
        throw ex;
    }
}
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType6(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType6(): start *****");
    try {
        if (stream == null) {
            throw new IllegalArgumentException("stream == null!");
        }
        // this section is optional; so let's first check the 4-byte header
        // value and see what type it is.
        //if (stream.markSupported()){ // -- ? L.A. 4.0 alpha
        stream.mark(1000);
        //}

        // 6.0 check the first 4 bytes
        byte[] headerCodeRt6 = new byte[LENGTH_RECORD_TYPE6_CODE];

        int nbytes_rt6 = stream.read(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE);
        // to-do check against nbytes

        //printHexDump(headerCodeRt6, "RT6 header test");

        ByteBuffer bb_header_code_rt6 = ByteBuffer.wrap(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE);
        if (isLittleEndian) {
            bb_header_code_rt6.order(ByteOrder.LITTLE_ENDIAN);
        }

        int intRT6test = bb_header_code_rt6.getInt();
        dbgLog.fine("RT6: header test value=" + intRT6test);

        if (intRT6test != 6) {
            //if (stream.markSupported()){
            //out.print("iteration="+safteyCounter);
            //dbgLog.fine("iteration="+safteyCounter);
            dbgLog.fine("intRT6test failed=" + intRT6test);
            stream.reset();
            return;
            //}
        }

        // 6.1 check 4-byte integer that tells how many lines follow
        byte[] length_how_many_line_bytes = new byte[LENGTH_RT6_HOW_MANY_LINES];

        int nbytes_rt6_1 = stream.read(length_how_many_line_bytes, 0, LENGTH_RT6_HOW_MANY_LINES);
        // to-do check against nbytes

        //printHexDump(length_how_many_line_bytes, "RT6 how_many_line_bytes");

        ByteBuffer bb_how_many_lines = ByteBuffer.wrap(length_how_many_line_bytes, 0,
                LENGTH_RT6_HOW_MANY_LINES);
        if (isLittleEndian) {
            bb_how_many_lines.order(ByteOrder.LITTLE_ENDIAN);
        }

        int howManyLinesRt6 = bb_how_many_lines.getInt();
        dbgLog.fine("how Many lines follow=" + howManyLinesRt6);

        // 6.2 read 80-char-long lines
        String[] documentRecord = new String[howManyLinesRt6];

        for (int i = 0; i < howManyLinesRt6; i++) {
            byte[] line = new byte[80];
            int nbytes_rt6_line = stream.read(line);

            documentRecord[i] = StringUtils.stripEnd(
                    new String(Arrays.copyOfRange(line, 0, LENGTH_RT6_DOCUMENT_LINE), defaultCharSet), " ");
            dbgLog.fine(i + "-th line =" + documentRecord[i] + "<-");
        }
        dbgLog.fine("documentRecord:\n" + StringUtils.join(documentRecord, "\n"));

    } catch (IOException ex) {
        //ex.printStackTrace();
        throw ex;
    }

    dbgLog.fine("decodeRecordType6(): end");
}
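Record type 6 is optional, so the reader peeks: mark() the stream, decode the 4-byte type code with getInt(), and reset() if it is not the expected record. A condensed, self-contained sketch of that peek pattern (hypothetical helper, not from SAVFileReader):

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class PeekRecordType {
    /** Returns true and consumes the 4-byte code if it matches; otherwise rewinds. */
    static boolean consumeIfType(BufferedInputStream in, int expected, boolean littleEndian)
            throws IOException {
        in.mark(4);
        byte[] code = new byte[4];
        if (in.read(code) != 4) {
            in.reset();
            return false;
        }
        ByteBuffer bb = ByteBuffer.wrap(code);
        if (littleEndian) {
            bb.order(ByteOrder.LITTLE_ENDIAN);
        }
        if (bb.getInt() == expected) {
            return true;
        }
        in.reset(); // not our record; leave the stream where it was
        return false;
    }

    public static void main(String[] args) throws IOException {
        byte[] data = { 6, 0, 0, 0 }; // little-endian record-type code 6
        BufferedInputStream in = new BufferedInputStream(new ByteArrayInputStream(data));
        System.out.println(consumeIfType(in, 6, true)); // true
    }
}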
From source file:com.inclouds.hbase.rowcache.RowCache.java
/**
 * Reads content of table:row:family.
 *
 * @param tableName
 *            the table name
 * @param row
 *            the row
 * @param columnFamily
 *            the column family
 * @return the list
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
private List<KeyValue> readFamily(byte[] tableName, byte[] row, byte[] columnFamily) throws IOException {
    List<KeyValue> result = new ArrayList<KeyValue>();

    ByteBuffer buf = getLocalByteBuffer(); // bufTLS.get();
    long bufptr = NativeMemory.getBufferAddress(buf);

    prepareKeyForGet(bufptr, tableName, row, 0, row.length, columnFamily, null);
    try {
        rowCache.getDecompress(buf);
    } catch (NativeMemoryException e) {
        throw new IOException(e);
    }
    if (isNotNull(buf) == false) {
        // Return empty list
        // *DEBUG*/LOG.info("not found for: " + new String(row) + ":" + new String(columnFamily));
        return result;
    }
    // Now we have in buffer:
    // 4 bytes - total length
    // 8 bytes - address
    // List of columns:
    // 4 bytes - total columns
    // Column:
    // 4 bytes - qualifier length
    // qualifier
    // 4 bytes - total versions
    // list of versions:
    // { 8 bytes - timestamp
    //   4 bytes - value length
    //   value
    // }
    int offset = 12;
    buf.position(offset);
    int totalColumns = buf.getInt();
    int i = 0;
    // if(isTrace()){
    // /*DEBUG*/LOG.info(" A:"+ buf.getLong(4)+" TL:"+ buf.getInt(0)+" TC:"+totalColumns);
    // }
    bufptr += 16;
    while (i++ < totalColumns) {
        // result = readColumn(buf, row, columnFamily, result);
        bufptr = readColumn(bufptr, row, columnFamily, result);
    }
    return result;
}
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType999(BufferedInputStream stream) throws IOException {
    dbgLog.fine("decodeRecordType999(): start");
    try {
        if (stream == null) {
            throw new IllegalArgumentException("RT999: stream == null!");
        }
        // first check the 4-byte header value
        //if (stream.markSupported()){
        stream.mark(1000);
        //}

        // 999.0 check the first 4 bytes
        byte[] headerCodeRt999 = new byte[LENGTH_RECORD_TYPE999_CODE];
        //dbgLog.fine("RT999: stream position="+stream.pos);

        int nbytes_rt999 = stream.read(headerCodeRt999, 0, LENGTH_RECORD_TYPE999_CODE);
        // to-do check against nbytes

        //printHexDump(headerCodeRt999, "RT999 header test");

        ByteBuffer bb_header_code_rt999 = ByteBuffer.wrap(headerCodeRt999, 0, LENGTH_RECORD_TYPE999_CODE);
        if (isLittleEndian) {
            bb_header_code_rt999.order(ByteOrder.LITTLE_ENDIAN);
        }

        int intRT999test = bb_header_code_rt999.getInt();
        dbgLog.fine("header test value: RT999=" + intRT999test);

        if (intRT999test != 999) {
            //if (stream.markSupported()){
            dbgLog.fine("intRT999test failed=" + intRT999test);
            stream.reset();
            throw new IOException("RT999:Header value(999) was not correctly detected:" + intRT999test);
            //}
        }

        // 999.1 check 4-byte integer Filler block
        byte[] length_filler = new byte[LENGTH_RT999_FILLER];

        int nbytes_rt999_1 = stream.read(length_filler, 0, LENGTH_RT999_FILLER);
        // to-do check against nbytes

        //printHexDump(length_how_many_line_bytes, "RT999 how_many_line_bytes");

        ByteBuffer bb_filler = ByteBuffer.wrap(length_filler, 0, LENGTH_RT999_FILLER);
        if (isLittleEndian) {
            bb_filler.order(ByteOrder.LITTLE_ENDIAN);
        }

        int rt999filler = bb_filler.getInt();
        dbgLog.fine("rt999filler=" + rt999filler);

        if (rt999filler == 0) {
            dbgLog.fine("the end of the dictionary section");
        } else {
            throw new IOException("RT999: failed to detect the end mark(0): value=" + rt999filler);
        }

        // missing value processing concerning HIGHEST/LOWEST values
        Set<Map.Entry<String, InvalidData>> msvlc = invalidDataTable.entrySet();
        for (Iterator<Map.Entry<String, InvalidData>> itc = msvlc.iterator(); itc.hasNext();) {
            Map.Entry<String, InvalidData> et = itc.next();
            String variable = et.getKey();
            dbgLog.fine("variable=" + variable);
            InvalidData invalidDataInfo = et.getValue();

            if (invalidDataInfo.getInvalidRange() != null && !invalidDataInfo.getInvalidRange().isEmpty()) {
                if (invalidDataInfo.getInvalidRange().get(0).equals(OBSTypeHexValue.get("LOWEST"))) {
                    dbgLog.fine("1st value is LOWEST");
                    invalidDataInfo.getInvalidRange().set(0, "LOWEST");
                } else if (invalidDataInfo.getInvalidRange().get(1).equals(OBSTypeHexValue.get("HIGHEST"))) {
                    dbgLog.fine("2nd value is HIGHEST");
                    invalidDataInfo.getInvalidRange().set(1, "HIGHEST");
                }
            }
        }
        dbgLog.fine("invalidDataTable:\n" + invalidDataTable);

        // TODO: take care of the invalid data! - add the appropriate
        // value labels (?)
        // should it be done here, or at the end of ingest?
        // -- L.A. 4.0 alpha
        ///smd.setInvalidDataTable(invalidDataTable);
    } catch (IOException ex) {
        //ex.printStackTrace();
        //exit(1);
        throw ex;
    }

    dbgLog.fine("decodeRecordType999(): end");
}
From source file:au.org.ala.layers.intersect.Grid.java
/**
 * Increase sampleEveryNthPoint to return a smaller grid.
 *
 * Grid max and min values may be skipped.
 *
 * This does not use previously cached data.
 *
 * @param sampleEveryNthPoint
 * @return
 */
public float[] getGrid(int sampleEveryNthPoint) {
    int maxArrayLength = Integer.MAX_VALUE - 10;

    if (subgrids != null) {
        //sample points
        int size = 1000;
        double[][] points = new double[size * size][2];
        int pos = 0;
        for (int i = 0; i < 1000; i++) {
            for (int j = 0; j < 1000; j++) {
                points[pos][0] = xmin + (xmax - xmin) * j / (double) size;
                points[pos][1] = ymax - (ymax - ymin) * i / (double) size;
                pos++;
            }
        }

        return getValues3(points, 64);
    }

    int length = (nrows / sampleEveryNthPoint) * (ncols);

    float[] ret = new float[length];

    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");

    try { //read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        int sz = (int) Math.min(afile.length() / sampleEveryNthPoint / sampleEveryNthPoint, maxArrayLength);
        sz += 8 - sz % 8;
        byte[] b = new byte[sz];

        long i = 0;
        long max = 0;
        int len;
        while ((len = afile.read(b)) > 0) {
            ByteBuffer bb = ByteBuffer.wrap(b);
            if (byteorderLSB) {
                bb.order(ByteOrder.LITTLE_ENDIAN);
            }

            if (datatype.equalsIgnoreCase("UBYTE")) {
                max += len;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.get();
                    if (ret[(int) (i / sampleEveryNthPoint)] < 0) {
                        ret[(int) (i / sampleEveryNthPoint)] += 256;
                    }
                }
            } else if (datatype.equalsIgnoreCase("BYTE")) {
                max += len;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.get();
                }
            } else if (datatype.equalsIgnoreCase("SHORT")) {
                max += len / 2;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.getShort();
                }
            } else if (datatype.equalsIgnoreCase("INT")) {
                max += len / 4;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.getInt();
                }
            } else if (datatype.equalsIgnoreCase("LONG")) {
                max += len / 8;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.getLong();
                }
            } else if (datatype.equalsIgnoreCase("FLOAT")) {
                max += len / 4;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.getFloat();
                }
            } else if (datatype.equalsIgnoreCase("DOUBLE")) {
                max += len / 8;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / (long) sampleEveryNthPoint)] = (float) bb.getDouble();
                }
            } else {
                // should not happen; catch anyway...
                max += len / 4;
                for (; i < max; i++) {
                    ret[(int) (i / (long) sampleEveryNthPoint)] = Float.NaN;
                }
            }
        }

        //replace not a number
        for (i = 0; i < length; i++) {
            if ((float) ret[(int) i] == (float) nodatavalue) {
                ret[(int) i] = Float.NaN;
            } else {
                ret[(int) i] *= rescale;
            }
        }
    } catch (Exception e) {
        logger.error("An error has occurred - probably a file error", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    grid_data = ret;
    return ret;
}
From source file:au.org.ala.layers.intersect.Grid.java
float[] getGrid(double xmin, double ymin, double xmax, double ymax) {
    //expects largest y at the top
    //expects input ranges inside of grid ranges

    int width = (int) ((xmax - xmin) / xres);
    int height = (int) ((ymax - ymin) / yres);
    int startx = (int) ((xmin - this.xmin) / xres);
    int endx = startx + width;
    int starty = (int) ((ymin - this.ymin) / yres);
    //int endy = starty + height;

    int length = width * height;

    float[] ret = new float[length];
    int pos = 0;

    int i;
    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");

    int size = 4;
    if (datatype.equals("BYTE") || datatype.equals("UBYTE")) {
        size = 1;
    } else if (datatype.equals("SHORT")) {
        size = 2;
    } else if (datatype.equals("INT")) {
        size = 4;
    } else if (datatype.equals("LONG")) {
        size = 8;
    } else if (datatype.equals("FLOAT")) {
        size = 4;
    } else if (datatype.equals("DOUBLE")) {
        size = 8;
    }

    try { //read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        //seek to first raster
        afile.seek(((long) this.ncols) * starty * size);

        //read relevant rasters
        int readSize = this.ncols * height * size;
        int readLen = this.ncols * height;
        byte[] b = new byte[readSize];
        afile.read(b);
        ByteBuffer bb = ByteBuffer.wrap(b);

        if (byteorderLSB) {
            bb.order(ByteOrder.LITTLE_ENDIAN);
        }

        if (datatype.equalsIgnoreCase("BYTE")) {
            for (i = 0; i < readLen; i++) {
                int x = i % this.ncols;
                if (x < startx || x >= endx) {
                    bb.get();
                } else {
                    ret[pos++] = bb.get();
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            for (i = 0; i < readLen; i++) {
                int x = i % this.ncols;
                if (x < startx || x >= endx) {
                    bb.get();
                } else {
                    ret[pos] = bb.get();
                    if (ret[pos] < 0) {
                        ret[pos] += 256;
                    }
                    pos++;
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            for (i = 0; i < readLen; i++) {
                int x = i % this.ncols;
                if (x < startx || x >= endx) {
                    bb.getShort();
                } else {
                    ret[pos++] = bb.getShort();
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            for (i = 0; i < readLen; i++) {
                int x = i % this.ncols;
                if (x < startx || x >= endx) {
                    bb.getInt();
                } else {
                    ret[pos++] = bb.getInt();
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            for (i = 0; i < readLen; i++) {
                int x = i % this.ncols;
                if (x < startx || x >= endx) {
                    bb.getLong();
                } else {
                    ret[pos++] = bb.getLong();
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            for (i = 0; i < readLen; i++) {
                int x = i % this.ncols;
                if (x < startx || x >= endx) {
                    bb.getFloat();
                } else {
                    ret[pos++] = bb.getFloat();
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            for (i = 0; i < readLen; i++) {
                int x = i % this.ncols;
                if (x < startx || x >= endx) {
                    bb.getDouble();
                } else {
                    ret[pos++] = (float) bb.getDouble();
                }
            }
        } else {
            // should not happen; catch anyway...
            for (i = 0; i < length; i++) {
                ret[i] = Float.NaN;
            }
        }

        //replace not a number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
    } catch (Exception e) {
        logger.error("GRID: " + e.toString(), e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    grid_data = ret;
    return ret;
}
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java
void decodeRecordType1(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType1(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    // how to read each recordType
    // 1. set-up the following objects before reading bytes
    //    a. the working byte array
    //    b. the storage object
    // the length of this field: 172bytes = 60 + 4 + 12 + 4 + 8 + 84
    // this field consists of 6 distinct blocks

    byte[] recordType1 = new byte[LENGTH_RECORDTYPE1];
    // int caseWeightVariableOBSIndex = 0;

    try {
        int nbytes = stream.read(recordType1, 0, LENGTH_RECORDTYPE1);

        //printHexDump(recordType1, "recordType1");

        if (nbytes == 0) {
            throw new IOException("reading recordType1: no byte was read");
        }

        // 1.1 60 byte-String that tells the platform/version of SPSS that
        // wrote this file

        int offset_start = 0;
        int offset_end = LENGTH_SPSS_PRODUCT_INFO; // 60 bytes

        String productInfo = new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII");

        dbgLog.fine("productInfo:\n" + productInfo + "\n");
        dataTable.setOriginalFormatVersion(productInfo);

        // try to parse out the SPSS version that created this data
        // file:

        String spssVersionTag = null;

        String regexpVersionNumber = ".*Release ([0-9]*)";
        Pattern versionTagPattern = Pattern.compile(regexpVersionNumber);
        Matcher matcher = versionTagPattern.matcher(productInfo);
        if (matcher.find()) {
            spssVersionTag = matcher.group(1);
            dbgLog.fine("SPSS Version Number: " + spssVersionTag);
        }

        // TODO:
        // try a more elaborate regex (like the one for the "new-style"
        // productInfo line, below), to select the version number, the
        // minor version number and the platform (windows vs. mac) separately.
        // would be cleaner to save just that, rather than the entire
        // productInfo tag.
        // -- L.A. 4.0 beta

        if (spssVersionTag == null || spssVersionTag.equals("")) {
            // Later versions of SPSS have different formatting of the
            // productInfo line:
            regexpVersionNumber = ".* IBM SPSS STATISTICS.* ([^ ]*) ([0-9][0-9]*)([^ ]*)";
            versionTagPattern = Pattern.compile(regexpVersionNumber);
            matcher = versionTagPattern.matcher(productInfo);
            if (matcher.find()) {
                String spssPlatformTag = matcher.group(1);
                spssVersionTag = matcher.group(2);
                String spssVersionTagMinor = matcher.group(3);

                dbgLog.fine("SPSS Version Number (new style): " + spssVersionTag);
                dbgLog.fine("SPSS Version/Platform Identification (new style:) " + spssPlatformTag + " "
                        + spssVersionTag + spssVersionTagMinor);
                dataTable.setOriginalFormatVersion(spssVersionTag + spssVersionTagMinor + " " + spssPlatformTag);
            }
        }

        if (spssVersionTag != null && !spssVersionTag.equals("")) {
            spssVersionNumber = Integer.valueOf(spssVersionTag).intValue();

            /*
             * Starting with SPSS version 16, the default encoding is
             * UTF-8.
             * But we are only going to use it if the user did not explicitly
             * specify the encoding on the addfiles page. Then we'd want
             * to stick with whatever they entered.
             * (also, it appears that (starting with the same version 16?)
             * it is actually possible to define the locale/character set
             * in the file - section 7, sub-type 20; TODO: decide which
             * one takes precedence, if we have the encoding defined both
             * in the file and through the UI. -- L.A. 4.0 beta)
             */
            if (spssVersionNumber > 15) {
                if (getDataLanguageEncoding() == null) {
                    //defaultCharSet = "windows-1252"; // temporary! -- L.A. "UTF-8";
                    defaultCharSet = "UTF-8";
                }
            }
        }

        // TODO:
        // decide if we want to save the [determined/guessed] character set
        // somewhere in the dataset object.
        // this may be relevant in cases when accented/non-latin characters
        // get ingested incorrectly;
        // -- L.A. 4.0 beta

        // 1.2) 4-byte file-layout-code (byte-order)

        offset_start = offset_end;
        offset_end += LENGTH_FILE_LAYOUT_CODE; // 4 byte

        ByteBuffer bb_fileLayout_code = ByteBuffer.wrap(recordType1, offset_start, LENGTH_FILE_LAYOUT_CODE);

        ByteBuffer byteOderTest = bb_fileLayout_code.duplicate();
        // interprete the 4 byte as int

        int int2test = byteOderTest.getInt();

        if (int2test == 2 || int2test == 3) {
            dbgLog.fine("integer == " + int2test + ": the byte-oder of the writer is the same "
                    + "as the counterpart of Java: Big Endian");
        } else {
            // Because Java's byte-order is always big endian,
            // this(!=2) means this sav file was written on a little-endian machine
            // non-string, multi-bytes blocks must be byte-reversed

            bb_fileLayout_code.order(ByteOrder.LITTLE_ENDIAN);

            int2test = bb_fileLayout_code.getInt();

            if (int2test == 2 || int2test == 3) {
                dbgLog.fine("The sav file was saved on a little endian machine");
                dbgLog.fine("Reveral of the bytes is necessary to decode " + "multi-byte, non-string blocks");

                isLittleEndian = true;
            } else {
                throw new IOException("reading recordType1:unknown file layout code=" + int2test);
            }
        }

        dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());

        // 1.3 4-byte Number_Of_OBS_Units_Per_Case
        // (= how many RT2 records => how many varilables)

        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE; // 4 byte

        ByteBuffer bb_OBS_units_per_case = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE);

        if (isLittleEndian) {
            bb_OBS_units_per_case.order(ByteOrder.LITTLE_ENDIAN);
        }

        OBSUnitsPerCase = bb_OBS_units_per_case.getInt();

        dbgLog.fine("RT1: OBSUnitsPerCase=" + OBSUnitsPerCase);

        // 1.4 4-byte Compression_Switch

        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_SWITCH; // 4 byte

        ByteBuffer bb_compression_switch = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_COMPRESSION_SWITCH);

        if (isLittleEndian) {
            bb_compression_switch.order(ByteOrder.LITTLE_ENDIAN);
        }

        int compression_switch = bb_compression_switch.getInt();
        if (compression_switch == 0) {
            // data section is not compressed
            isDataSectionCompressed = false;
            dbgLog.fine("data section is not compressed");
        } else {
            dbgLog.fine("data section is compressed:" + compression_switch);
        }

        // 1.5 4-byte Case-Weight Variable Index
        // warning: this variable index starts from 1, not 0

        offset_start = offset_end;
        offset_end += LENGTH_CASE_WEIGHT_VARIABLE_INDEX; // 4 byte

        ByteBuffer bb_Case_Weight_Variable_Index = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_CASE_WEIGHT_VARIABLE_INDEX);

        if (isLittleEndian) {
            bb_Case_Weight_Variable_Index.order(ByteOrder.LITTLE_ENDIAN);
        }

        caseWeightVariableOBSIndex = bb_Case_Weight_Variable_Index.getInt();

        /// caseWeightVariableOBSIndex will be used later on to locate
        /// the weight variable; so we'll be able to mark the corresponding
        /// variables properly.
        // TODO: make sure case weight variables are properly handled!
        // -- L.A. 4.0 beta
        ///smd.getFileInformation().put("caseWeightVariableOBSIndex", caseWeightVariableOBSIndex);

        // 1.6 4-byte Number of Cases

        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_CASES; // 4 byte

        ByteBuffer bb_Number_Of_Cases = ByteBuffer.wrap(recordType1, offset_start, LENGTH_NUMBER_OF_CASES);

        if (isLittleEndian) {
            bb_Number_Of_Cases.order(ByteOrder.LITTLE_ENDIAN);
        }

        int numberOfCases = bb_Number_Of_Cases.getInt();

        if (numberOfCases < 0) {
            // -1 if numberOfCases is unknown
            throw new RuntimeException("number of cases is not recorded in the header");
        } else {
            dbgLog.fine("RT1: number of cases is recorded= " + numberOfCases);
            dataTable.setCaseQuantity(new Long(numberOfCases));
            ///caseQnty = numberOfCases;
            ///smd.getFileInformation().put("caseQnty", numberOfCases);
        }

        // 1.7 8-byte compression-bias [not long but double]

        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_BIAS; // 8 byte

        ByteBuffer bb_compression_bias = ByteBuffer
                .wrap(Arrays.copyOfRange(recordType1, offset_start, offset_end));

        if (isLittleEndian) {
            bb_compression_bias.order(ByteOrder.LITTLE_ENDIAN);
        }

        Double compressionBias = bb_compression_bias.getDouble();

        // TODO:
        // check if this "compression bias" is being used anywhere?
        // doesn't seem to be!
        // -- 4.0 alpha

        if (compressionBias == 100d) {
            // 100 is expected
            dbgLog.fine("compressionBias is 100 as expected");
            ///smd.getFileInformation().put("compressionBias", 100);
        } else {
            dbgLog.fine("compression bias is not 100: " + compressionBias);
            ///smd.getFileInformation().put("compressionBias", compressionBias);
        }

        // 1.8 84-byte File Creation Information (date/time: dd MM yyhh:mm:ss +
        // 64-byte label)

        offset_start = offset_end;
        offset_end += LENGTH_FILE_CREATION_INFO; // 84 bytes

        String fileCreationInfo = getNullStrippedString(
                new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII"));

        dbgLog.fine("fileCreationInfo:\n" + fileCreationInfo + "\n");

        String fileCreationDate = fileCreationInfo.substring(0, length_file_creation_date);
        int dateEnd = length_file_creation_date + length_file_creation_time;
        String fileCreationTime = fileCreationInfo.substring(length_file_creation_date, (dateEnd));
        String fileCreationNote = fileCreationInfo.substring(dateEnd, length_file_creation_label);

        dbgLog.fine("fileDate=" + fileCreationDate);
        dbgLog.fine("fileTime=" + fileCreationTime);
        dbgLog.fine("fileNote" + fileCreationNote);

    } catch (IOException ex) {
        throw ex;
    }

    dbgLog.fine("decodeRecordType1(): end");
}
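Section 1.2 above contains a self-contained endianness probe: read a field whose value is known in advance (here 2 or 3), and if it decodes wrong under the default big-endian order, flip the byte order and re-read. A minimal sketch of the same idea (hypothetical class and test values, apart from the expected layout code):

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class LayoutCodeProbe {
    /** Detect the writer's byte order from a 4-byte field whose value is known. */
    public static ByteOrder detectOrder(byte[] header, int offset, int expected) throws IOException {
        ByteBuffer bb = ByteBuffer.wrap(header, offset, 4); // BIG_ENDIAN by default
        if (bb.duplicate().getInt() == expected) {          // duplicate() leaves bb's position untouched
            return ByteOrder.BIG_ENDIAN;
        }
        bb.order(ByteOrder.LITTLE_ENDIAN);
        if (bb.getInt() == expected) {
            return ByteOrder.LITTLE_ENDIAN;
        }
        throw new IOException("unrecognized layout code");
    }

    public static void main(String[] args) throws IOException {
        byte[] littleEndianHeader = { 2, 0, 0, 0 }; // value 2 written on a little-endian machine
        System.out.println(detectOrder(littleEndianHeader, 0, 2)); // LITTLE_ENDIAN
    }
}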
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java
void decodeRecordType6(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType6(): start *****");
    try {
        if (stream == null) {
            throw new IllegalArgumentException("stream == null!");
        }
        // this section may not exist, so first check the 4-byte header value
        //if (stream.markSupported()){
        stream.mark(1000);
        //}

        // 6.0 check the first 4 bytes
        byte[] headerCodeRt6 = new byte[LENGTH_RECORD_TYPE6_CODE];

        int nbytes_rt6 = stream.read(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE);
        // to-do check against nbytes

        //printHexDump(headerCodeRt6, "RT6 header test");

        ByteBuffer bb_header_code_rt6 = ByteBuffer.wrap(headerCodeRt6, 0, LENGTH_RECORD_TYPE6_CODE);
        if (isLittleEndian) {
            bb_header_code_rt6.order(ByteOrder.LITTLE_ENDIAN);
        }

        int intRT6test = bb_header_code_rt6.getInt();
        dbgLog.fine("RT6: header test value=" + intRT6test);

        if (intRT6test != 6) {
            //if (stream.markSupported()){
            //out.print("iteration="+safteyCounter);
            //dbgLog.fine("iteration="+safteyCounter);
            dbgLog.fine("intRT6test failed=" + intRT6test);
            stream.reset();
            return;
            //}
        }

        // 6.1 check 4-byte integer that tells how many lines follow
        byte[] length_how_many_line_bytes = new byte[LENGTH_RT6_HOW_MANY_LINES];

        int nbytes_rt6_1 = stream.read(length_how_many_line_bytes, 0, LENGTH_RT6_HOW_MANY_LINES);
        // to-do check against nbytes

        //printHexDump(length_how_many_line_bytes, "RT6 how_many_line_bytes");

        ByteBuffer bb_how_many_lines = ByteBuffer.wrap(length_how_many_line_bytes, 0,
                LENGTH_RT6_HOW_MANY_LINES);
        if (isLittleEndian) {
            bb_how_many_lines.order(ByteOrder.LITTLE_ENDIAN);
        }

        int howManyLinesRt6 = bb_how_many_lines.getInt();
        dbgLog.fine("how Many lines follow=" + howManyLinesRt6);

        // 6.2 read 80-char-long lines
        String[] documentRecord = new String[howManyLinesRt6];

        for (int i = 0; i < howManyLinesRt6; i++) {
            byte[] line = new byte[80];
            int nbytes_rt6_line = stream.read(line);

            documentRecord[i] = StringUtils.stripEnd(
                    new String(Arrays.copyOfRange(line, 0, LENGTH_RT6_DOCUMENT_LINE), defaultCharSet), " ");
            dbgLog.fine(i + "-th line =" + documentRecord[i] + "<-");
        }
        dbgLog.fine("documentRecord:\n" + StringUtils.join(documentRecord, "\n"));

    } catch (IOException ex) {
        //ex.printStackTrace();
        throw ex;
    }

    dbgLog.fine("***** decodeRecordType6(): end *****");
}