List of usage examples for java.nio ByteBuffer getDouble
public abstract double getDouble();
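getDouble() reads the next eight bytes at the buffer's current position, composes them into a double using the buffer's current byte order, and advances the position by eight. A minimal usage sketch follows (the class name, the chosen value, and the little-endian order are illustrative, not taken from the examples below):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class GetDoubleExample {
    public static void main(String[] args) {
        // Encode a double, then decode it again; the byte order used for
        // reading must match the order used when the bytes were written.
        byte[] data = ByteBuffer.allocate(Double.BYTES)
                .order(ByteOrder.LITTLE_ENDIAN)
                .putDouble(3.14159)
                .array();

        ByteBuffer buffer = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
        double value = buffer.getDouble(); // reads 8 bytes, advances position by 8
        System.out.println(value);         // prints 3.14159
    }
}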
From source file:org.wso2.carbon.analytics.data.commons.utils.AnalyticsCommonUtils.java
public static Map<String, Object> decodeRecordValues(byte[] data, Set<String> columns) throws AnalyticsException {
    /* using LinkedHashMap to retain the column order */
    Map<String, Object> result = new LinkedHashMap<>();
    int type, size;
    String colName;
    Object value;
    byte[] buff;
    byte boolVal;
    byte[] binData;
    try {
        ByteBuffer buffer = ByteBuffer.wrap(data);
        while (buffer.remaining() > 0) {
            size = buffer.getInt();
            if (size == 0) {
                break;
            }
            buff = new byte[size];
            buffer.get(buff, 0, size);
            colName = new String(buff, StandardCharsets.UTF_8);
            type = buffer.get();
            switch (type) {
            case DATA_TYPE_STRING:
                size = buffer.getInt();
                buff = new byte[size];
                buffer.get(buff, 0, size);
                value = new String(buff, StandardCharsets.UTF_8);
                break;
            case DATA_TYPE_LONG:
                value = buffer.getLong();
                break;
            case DATA_TYPE_DOUBLE:
                value = buffer.getDouble();
                break;
            case DATA_TYPE_BOOLEAN:
                boolVal = buffer.get();
                if (boolVal == BOOLEAN_TRUE) {
                    value = true;
                } else if (boolVal == BOOLEAN_FALSE) {
                    value = false;
                } else {
                    throw new AnalyticsException("Invalid encoded boolean value: " + boolVal);
                }
                break;
            case DATA_TYPE_INTEGER:
                value = buffer.getInt();
                break;
            case DATA_TYPE_FLOAT:
                value = buffer.getFloat();
                break;
            case DATA_TYPE_BINARY:
                size = buffer.getInt();
                binData = new byte[size];
                buffer.get(binData);
                value = binData;
                break;
            case DATA_TYPE_OBJECT:
                size = buffer.getInt();
                binData = new byte[size];
                buffer.get(binData);
                value = deserializeObject(binData);
                break;
            case DATA_TYPE_NULL:
                value = null;
                break;
            default:
                throw new AnalyticsException("Unknown encoded data source type : " + type);
            }
            if (columns == null || columns.contains(colName)) {
                result.put(colName, value);
            }
        }
    } catch (Exception e) {
        throw new AnalyticsException("Error in decoding record values: " + e.getMessage(), e);
    }
    return result;
}
From source file:org.wso2.carbon.analytics.datasource.core.util.GenericUtils.java
public static Map<String, Object> decodeRecordValues(byte[] data, Set<String> columns) throws AnalyticsException {
    /* using LinkedHashMap to retain the column order */
    Map<String, Object> result = new LinkedHashMap<>();
    int type, size;
    String colName;
    Object value;
    byte[] buff;
    byte boolVal;
    byte[] binData;
    try {
        ByteBuffer buffer = ByteBuffer.wrap(data);
        while (buffer.remaining() > 0) {
            size = buffer.getInt();
            if (size == 0) {
                break;
            }
            buff = new byte[size];
            buffer.get(buff, 0, size);
            colName = new String(buff, StandardCharsets.UTF_8);
            type = buffer.get();
            switch (type) {
            case DATA_TYPE_STRING:
                size = buffer.getInt();
                buff = new byte[size];
                buffer.get(buff, 0, size);
                value = new String(buff, StandardCharsets.UTF_8);
                break;
            case DATA_TYPE_LONG:
                value = buffer.getLong();
                break;
            case DATA_TYPE_DOUBLE:
                value = buffer.getDouble();
                break;
            case DATA_TYPE_BOOLEAN:
                boolVal = buffer.get();
                if (boolVal == BOOLEAN_TRUE) {
                    value = true;
                } else if (boolVal == BOOLEAN_FALSE) {
                    value = false;
                } else {
                    throw new AnalyticsException("Invalid encoded boolean value: " + boolVal);
                }
                break;
            case DATA_TYPE_INTEGER:
                value = buffer.getInt();
                break;
            case DATA_TYPE_FLOAT:
                value = buffer.getFloat();
                break;
            case DATA_TYPE_BINARY:
                size = buffer.getInt();
                binData = new byte[size];
                buffer.get(binData);
                value = binData;
                break;
            case DATA_TYPE_OBJECT:
                size = buffer.getInt();
                binData = new byte[size];
                buffer.get(binData);
                value = GenericUtils.deserializeObject(binData);
                break;
            case DATA_TYPE_NULL:
                value = null;
                break;
            default:
                throw new AnalyticsException("Unknown encoded data source type : " + type);
            }
            if (columns == null || columns.contains(colName)) {
                result.put(colName, value);
            }
        }
    } catch (Exception e) {
        throw new AnalyticsException("Error in decoding record values: " + e.getMessage(), e);
    }
    return result;
}
From source file:au.org.ala.layers.grid.GridCacheBuilder.java
static void nextRowOfFloats(float[] row, String datatype, boolean byteOrderLSB, int ncols,
        RandomAccessFile raf, byte[] b, float noDataValue) throws IOException {
    int size = 4;
    if (datatype.charAt(0) == 'U') {
        size = 1;
    } else if (datatype.charAt(0) == 'B') {
        size = 1;
    } else if (datatype.charAt(0) == 'S') {
        size = 2;
    } else if (datatype.charAt(0) == 'I') {
        size = 4;
    } else if (datatype.charAt(0) == 'L') {
        size = 8;
    } else if (datatype.charAt(0) == 'F') {
        size = 4;
    } else if (datatype.charAt(0) == 'D') {
        size = 8;
    }

    raf.read(b, 0, size * ncols);
    ByteBuffer bb = ByteBuffer.wrap(b);
    if (byteOrderLSB) {
        bb.order(ByteOrder.LITTLE_ENDIAN);
    } else {
        bb.order(ByteOrder.BIG_ENDIAN);
    }

    int i;
    int length = ncols;
    if (datatype.charAt(0) == 'U') {
        for (i = 0; i < length; i++) {
            float ret = bb.get();
            if (ret < 0) {
                ret += 256;
            }
            row[i] = ret;
        }
    } else if (datatype.charAt(0) == 'B') {
        for (i = 0; i < length; i++) {
            row[i] = (float) bb.get();
        }
    } else if (datatype.charAt(0) == 'S') {
        for (i = 0; i < length; i++) {
            row[i] = (float) bb.getShort();
        }
    } else if (datatype.charAt(0) == 'I') {
        for (i = 0; i < length; i++) {
            row[i] = (float) bb.getInt();
        }
    } else if (datatype.charAt(0) == 'L') {
        for (i = 0; i < length; i++) {
            row[i] = (float) bb.getLong();
        }
    } else if (datatype.charAt(0) == 'F') {
        for (i = 0; i < length; i++) {
            row[i] = (float) bb.getFloat();
        }
    } else if (datatype.charAt(0) == 'D') {
        for (i = 0; i < length; i++) {
            row[i] = (float) bb.getDouble();
        }
    } else {
        logger.info("UNKNOWN TYPE: " + datatype);
    }

    for (i = 0; i < length; i++) {
        if (row[i] == noDataValue) {
            row[i] = Float.NaN;
        }
    }
}
From source file:io.github.msdk.io.mzdata.MzDataSaxHandler.java
/**
 * {@inheritDoc}
 *
 * endElement()
 */
@SuppressWarnings("null")
public void endElement(String namespaceURI, String sName, String qName) throws SAXException {

    if (canceled)
        throw new SAXException("Parsing Cancelled");

    // <spectrumInstrument>
    if (qName.equalsIgnoreCase("spectrumInstrument")) {
        spectrumInstrumentFlag = false;
    }

    // <precursor>
    if (qName.equalsIgnoreCase("precursor")) {
        precursorFlag = false;
    }

    // <spectrum>
    if (qName.equalsIgnoreCase("spectrum")) {
        spectrumInstrumentFlag = false;

        // Auto-detect whether this scan is centroided
        MsSpectrumType spectrumType = SpectrumTypeDetectionAlgorithm.detectSpectrumType(mzBuffer,
                intensityBuffer, peaksCount);

        // Create a new scan
        MsFunction msFunction = MSDKObjectBuilder.getMsFunction(msLevel);
        MsScan newScan = MSDKObjectBuilder.getMsScan(dataStore, scanNumber, msFunction);
        newScan.setDataPoints(mzBuffer, intensityBuffer, peaksCount);
        newScan.setSpectrumType(spectrumType);
        newScan.setPolarity(polarity);

        if (retentionTime != null) {
            ChromatographyInfo chromInfo = MSDKObjectBuilder.getChromatographyInfo1D(SeparationType.UNKNOWN,
                    retentionTime);
            newScan.setChromatographyInfo(chromInfo);
        }

        if (precursorMz != null) {
            IsolationInfo isolation = MSDKObjectBuilder.getIsolationInfo(Range.singleton(precursorMz), null,
                    precursorMz, precursorCharge, null);
            newScan.getIsolations().add(isolation);
        }

        // Add the scan to the file
        newRawFile.addScan(newScan);
        parsedScans++;
    }

    // <mzArrayBinary>
    if (qName.equalsIgnoreCase("mzArrayBinary")) {
        mzArrayBinaryFlag = false;

        // Allocate space for the whole array
        if (mzBuffer.length < peaksCount)
            mzBuffer = new double[peaksCount * 2];

        byte[] peakBytes = Base64.decodeBase64(charBuffer.toString().getBytes());
        ByteBuffer currentMzBytes = ByteBuffer.wrap(peakBytes);

        if (endian.equals("big")) {
            currentMzBytes = currentMzBytes.order(ByteOrder.BIG_ENDIAN);
        } else {
            currentMzBytes = currentMzBytes.order(ByteOrder.LITTLE_ENDIAN);
        }

        for (int i = 0; i < peaksCount; i++) {
            if (precision == null || precision.equals("32"))
                mzBuffer[i] = (double) currentMzBytes.getFloat();
            else
                mzBuffer[i] = currentMzBytes.getDouble();
        }
    }

    // <intenArrayBinary>
    if (qName.equalsIgnoreCase("intenArrayBinary")) {
        intenArrayBinaryFlag = false;

        // Allocate space for the whole array
        if (intensityBuffer.length < peaksCount)
            intensityBuffer = new float[peaksCount * 2];

        byte[] peakBytes = Base64.decodeBase64(charBuffer.toString().getBytes());
        ByteBuffer currentIntensityBytes = ByteBuffer.wrap(peakBytes);

        if (endian.equals("big")) {
            currentIntensityBytes = currentIntensityBytes.order(ByteOrder.BIG_ENDIAN);
        } else {
            currentIntensityBytes = currentIntensityBytes.order(ByteOrder.LITTLE_ENDIAN);
        }

        for (int i = 0; i < peaksCount; i++) {
            if (precision == null || precision.equals("32"))
                intensityBuffer[i] = currentIntensityBytes.getFloat();
            else
                intensityBuffer[i] = (float) currentIntensityBytes.getDouble();
        }
    }
}
From source file:org.apache.carbondata.core.util.CarbonUtil.java
public static ValueEncoderMeta deserializeEncoderMetaNew(byte[] encodeMeta) {
    ByteBuffer buffer = ByteBuffer.wrap(encodeMeta);
    char measureType = buffer.getChar();
    ValueEncoderMeta valueEncoderMeta = new ValueEncoderMeta();
    valueEncoderMeta.setType(measureType);
    switch (measureType) {
    case CarbonCommonConstants.DOUBLE_MEASURE:
        valueEncoderMeta.setMaxValue(buffer.getDouble());
        valueEncoderMeta.setMinValue(buffer.getDouble());
        valueEncoderMeta.setUniqueValue(buffer.getDouble());
        break;
    case CarbonCommonConstants.BIG_DECIMAL_MEASURE:
        valueEncoderMeta.setMaxValue(0.0);
        valueEncoderMeta.setMinValue(0.0);
        valueEncoderMeta.setUniqueValue(0.0);
        break;
    case CarbonCommonConstants.BIG_INT_MEASURE:
        valueEncoderMeta.setMaxValue(buffer.getLong());
        valueEncoderMeta.setMinValue(buffer.getLong());
        valueEncoderMeta.setUniqueValue(buffer.getLong());
        break;
    default:
        throw new IllegalArgumentException("invalid measure type");
    }
    valueEncoderMeta.setDecimal(buffer.getInt());
    valueEncoderMeta.setDataTypeSelected(buffer.get());
    return valueEncoderMeta;
}
From source file:MSUmpire.SpectrumParser.mzXMLReadUnit.java
private void ParsePeakString(ScanData scan, String peakString) throws IOException, DataFormatException {
    int offset;

    peakString = peakString.replaceAll("\n", "");
    byte[] decoded = Base64.decodeBase64(peakString.getBytes());
    if ("zlib".equals(scan.compressionType)) {
        decoded = ZlibUncompressBuffer(decoded);
    }
    switch (scan.precision) {
    case (32): {
        offset = 0;
        for (int i = 0; i < scan.PeaksCountString; i++) {
            byte[] mz = new byte[] { decoded[offset], decoded[offset + 1], decoded[offset + 2],
                    decoded[offset + 3] };
            byte[] intensity = new byte[] { decoded[offset + 4], decoded[offset + 5], decoded[offset + 6],
                    decoded[offset + 7] };
            ByteBuffer mzBuffer = ByteBuffer.wrap(mz);
            ByteBuffer intBuffer = ByteBuffer.wrap(intensity);
            float intensityfloat = intBuffer.getFloat();
            float mzfloat = mzBuffer.getFloat();
            if (intensityfloat > 0f) {
                scan.AddPoint(mzfloat, intensityfloat);
            }
            mz = null;
            intensity = null;
            mzBuffer.clear();
            intBuffer.clear();
            mzBuffer = null;
            intBuffer = null;
            offset += 8;
        }
        break;
    }
    case (64): {
        offset = 0;
        for (int i = 0; i < scan.PeaksCountString; i++) {
            byte[] mz = new byte[] { decoded[offset], decoded[offset + 1], decoded[offset + 2],
                    decoded[offset + 3], decoded[offset + 4], decoded[offset + 5], decoded[offset + 6],
                    decoded[offset + 7] };
            byte[] intensity = new byte[] { decoded[offset + 8], decoded[offset + 9], decoded[offset + 10],
                    decoded[offset + 11], decoded[offset + 12], decoded[offset + 13], decoded[offset + 14],
                    decoded[offset + 15] };
            ByteBuffer mzBuffer = ByteBuffer.wrap(mz);
            ByteBuffer intBuffer = ByteBuffer.wrap(intensity);
            float intensityfloat = (float) intBuffer.getDouble();
            float mzfloat = (float) mzBuffer.getDouble();
            if (intensityfloat > 0f) {
                scan.AddPoint(mzfloat, intensityfloat);
            }
            mz = null;
            intensity = null;
            mzBuffer.clear();
            intBuffer.clear();
            mzBuffer = null;
            intBuffer = null;
            offset += 16;
        }
        break;
    }
    }
    peakString = null;
    decoded = null;
}
From source file:hivemall.mf.OnlineMatrixFactorizationUDTF.java
protected final void runIterativeTraining(@Nonnegative final int iterations) throws HiveException {
    final ByteBuffer inputBuf = this.inputBuf;
    final NioFixedSegment fileIO = this.fileIO;
    assert (inputBuf != null);
    assert (fileIO != null);
    final long numTrainingExamples = count;

    final Reporter reporter = getReporter();
    final Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.mf.MatrixFactorization$Counter", "iteration");

    try {
        if (lastWritePos == 0) { // run iterations w/o temporary file
            if (inputBuf.position() == 0) {
                return; // no training example
            }
            inputBuf.flip();

            int iter = 2;
            for (; iter <= iterations; iter++) {
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);

                while (inputBuf.remaining() > 0) {
                    int user = inputBuf.getInt();
                    int item = inputBuf.getInt();
                    double rating = inputBuf.getDouble();
                    // invoke train
                    count++;
                    train(user, item, rating);
                }
                cvState.multiplyLoss(0.5d);
                if (cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
                inputBuf.rewind();
            }
            logger.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples) + " training examples on memory (thus "
                    + NumberUtils.formatNumber(count) + " training updates in total) ");
        } else { // read training examples in the temporary file and invoke train for each example
            // write training examples in buffer to a temporary file
            if (inputBuf.position() > 0) {
                writeBuffer(inputBuf, fileIO, lastWritePos);
            } else if (lastWritePos == 0) {
                return; // no training example
            }
            try {
                fileIO.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e);
            }
            if (logger.isInfoEnabled()) {
                File tmpFile = fileIO.getFile();
                logger.info("Wrote " + numTrainingExamples
                        + " records to a temporary file for iterative training: " + tmpFile.getAbsolutePath()
                        + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }

            // run iterations
            int iter = 2;
            for (; iter <= iterations; iter++) {
                setCounterValue(iterCounter, iter);

                inputBuf.clear();
                long seekPos = 0L;
                while (true) {
                    reportProgress(reporter);
                    // TODO prefetch
                    // writes training examples to a buffer in the temporary file
                    final int bytesRead;
                    try {
                        bytesRead = fileIO.read(seekPos, inputBuf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + fileIO.getFile().getAbsolutePath(),
                                e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;
                    seekPos += bytesRead;

                    // reads training examples from a buffer
                    inputBuf.flip();
                    int remain = inputBuf.remaining();
                    assert (remain > 0) : remain;
                    for (; remain >= RECORD_BYTES; remain -= RECORD_BYTES) {
                        int user = inputBuf.getInt();
                        int item = inputBuf.getInt();
                        double rating = inputBuf.getDouble();
                        // invoke train
                        count++;
                        train(user, item, rating);
                    }
                    inputBuf.compact();
                }
                cvState.multiplyLoss(0.5d);
                if (cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples using a secondary storage (thus " + NumberUtils.formatNumber(count)
                    + " training updates in total)");
        }
    } finally {
        // delete the temporary file and release resources
        try {
            fileIO.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e);
        }
        this.inputBuf = null;
        this.fileIO = null;
    }
}
From source file:hivemall.fm.FactorizationMachineUDTF.java
protected void runTrainingIteration(int iterations) throws HiveException {
    final ByteBuffer inputBuf = this._inputBuf;
    final NioStatefullSegment fileIO = this._fileIO;
    assert (inputBuf != null);
    assert (fileIO != null);
    final long numTrainingExamples = _t;
    final boolean adaregr = _va_rand != null;

    final Reporter reporter = getReporter();
    final Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.fm.FactorizationMachines$Counter", "iteration");

    try {
        if (fileIO.getPosition() == 0L) { // run iterations w/o temporary file
            if (inputBuf.position() == 0) {
                return; // no training example
            }
            inputBuf.flip();

            int iter = 2;
            for (; iter <= iterations; iter++) {
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);

                while (inputBuf.remaining() > 0) {
                    int bytes = inputBuf.getInt();
                    assert (bytes > 0) : bytes;
                    int xLength = inputBuf.getInt();
                    final Feature[] x = new Feature[xLength];
                    for (int j = 0; j < xLength; j++) {
                        x[j] = instantiateFeature(inputBuf);
                    }
                    double y = inputBuf.getDouble();
                    // invoke train
                    ++_t;
                    train(x, y, adaregr);
                }
                if (_cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
                inputBuf.rewind();
            }
            LOG.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples) + " training examples on memory (thus "
                    + NumberUtils.formatNumber(_t) + " training updates in total) ");
        } else { // read training examples in the temporary file and invoke train for each example
            // write training examples in buffer to a temporary file
            if (inputBuf.remaining() > 0) {
                writeBuffer(inputBuf, fileIO);
            }
            try {
                fileIO.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e);
            }
            if (LOG.isInfoEnabled()) {
                File tmpFile = fileIO.getFile();
                LOG.info("Wrote " + numTrainingExamples
                        + " records to a temporary file for iterative training: " + tmpFile.getAbsolutePath()
                        + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }

            // run iterations
            int iter = 2;
            for (; iter <= iterations; iter++) {
                setCounterValue(iterCounter, iter);

                inputBuf.clear();
                fileIO.resetPosition();
                while (true) {
                    reportProgress(reporter);
                    // TODO prefetch
                    // writes training examples to a buffer in the temporary file
                    final int bytesRead;
                    try {
                        bytesRead = fileIO.read(inputBuf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + fileIO.getFile().getAbsolutePath(),
                                e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;

                    // reads training examples from a buffer
                    inputBuf.flip();
                    int remain = inputBuf.remaining();
                    if (remain < INT_BYTES) {
                        throw new HiveException("Illegal file format was detected");
                    }
                    while (remain >= INT_BYTES) {
                        int pos = inputBuf.position();
                        int recordBytes = inputBuf.getInt();
                        remain -= INT_BYTES;
                        if (remain < recordBytes) {
                            inputBuf.position(pos);
                            break;
                        }
                        final int xLength = inputBuf.getInt();
                        final Feature[] x = new Feature[xLength];
                        for (int j = 0; j < xLength; j++) {
                            x[j] = instantiateFeature(inputBuf);
                        }
                        double y = inputBuf.getDouble();
                        // invoke training
                        ++_t;
                        train(x, y, adaregr);
                        remain -= recordBytes;
                    }
                    inputBuf.compact();
                }
                if (_cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
            }
            LOG.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on a secondary storage (thus " + NumberUtils.formatNumber(_t)
                    + " training updates in total)");
        }
    } finally {
        // delete the temporary file and release resources
        try {
            fileIO.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e);
        }
        this._inputBuf = null;
        this._fileIO = null;
    }
}
From source file:com.healthmarketscience.jackcess.impl.ColumnImpl.java
/**
 * Deserialize a raw byte value for this column into an Object
 * @param data The raw byte value
 * @param order Byte order in which the raw value is stored
 * @return The deserialized Object
 * @usage _advanced_method_
 */
public Object read(byte[] data, ByteOrder order) throws IOException {
    ByteBuffer buffer = ByteBuffer.wrap(data).order(order);

    switch (getType()) {
    case BOOLEAN:
        throw new IOException("Tried to read a boolean from data instead of null mask.");
    case BYTE:
        return Byte.valueOf(buffer.get());
    case INT:
        return Short.valueOf(buffer.getShort());
    case LONG:
        return Integer.valueOf(buffer.getInt());
    case DOUBLE:
        return Double.valueOf(buffer.getDouble());
    case FLOAT:
        return Float.valueOf(buffer.getFloat());
    case SHORT_DATE_TIME:
        return readDateValue(buffer);
    case BINARY:
        return data;
    case TEXT:
        return decodeTextValue(data);
    case MONEY:
        return readCurrencyValue(buffer);
    case NUMERIC:
        return readNumericValue(buffer);
    case GUID:
        return readGUIDValue(buffer, order);
    case UNKNOWN_0D:
    case UNKNOWN_11:
        // treat like "binary" data
        return data;
    case COMPLEX_TYPE:
        return new ComplexValueForeignKeyImpl(this, buffer.getInt());
    default:
        throw new IOException("Unrecognized data type: " + _type);
    }
}
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java
private void decodeData(BufferedInputStream stream) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }// w w w . ja v a 2 s . co m int nvar = (Integer) smd.getFileInformation().get("varQnty"); int nobs = (Integer) smd.getFileInformation().get("caseQnty"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("data diminsion[rxc]=(" + nobs + "," + nvar + ")"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("bytes per row=" + bytes_per_row + " bytes"); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("variableTypelList=" + Arrays.deepToString(variableTypelList)); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("StringVariableTable=" + StringVariableTable); FileOutputStream fileOutTab = null; PrintWriter pwout = null; // create a File object to save the tab-delimited data file File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab"); String tabDelimitedDataFileName = tabDelimitedDataFile.getAbsolutePath(); // save the temp file name in the metadata object smd.getFileInformation().put("tabDelimitedDataFileLocation", tabDelimitedDataFileName); fileOutTab = new FileOutputStream(tabDelimitedDataFile); pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); // data storage // Object[][] dataTable = new Object[nobs][nvar]; // for later variable-wise calculations of statistics // dataTable2 sotres cut-out data columnwise Object[][] dataTable2 = new Object[nvar][nobs]; String[][] dateFormat = new String[nvar][nobs]; for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; Object[] dataRow = new Object[nvar]; int nbytes = stream.read(dataRowBytes, 0, bytes_per_row); if (nbytes == 0) { String errorMessage = "reading data: no data were read at(" + i + "th row)"; throw new IOException(errorMessage); } // decoding each row int byte_offset = 0; for (int columnCounter = 0; columnCounter < variableTypelList.length; columnCounter++) { Integer varType = variableTypeMap.get(variableTypelList[columnCounter]); String variableFormat = variableFormats[columnCounter]; boolean isDateTimeDatum = isDateTimeDatumList[columnCounter]; switch (varType != null ? 
varType : 256) { case -5: // Byte case // note: 1 byte signed byte byte_datum = dataRowBytes[byte_offset]; if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum); if (byte_datum >= BYTE_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum); dataRow[columnCounter] = MissingValueForTextDataFileNumeric; dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF } else { dataRow[columnCounter] = byte_datum; dataTable2[columnCounter][i] = byte_datum; } byte_offset++; break; case -4: // Stata-int (=java's short: 2byte) case // note: 2-byte signed int, not java's int ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2); if (isLittleEndian) { int_buffer.order(ByteOrder.LITTLE_ENDIAN); } short short_datum = int_buffer.getShort(); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum); if (short_datum >= INT_MISSIG_VALUE) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = short_datum; dataRow[columnCounter] = short_datum; } } byte_offset += 2; break; case -3: // stata-Long (= java's int: 4 byte) case // note: 4-byte singed, not java's long dbgLog.fine("DATreader: stata long"); ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if (isLittleEndian) { long_buffer.order(ByteOrder.LITTLE_ENDIAN); } int int_datum = long_buffer.getInt(); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long =" + int_datum); if (int_datum >= LONG_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long missing value=" + int_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = int_datum; dataRow[columnCounter] = int_datum; } } byte_offset += 4; break; case -2: // float case // note: 4-byte ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if 
(isLittleEndian) { float_buffer.order(ByteOrder.LITTLE_ENDIAN); } float float_datum = float_buffer.getFloat(); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum); if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value=" + float_datum); dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = float_datum; dataRow[columnCounter] = float_datum; } } byte_offset += 4; break; case -1: // double case // note: 8-byte ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8); if (isLittleEndian) { double_buffer.order(ByteOrder.LITTLE_ENDIAN); } double double_datum = double_buffer.getDouble(); if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) { dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value=" + double_datum); if (isDateTimeDatum) { dataRow[columnCounter] = MissingValueForTextDataFileString; } else { dataRow[columnCounter] = MissingValueForTextDataFileNumeric; } } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum)); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); dataRow[columnCounter] = ddt.decodedDateTime; dateFormat[columnCounter][i] = ddt.format; dataTable2[columnCounter][i] = dataRow[columnCounter]; } else { dataTable2[columnCounter][i] = double_datum; dataRow[columnCounter] = doubleNumberFormatter.format(double_datum); } } byte_offset += 8; break; case 0: // String case int strVarLength = StringVariableTable.get(columnCounter); String raw_datum = new String( Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)), "ISO-8859-1"); String string_datum = getNullStrippedString(raw_datum); if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum); if (string_datum.equals("")) { if (dbgLog.isLoggable(Level.FINER)) dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value=" + string_datum); dataRow[columnCounter] = MissingValueForTextDataFileString; dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF } else { String escapedString = string_datum.replaceAll("\"", Matcher.quoteReplacement("\\\"")); /* * Fixing the bug we've had in the Stata reader for * a longest time: new lines and tabs need to * be escaped too - otherwise it breaks our * TAB file structure! -- L.A. 
*/ escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t")); escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n")); escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r")); // the escaped version of the string will be // stored in the tab file: dataRow[columnCounter] = "\"" + escapedString + "\""; // but note that the "raw" version of it is // used for the UNF: dataTable2[columnCounter][i] = string_datum; } byte_offset += strVarLength; break; default: dbgLog.fine("unknown variable type found"); String errorMessage = "unknow variable Type found at data section"; throw new InvalidObjectException(errorMessage); } // switch } // for-columnCounter // dump the row of data to the external file pwout.println(StringUtils.join(dataRow, "\t")); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};"); } // for- i (row) pwout.close(); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer("\ndataTable2(variable-wise):\n"); dbgLog.finer(Arrays.deepToString(dataTable2)); dbgLog.finer("\ndateFormat(variable-wise):\n"); dbgLog.finer(Arrays.deepToString(dateFormat)); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("variableTypelList:\n" + Arrays.deepToString(variableTypelList)); dbgLog.fine("variableTypelListFinal:\n" + Arrays.deepToString(variableTypelListFinal)); } String[] unfValues = new String[nvar]; for (int j = 0; j < nvar; j++) { String variableType_j = variableTypelListFinal[j]; unfValues[j] = getUNF(dataTable2[j], dateFormat[j], variableType_j, unfVersionNumber, j); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(j + "th unf value" + unfValues[j]); } if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("unf set:\n" + Arrays.deepToString(unfValues)); fileUnfValue = UNF5Util.calculateUNF(unfValues); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("file-unf=" + fileUnfValue); stataDataSection.setUnf(unfValues); stataDataSection.setFileUnf(fileUnfValue); smd.setVariableUNF(unfValues); smd.getFileInformation().put("fileUNF", fileUnfValue); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine("unf values:\n" + unfValues); stataDataSection.setData(dataTable2); // close the stream dbgLog.fine("***** decodeData(): end *****\n\n"); }