List of usage examples for java.nio ByteBuffer getFloat
public abstract float getFloat();
From source file:hivemall.GeneralLearnerBaseUDTF.java
/**
 * Runs training iterations 2..{@code iterations} over the examples that were buffered
 * during the first streaming pass. If everything fit in {@code inputBuf}, iterates purely
 * in memory; otherwise the buffer was spilled to {@code fileIO} and each iteration
 * re-reads the temporary file record by record.
 *
 * Record wire format (as consumed below): int recordBytes, int featureVectorLength,
 * featureVectorLength serialized FeatureValues, then a float target.
 *
 * @param iterations total number of iterations (iteration 1 already happened while rows streamed in)
 * @throws HiveException on any I/O failure or training error
 */
protected final void runIterativeTraining(@Nonnegative final int iterations) throws HiveException {
    final ByteBuffer buf = this.inputBuf;
    final NioStatefulSegment dst = this.fileIO;
    assert (buf != null);
    assert (dst != null);
    final long numTrainingExamples = count;

    final Reporter reporter = getReporter();
    final Counters.Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.GeneralLearnerBase$Counter", "iteration");

    try {
        if (dst.getPosition() == 0L) {
            // Nothing was ever spilled to disk: run all iterations against the in-memory buffer.
            if (buf.position() == 0) {
                return; // no training example
            }
            // Switch the buffer from write mode to read mode.
            buf.flip();
            for (int iter = 2; iter <= iterations; iter++) {
                cvState.next();
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);

                while (buf.remaining() > 0) {
                    int recordBytes = buf.getInt();
                    assert (recordBytes > 0) : recordBytes;
                    int featureVectorLength = buf.getInt();
                    final FeatureValue[] featureVector = new FeatureValue[featureVectorLength];
                    for (int j = 0; j < featureVectorLength; j++) {
                        featureVector[j] = readFeatureValue(buf, featureType);
                    }
                    float target = buf.getFloat();
                    train(featureVector, target);
                }
                // Replay the same records on the next iteration.
                buf.rewind();

                if (is_mini_batch) { // Update model with accumulated delta
                    batchUpdate();
                }
                if (cvState.isConverged(numTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + cvState.getCurrentIteration() + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on memory (thus "
                    + NumberUtils.formatNumber(numTrainingExamples * cvState.getCurrentIteration())
                    + " training updates in total) ");
        } else {
            // Examples overflowed to a temporary file: flush the tail still in the buffer,
            // then re-read the file once per iteration.
            if (buf.remaining() > 0) {
                writeBuffer(buf, dst);
            }
            try {
                dst.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + dst.getFile().getAbsolutePath(), e);
            }
            if (logger.isInfoEnabled()) {
                File tmpFile = dst.getFile();
                logger.info("Wrote " + numTrainingExamples
                        + " records to a temporary file for iterative training: "
                        + tmpFile.getAbsolutePath() + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }

            // run iterations
            for (int iter = 2; iter <= iterations; iter++) {
                cvState.next();
                setCounterValue(iterCounter, iter);

                buf.clear();
                dst.resetPosition();
                while (true) {
                    reportProgress(reporter);
                    // TODO prefetch
                    // Fill the buffer from the temporary file.
                    final int bytesRead;
                    try {
                        bytesRead = dst.read(buf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + dst.getFile().getAbsolutePath(), e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;

                    // Decode as many whole records as the buffer currently holds.
                    buf.flip();
                    int remain = buf.remaining();
                    if (remain < SizeOf.INT) {
                        throw new HiveException("Illegal file format was detected");
                    }
                    while (remain >= SizeOf.INT) {
                        int pos = buf.position();
                        int recordBytes = buf.getInt();
                        remain -= SizeOf.INT;
                        if (remain < recordBytes) {
                            // Partial record: back up so the next read() completes it.
                            buf.position(pos);
                            break;
                        }
                        int featureVectorLength = buf.getInt();
                        final FeatureValue[] featureVector = new FeatureValue[featureVectorLength];
                        for (int j = 0; j < featureVectorLength; j++) {
                            featureVector[j] = readFeatureValue(buf, featureType);
                        }
                        float target = buf.getFloat();
                        train(featureVector, target);
                        remain -= recordBytes;
                    }
                    // Keep the undecoded tail at the front for the next fill.
                    buf.compact();
                }

                if (is_mini_batch) { // Update model with accumulated delta
                    batchUpdate();
                }
                if (cvState.isConverged(numTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + cvState.getCurrentIteration() + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on a secondary storage (thus "
                    + NumberUtils.formatNumber(numTrainingExamples * cvState.getCurrentIteration())
                    + " training updates in total)");
        }
    } catch (Throwable e) {
        throw new HiveException("Exception caused in the iterative training", e);
    } finally {
        // delete the temporary file and release resources
        try {
            dst.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + dst.getFile().getAbsolutePath(), e);
        }
        this.inputBuf = null;
        this.fileIO = null;
    }
}
From source file:com.healthmarketscience.jackcess.impl.ColumnImpl.java
/** * Deserialize a raw byte value for this column into an Object * @param data The raw byte value// ww w . ja v a2s . co m * @param order Byte order in which the raw value is stored * @return The deserialized Object * @usage _advanced_method_ */ public Object read(byte[] data, ByteOrder order) throws IOException { ByteBuffer buffer = ByteBuffer.wrap(data).order(order); switch (getType()) { case BOOLEAN: throw new IOException("Tried to read a boolean from data instead of null mask."); case BYTE: return Byte.valueOf(buffer.get()); case INT: return Short.valueOf(buffer.getShort()); case LONG: return Integer.valueOf(buffer.getInt()); case DOUBLE: return Double.valueOf(buffer.getDouble()); case FLOAT: return Float.valueOf(buffer.getFloat()); case SHORT_DATE_TIME: return readDateValue(buffer); case BINARY: return data; case TEXT: return decodeTextValue(data); case MONEY: return readCurrencyValue(buffer); case NUMERIC: return readNumericValue(buffer); case GUID: return readGUIDValue(buffer, order); case UNKNOWN_0D: case UNKNOWN_11: // treat like "binary" data return data; case COMPLEX_TYPE: return new ComplexValueForeignKeyImpl(this, buffer.getInt()); default: throw new IOException("Unrecognized data type: " + _type); } }
From source file:au.org.ala.layers.intersect.Grid.java
/**
 * Scans the whole .gri/.GRI payload and returns the calculated min and max values
 * of the grid as a float[] where [0] is min and [1] is max. Cells equal to
 * {@code nodatavalue} are skipped; values are multiplied by {@code rescale}.
 * On any error the partially-updated (or initial sentinel) array is returned.
 */
public float[] calculatetMinMax() {

    float[] ret = new float[2];
    // Sentinels: min starts at +MAX_VALUE, max at -MAX_VALUE.
    ret[0] = Float.MAX_VALUE;
    ret[1] = Float.MAX_VALUE * -1;

    long i;
    int size;
    byte[] b;
    RandomAccessFile afile = null;
    try { //read of random access file can throw an exception
        // Prefer the upper-case extension if present, else fall back to lower-case.
        File f2 = new File(filename + ".GRI");

        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        long length = ((long) nrows) * ((long) ncols);

        float f;

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                f = afile.readByte();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                f = afile.readByte();
                // Re-interpret the signed byte as unsigned 0..255.
                if (f < 0) {
                    f += 256;
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                // NOTE(review): read(b) may return fewer than size bytes; the return
                // value is ignored throughout this method — consider readFully().
                afile.read(b);
                if (byteorderLSB) {
                    f = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                } else {
                    f = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                // Mixed |/+ works here because the masked, shifted fields are disjoint.
                if (byteorderLSB) {
                    f = ((0xFF & b[3]) << 24) | ((0xFF & b[2]) << 16) + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                } else {
                    f = ((0xFF & b[0]) << 24) | ((0xFF & b[1]) << 16) + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                // 64-bit value assembled byte-by-byte, then narrowed to float by assignment.
                if (byteorderLSB) {
                    f = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                            + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                            + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                            + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                } else {
                    f = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                            + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                            + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                            + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                ByteBuffer bb = ByteBuffer.wrap(b);
                if (byteorderLSB) {
                    bb.order(ByteOrder.LITTLE_ENDIAN);
                }
                f = bb.getFloat();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                afile.read(b);
                ByteBuffer bb = ByteBuffer.wrap(b);
                if (byteorderLSB) {
                    bb.order(ByteOrder.LITTLE_ENDIAN);
                }
                // Doubles are narrowed to float for the min/max result.
                f = (float) bb.getDouble();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
        }
    } catch (Exception e) {
        logger.error("error calculating min/max of a grid file", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }

    return ret;
}
From source file:org.shaman.terrain.polygonal.GraphToHeightmap.java
/** * Renders the given scene in a top-down manner in the given matrix * * @param matrix/*from w ww .j a va 2s.c o m*/ * @param scene */ private void render(float[][] matrix, final Spatial scene, final ColorRGBA background, float min, float max) { final ByteBuffer data = BufferUtils.createByteBuffer(size * size * 4 * 4); try { app.enqueue(new Callable<Object>() { @Override public Object call() throws Exception { //init Camera cam = new Camera(size, size); cam.setParallelProjection(true); final ViewPort view = new ViewPort("Off", cam); view.setBackgroundColor(background); view.setClearFlags(true, true, true); final FrameBuffer buffer = new FrameBuffer(size, size, 1); buffer.setDepthBuffer(Image.Format.Depth); buffer.setColorBuffer(Image.Format.RGBA32F); view.setOutputFrameBuffer(buffer); view.attachScene(scene); //render scene.setCullHint(Spatial.CullHint.Never); scene.updateGeometricState(); view.setEnabled(true); app.getRenderManager().renderViewPort(view, 0); app.getRenderer().readFrameBufferWithFormat(buffer, data, Image.Format.RGBA32F); return new Object(); } }).get(); } catch (InterruptedException | ExecutionException ex) { Logger.getLogger(GraphToHeightmap.class.getName()).log(Level.SEVERE, "unable to render", ex); return; } data.rewind(); for (int y = 0; y < size; ++y) { for (int x = 0; x < size; ++x) { float v = data.getFloat(); v *= (max - min); v += min; matrix[x][y] = v; data.getFloat(); data.getFloat(); data.getFloat(); } } }
From source file:org.shaman.terrain.polygonal.GraphToHeightmap.java
/** * Renders the given scene in a top-down manner in the given matrix * * @param matrix//from ww w. ja v a 2 s . c o m * @param scene */ private void renderColor(float[][][] matrix, final Spatial scene, final ColorRGBA background, float min, float max) { final ByteBuffer data = BufferUtils.createByteBuffer(size * size * 4 * 4); try { app.enqueue(new Callable<Object>() { @Override public Object call() throws Exception { //init Camera cam = new Camera(size, size); cam.setParallelProjection(true); final ViewPort view = new ViewPort("Off", cam); view.setBackgroundColor(background); view.setClearFlags(true, true, true); final FrameBuffer buffer = new FrameBuffer(size, size, 1); buffer.setDepthBuffer(Image.Format.Depth); buffer.setColorBuffer(Image.Format.RGBA32F); view.setOutputFrameBuffer(buffer); view.attachScene(scene); //render scene.setCullHint(Spatial.CullHint.Never); scene.updateGeometricState(); view.setEnabled(true); app.getRenderManager().renderViewPort(view, 0); app.getRenderer().readFrameBufferWithFormat(buffer, data, Image.Format.RGBA32F); return new Object(); } }).get(); } catch (InterruptedException | ExecutionException ex) { Logger.getLogger(GraphToHeightmap.class.getName()).log(Level.SEVERE, "unable to render", ex); return; } data.rewind(); for (int y = 0; y < size; ++y) { for (int x = 0; x < size; ++x) { float v; v = data.getFloat(); v *= (max - min); v += min; matrix[x][y][0] = v; v = data.getFloat(); v *= (max - min); v += min; matrix[x][y][1] = v; v = data.getFloat(); v *= (max - min); v += min; matrix[x][y][2] = v; v = data.getFloat(); v *= (max - min); v += min; matrix[x][y][3] = v; } } }
From source file:au.org.ala.layers.intersect.Grid.java
/**
 * Looks up one .gri cell value per input point by seeking directly in the file.
 *
 * @param points input array for longitude and latitude, double[number_of_points][2]
 * @return array of .gri file values corresponding to the points provided
 *         (Float.NaN for points outside the grid or equal to nodatavalue),
 *         or null for empty input
 */
public float[] getValues(double[][] points) {

    //confirm inputs since they come from somewhere else
    if (points == null || points.length == 0) {
        return null;
    }

    //use preloaded grid data if available
    Grid g = Grid.getLoadedGrid(filename);
    if (g != null) {
        return g.getValues2(points);
    }

    // Tiled grids are handled by the buffered variant.
    if (subgrids != null) {
        return getValues3(points, Math.min(1024 * 1024, 64 * points.length));
    }

    float[] ret = new float[points.length];

    int length = points.length;
    long size;
    int i, pos;
    byte[] b;
    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");

    try { //read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                // Negative cell number means the point is outside the grid.
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    ret[i] = afile.readByte();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    ret[i] = afile.readByte();
                    // Re-interpret the signed byte as unsigned 0..255.
                    if (ret[i] < 0) {
                        ret[i] += 256;
                    }
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    // NOTE(review): read(b) return value ignored here and below;
                    // a short read would decode garbage — consider readFully().
                    afile.read(b);
                    if (byteorderLSB) {
                        ret[i] = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                    } else {
                        ret[i] = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                    }
                    //ret[i] = afile.readShort();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    // Mixed |/+ works because the masked, shifted fields are disjoint.
                    if (byteorderLSB) {
                        ret[i] = ((0xFF & b[3]) << 24) | ((0xFF & b[2]) << 16) + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                    } else {
                        ret[i] = ((0xFF & b[0]) << 24) | ((0xFF & b[1]) << 16) + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                    }
                    //ret[i] = afile.readInt();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    // 64-bit value narrowed to float by the assignment.
                    if (byteorderLSB) {
                        ret[i] = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                                + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                                + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                                + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                    } else {
                        ret[i] = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                                + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                                + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                                + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                    }
                    //ret[i] = afile.readLong();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[i] = bb.getFloat();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[i] = (float) bb.getDouble();
                    //ret[i] = afile.readFloat();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
            // / should not happen; catch anyway...
            for (i = 0; i < length; i++) {
                ret[i] = Float.NaN;
            }
        }
        //replace not a number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
    } catch (Exception e) {
        logger.error("error getting grid file values", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    return ret;
}
From source file:au.org.ala.layers.intersect.Grid.java
/**
 * Buffered variant of {@link #getValues}: sorts the requested cells by file
 * position and reads through a shared byte buffer so sequential lookups hit
 * the cache instead of seeking for every point.
 *
 * @param points     input array for longitude and latitude,
 *                   double[number_of_points][2] and sorted latitude then longitude
 * @param bufferSize read-cache size in bytes (must be a multiple of 64)
 * @return array of .gri file values corresponding to the points provided,
 *         or null for empty input or on error
 */
public float[] getValues3(double[][] points, int bufferSize) {
    //confirm inputs since they come from somewhere else
    if (points == null || points.length == 0) {
        return null;
    }

    if (subgrids != null) {
        return getValuesSubgrids(points, bufferSize);
    }

    //use preloaded grid data if available
    Grid g = Grid.getLoadedGrid(filename);
    if (g != null && g.grid_data != null) {
        return g.getValues2(points);
    }

    int length = points.length;
    int size, i;
    byte[] b;

    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");

    try { //read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        //do not cache subgrids (using getValues2)
        // Small files are cheaper to read via the in-memory path.
        if (!subgrid && afile.length() < 80 * 1024 * 1024) {
            try {
                afile.close();
                afile = null;
            } catch (Exception e) {
            }
            return getValues2(points);
        }

        byte[] buffer = new byte[bufferSize]; //must be multiple of 64
        // Initialised past EOF so the first getBytes() forces a buffer fill.
        Long bufferOffset = afile.length();

        float[] ret = new float[points.length];

        //get cell numbers
        long[][] cells = new long[points.length][2];
        for (int j = 0; j < points.length; j++) {
            if (Double.isNaN(points[j][0]) || Double.isNaN(points[j][1])) {
                cells[j][0] = -1;
                cells[j][1] = j;
            } else {
                // [0] = cell number (file order), [1] = original point index.
                cells[j][0] = getcellnumber(points[j][0], points[j][1]);
                cells[j][1] = j;
            }
        }
        // Sort by cell number so the file is read front to back.
        java.util.Arrays.sort(cells, new Comparator<long[]>() {
            @Override
            public int compare(long[] o1, long[] o2) {
                if (o1[0] == o2[0]) {
                    return o1[1] > o2[1] ? 1 : -1;
                } else {
                    return o1[0] > o2[0] ? 1 : -1;
                }
            }
        });

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            for (i = 0; i < length; i++) {
                // Duplicate cell: reuse the value decoded for the previous point.
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    ret[(int) cells[i][1]] = getByte(afile, buffer, bufferOffset, cells[i][0] * size);
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    ret[(int) cells[i][1]] = getByte(afile, buffer, bufferOffset, cells[i][0] * size);
                    // Re-interpret the signed byte as unsigned 0..255.
                    if (ret[(int) cells[i][1]] < 0) {
                        ret[(int) cells[i][1]] += 256;
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    // getBytes() refills 'buffer' as needed and returns the new offset.
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                    } else {
                        ret[(int) cells[i][1]] = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    // Mixed |/+ works because the masked, shifted fields are disjoint.
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = ((0xFF & b[3]) << 24) | ((0xFF & b[2]) << 16) + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                    } else {
                        ret[(int) cells[i][1]] = ((0xFF & b[0]) << 24) | ((0xFF & b[1]) << 16) + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                                + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                                + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                                + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                    } else {
                        ret[(int) cells[i][1]] = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                                + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                                + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                                + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[(int) cells[i][1]] = bb.getFloat();
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    // NOTE(review): unlike the other branches, the updated offset returned
                    // by getBytes() is discarded here — verify this is intentional, as it
                    // defeats the read cache for DOUBLE grids.
                    getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[(int) cells[i][1]] = (float) bb.getDouble();
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
            // / should not happen; catch anyway...
            for (i = 0; i < length; i++) {
                ret[i] = Float.NaN;
            }
        }

        //replace not a number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
        return ret;
    } catch (Exception e) {
        logger.error("error getting grid file values", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    return null;
}
From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java
/**
 * Decodes the fixed-width data section of a Stata .dta file row by row, writes each
 * row as an escaped tab-delimited line to a temp file (registered in smd), keeps a
 * column-wise copy in dataTable2 (null = missing) for UNF calculation, and stores
 * the per-variable and file-level UNF signatures on stataDataSection/smd.
 *
 * Stata type codes handled: -5 byte, -4 short, -3 int, -2 float, -1 double, 0 string;
 * anything else is rejected.
 *
 * @param stream positioned at the start of the data section
 * @throws IOException on read failure, truncated rows, or an unknown variable type
 */
private void decodeData(BufferedInputStream stream) throws IOException {

    dbgLog.fine("\n***** decodeData(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }

    int nvar = (Integer) smd.getFileInformation().get("varQnty");
    int nobs = (Integer) smd.getFileInformation().get("caseQnty");

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data diminsion[rxc]=(" + nobs + "," + nvar + ")");
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("bytes per row=" + bytes_per_row + " bytes");
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("variableTypelList=" + Arrays.deepToString(variableTypelList));
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("StringVariableTable=" + StringVariableTable);

    FileOutputStream fileOutTab = null;
    PrintWriter pwout = null;

    // create a File object to save the tab-delimited data file
    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");
    String tabDelimitedDataFileName = tabDelimitedDataFile.getAbsolutePath();

    // save the temp file name in the metadata object
    smd.getFileInformation().put("tabDelimitedDataFileLocation", tabDelimitedDataFileName);

    fileOutTab = new FileOutputStream(tabDelimitedDataFile);
    pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);

    // data storage
    // Object[][] dataTable = new Object[nobs][nvar];
    // for later variable-wise calculations of statistics
    // dataTable2 stores the cut-out data column-wise
    Object[][] dataTable2 = new Object[nvar][nobs];
    String[][] dateFormat = new String[nvar][nobs];

    for (int i = 0; i < nobs; i++) {
        byte[] dataRowBytes = new byte[bytes_per_row];
        Object[] dataRow = new Object[nvar];

        // NOTE(review): read() may return fewer than bytes_per_row bytes; only a
        // zero-length read is treated as an error here.
        int nbytes = stream.read(dataRowBytes, 0, bytes_per_row);
        if (nbytes == 0) {
            String errorMessage = "reading data: no data were read at(" + i + "th row)";
            throw new IOException(errorMessage);
        }

        // decoding each row
        int byte_offset = 0;
        for (int columnCounter = 0; columnCounter < variableTypelList.length; columnCounter++) {

            Integer varType = variableTypeMap.get(variableTypelList[columnCounter]);
            String variableFormat = variableFormats[columnCounter];
            boolean isDateTimeDatum = isDateTimeDatumList[columnCounter];

            // 256 is an out-of-range sentinel so a null type falls through to default.
            switch (varType != null ? varType : 256) {
            case -5:
                // Byte case
                // note: 1 byte signed
                byte byte_datum = dataRowBytes[byte_offset];
                if (dbgLog.isLoggable(Level.FINER))
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum);
                // Values at or above the threshold encode Stata missing values.
                if (byte_datum >= BYTE_MISSING_VALUE) {
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum);
                    dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                } else {
                    dataRow[columnCounter] = byte_datum;
                    dataTable2[columnCounter][i] = byte_datum;
                }
                byte_offset++;
                break;
            case -4:
                // Stata-int (=java's short: 2byte) case
                // note: 2-byte signed int, not java's int
                ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2);
                if (isLittleEndian) {
                    int_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                short short_datum = int_buffer.getShort();
                if (dbgLog.isLoggable(Level.FINER))
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum);
                if (short_datum >= INT_MISSIG_VALUE) {
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum);
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                    if (isDateTimeDatum) {
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                    } else {
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    }
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum));
                        if (dbgLog.isLoggable(Level.FINER))
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dateFormat[columnCounter][i] = ddt.format;
                        dataTable2[columnCounter][i] = dataRow[columnCounter];
                    } else {
                        dataTable2[columnCounter][i] = short_datum;
                        dataRow[columnCounter] = short_datum;
                    }
                }
                byte_offset += 2;
                break;
            case -3:
                // stata-Long (= java's int: 4 byte) case
                // note: 4-byte singed, not java's long
                dbgLog.fine("DATreader: stata long");
                ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                if (isLittleEndian) {
                    long_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                int int_datum = long_buffer.getInt();
                if (dbgLog.isLoggable(Level.FINE))
                    dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long =" + int_datum);
                if (int_datum >= LONG_MISSING_VALUE) {
                    if (dbgLog.isLoggable(Level.FINE))
                        dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long missing value=" + int_datum);
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                    if (isDateTimeDatum) {
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                    } else {
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    }
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum));
                        if (dbgLog.isLoggable(Level.FINER))
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dateFormat[columnCounter][i] = ddt.format;
                        dataTable2[columnCounter][i] = dataRow[columnCounter];
                    } else {
                        dataTable2[columnCounter][i] = int_datum;
                        dataRow[columnCounter] = int_datum;
                    }
                }
                byte_offset += 4;
                break;
            case -2:
                // float case
                // note: 4-byte
                ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                if (isLittleEndian) {
                    float_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                float float_datum = float_buffer.getFloat();
                if (dbgLog.isLoggable(Level.FINER))
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum);
                // Float missing values are a discrete set of bit patterns, not a range.
                if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value=" + float_datum);
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                    if (isDateTimeDatum) {
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                    } else {
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    }
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum));
                        if (dbgLog.isLoggable(Level.FINER))
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dateFormat[columnCounter][i] = ddt.format;
                        dataTable2[columnCounter][i] = dataRow[columnCounter];
                    } else {
                        dataTable2[columnCounter][i] = float_datum;
                        dataRow[columnCounter] = float_datum;
                    }
                }
                byte_offset += 4;
                break;
            case -1:
                // double case
                // note: 8-byte
                ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8);
                if (isLittleEndian) {
                    double_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                double double_datum = double_buffer.getDouble();
                if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value=" + double_datum);
                    if (isDateTimeDatum) {
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                    } else {
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    }
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum));
                        if (dbgLog.isLoggable(Level.FINER))
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format);
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dateFormat[columnCounter][i] = ddt.format;
                        dataTable2[columnCounter][i] = dataRow[columnCounter];
                    } else {
                        dataTable2[columnCounter][i] = double_datum;
                        // the tab file gets the formatted string, the UNF copy the raw double
                        dataRow[columnCounter] = doubleNumberFormatter.format(double_datum);
                    }
                }
                byte_offset += 8;
                break;
            case 0:
                // String case
                int strVarLength = StringVariableTable.get(columnCounter);
                String raw_datum = new String(
                        Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)),
                        "ISO-8859-1");
                String string_datum = getNullStrippedString(raw_datum);
                if (dbgLog.isLoggable(Level.FINER))
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum);
                if (string_datum.equals("")) {
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value=" + string_datum);
                    dataRow[columnCounter] = MissingValueForTextDataFileString;
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                } else {
                    String escapedString = string_datum.replaceAll("\"", Matcher.quoteReplacement("\\\""));
                    /*
                     * Fixing the bug we've had in the Stata reader for
                     * the longest time: new lines and tabs need to
                     * be escaped too - otherwise it breaks our
                     * TAB file structure! -- L.A.
                     */
                    escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t"));
                    escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n"));
                    escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r"));
                    // the escaped version of the string will be
                    // stored in the tab file:
                    dataRow[columnCounter] = "\"" + escapedString + "\"";
                    // but note that the "raw" version of it is
                    // used for the UNF:
                    dataTable2[columnCounter][i] = string_datum;
                }
                byte_offset += strVarLength;
                break;
            default:
                dbgLog.fine("unknown variable type found");
                String errorMessage = "unknow variable Type found at data section";
                throw new InvalidObjectException(errorMessage);
            } // switch
        } // for-columnCounter

        // dump the row of data to the external file
        pwout.println(StringUtils.join(dataRow, "\t"));

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};");

    } // for- i (row)

    pwout.close();

    if (dbgLog.isLoggable(Level.FINER)) {
        dbgLog.finer("\ndataTable2(variable-wise):\n");
        dbgLog.finer(Arrays.deepToString(dataTable2));
        dbgLog.finer("\ndateFormat(variable-wise):\n");
        dbgLog.finer(Arrays.deepToString(dateFormat));
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypelList:\n" + Arrays.deepToString(variableTypelList));
        dbgLog.fine("variableTypelListFinal:\n" + Arrays.deepToString(variableTypelListFinal));
    }

    // Per-variable UNF signatures, then the file-level UNF over those.
    String[] unfValues = new String[nvar];
    for (int j = 0; j < nvar; j++) {
        String variableType_j = variableTypelListFinal[j];
        unfValues[j] = getUNF(dataTable2[j], dateFormat[j], variableType_j, unfVersionNumber, j);
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine(j + "th unf value" + unfValues[j]);
    }

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("unf set:\n" + Arrays.deepToString(unfValues));

    fileUnfValue = UNF5Util.calculateUNF(unfValues);

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("file-unf=" + fileUnfValue);

    stataDataSection.setUnf(unfValues);
    stataDataSection.setFileUnf(fileUnfValue);
    smd.setVariableUNF(unfValues);
    smd.getFileInformation().put("fileUNF", fileUnfValue);
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("unf values:\n" + unfValues);

    stataDataSection.setData(dataTable2);

    // close the stream
    dbgLog.fine("***** decodeData(): end *****\n\n");
}
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java
private void decodeData(BufferedInputStream stream) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); if (stream == null) { throw new IllegalArgumentException("stream == null!"); }/*from ww w .java 2 s.com*/ //int nvar = (Integer)smd.getFileInformation().get("varQnty"); int nvar = dataTable.getVarQuantity().intValue(); //int nobs = (Integer)smd.getFileInformation().get("caseQnty"); int nobs = dataTable.getCaseQuantity().intValue(); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")"); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("bytes per row=" + bytes_per_row + " bytes"); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("variableTypes=" + Arrays.deepToString(variableTypes)); } if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("StringLengthTable=" + StringLengthTable); } // create a File object to save the tab-delimited data file FileOutputStream fileOutTab = null; PrintWriter pwout = null; File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab"); // save the temp tab-delimited file in the return ingest object: ingesteddata.setTabDelimitedFile(tabDelimitedDataFile); fileOutTab = new FileOutputStream(tabDelimitedDataFile); pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); /* Should we lose this dateFormat thing in 4.0? * the UNF should be calculatable on the app side solely from the data * stored in the tab file and the type information stored the dataVariable * object. * furthermore, the very idea of storing a format entry not just for * every variable, but for every value/observation is a bit strange. * TODO: review and confirm that, in the 3.* implementation, every * entry in dateFormat[nvar][*] is indeed the same - except for the * missing value entries. -- L.A. 4.0 (OK, I got rid of the dateFormat; instead I kinda sorta assume that the format is the same for every value in a column, save for the missing values... 
like this: dataTable.getDataVariables().get(columnCounter).setFormatSchemaName(ddt.format); BUT, this needs to be reviewed/confirmed etc! */ //String[][] dateFormat = new String[nvar][nobs]; for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; Object[] dataRow = new Object[nvar]; int nbytes = stream.read(dataRowBytes, 0, bytes_per_row); if (nbytes == 0) { String errorMessage = "reading data: no data were read at(" + i + "th row)"; throw new IOException(errorMessage); } // decoding each row int byte_offset = 0; for (int columnCounter = 0; columnCounter < variableTypes.length; columnCounter++) { Integer varType = variableTypeMap.get(variableTypes[columnCounter]); // 4.0 Check if this is a time/date variable: boolean isDateTimeDatum = false; String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory(); if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) { isDateTimeDatum = true; } String variableFormat = dateVariableFormats[columnCounter]; switch (varType != null ? 
varType : 256) { case -5: // Byte case // note: 1 byte signed byte byte_datum = dataRowBytes[byte_offset]; if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum); } if (byte_datum >= BYTE_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { dataRow[columnCounter] = byte_datum; } byte_offset++; break; case -4: // Stata-int (=java's short: 2byte) case // note: 2-byte signed int, not java's int ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2); if (isLittleEndian) { int_buffer.order(ByteOrder.LITTLE_ENDIAN); } short short_datum = int_buffer.getShort(); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum); } if (short_datum >= INT_MISSIG_VALUE) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value=" + short_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("short", variableFormat, Short.toString(short_datum)); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); } dataRow[columnCounter] = ddt.decodedDateTime; //dateFormat[columnCounter][i] = ddt.format; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = short_datum; } } byte_offset += 2; break; case -3: // stata-Long (= java's int: 4 byte) case // note: 4-byte singed, not java's long //dbgLog.fine("DATreader: stata long"); ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if (isLittleEndian) { long_buffer.order(ByteOrder.LITTLE_ENDIAN); } int int_datum = long_buffer.getInt(); if 
(dbgLog.isLoggable(Level.FINE)) { //dbgLog.fine(i + "-th row " + columnCounter // + "=th column stata long =" + int_datum); } if (int_datum >= LONG_MISSING_VALUE) { if (dbgLog.isLoggable(Level.FINE)) { //dbgLog.fine(i + "-th row " + columnCounter // + "=th column stata long missing value=" + int_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("int", variableFormat, Integer.toString(int_datum)); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); } dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = int_datum; } } byte_offset += 4; break; case -2: // float case // note: 4-byte ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4); if (isLittleEndian) { float_buffer.order(ByteOrder.LITTLE_ENDIAN); } float float_datum = float_buffer.getFloat(); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum); } if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value=" + float_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("float", variableFormat, doubleNumberFormatter.format(float_datum)); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); } dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = float_datum; // This may be temporary - but for now (as in, while I'm testing // 4.0 ingest against 3.* ingest, I need to be able to tell if a // floating 
point value was a single, or double float in the // original STATA file: -- L.A. Jul. 2014 dataTable.getDataVariables().get(columnCounter).setFormat("float"); } } byte_offset += 4; break; case -1: // double case // note: 8-byte ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8); if (isLittleEndian) { double_buffer.order(ByteOrder.LITTLE_ENDIAN); } double double_datum = double_buffer.getDouble(); if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value=" + double_datum); } dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { if (isDateTimeDatum) { DecodedDateTime ddt = decodeDateTimeData("double", variableFormat, doubleNumberFormatter.format(double_datum)); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format=" + ddt.format); } dataRow[columnCounter] = ddt.decodedDateTime; dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format); } else { dataRow[columnCounter] = doubleNumberFormatter.format(double_datum); } } byte_offset += 8; break; case 0: // String case int strVarLength = StringLengthTable.get(columnCounter); String raw_datum = new String( Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)), "ISO-8859-1"); // TODO: // is it the right thing to do, to default to "ISO-8859-1"? // (it may be; since there's no mechanism for specifying // alternative encodings in Stata, this may be their default; // it just needs to be verified. -- L.A. Jul. 
2014) String string_datum = getNullStrippedString(raw_datum); if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum); } if (string_datum.isEmpty()) { if (dbgLog.isLoggable(Level.FINER)) { dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value=" + string_datum); } // TODO: /* Is this really a missing value case? * Or is it an honest empty string? * Is there such a thing as a missing value for a String in Stata? * -- L.A. 4.0 */ dataRow[columnCounter] = MissingValueForTabDelimitedFile; } else { /* * Some special characters, like new lines and tabs need to * be escaped - otherwise they will break our TAB file * structure! * But before we escape anything, all the back slashes * already in the string need to be escaped themselves. */ String escapedString = string_datum.replace("\\", "\\\\"); // escape quotes: escapedString = escapedString.replaceAll("\"", Matcher.quoteReplacement("\\\"")); // escape tabs and new lines: escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t")); escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n")); escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r")); // the escaped version of the string is stored in the tab file // enclosed in double-quotes; this is in order to be able // to differentiate between an empty string (tab-delimited empty string in // double quotes) and a missing value (tab-delimited empty string). // Although the question still remains - is it even possible // to store an empty string, that's not a missing value, in Stata? // - see the comment in the missing value case above. -- L.A. 
4.0 dataRow[columnCounter] = "\"" + escapedString + "\""; } byte_offset += strVarLength; break; default: dbgLog.fine("unknown variable type found"); String errorMessage = "unknow variable Type found at data section"; throw new InvalidObjectException(errorMessage); } // switch } // for-columnCounter // Dump the row of data to the tab-delimited file we are producing: pwout.println(StringUtils.join(dataRow, "\t")); if (dbgLog.isLoggable(Level.FINE)) { //dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};"); } } // for- i (row) pwout.close(); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes)); } dbgLog.fine("DTA Ingest: decodeData(): end."); }
From source file:com.healthmarketscience.jackcess.Column.java
/** * Deserialize a raw byte value for this column into an Object * @param data The raw byte value/*w w w. ja v a2 s .c o m*/ * @param order Byte order in which the raw value is stored * @return The deserialized Object * @usage _advanced_method_ */ public Object read(byte[] data, ByteOrder order) throws IOException { ByteBuffer buffer = ByteBuffer.wrap(data); buffer.order(order); if (_type == DataType.BOOLEAN) { throw new IOException("Tried to read a boolean from data instead of null mask."); } else if (_type == DataType.BYTE) { return Byte.valueOf(buffer.get()); } else if (_type == DataType.INT) { return Short.valueOf(buffer.getShort()); } else if (_type == DataType.LONG) { return Integer.valueOf(buffer.getInt()); } else if (_type == DataType.DOUBLE) { return Double.valueOf(buffer.getDouble()); } else if (_type == DataType.FLOAT) { return Float.valueOf(buffer.getFloat()); } else if (_type == DataType.SHORT_DATE_TIME) { return readDateValue(buffer); } else if (_type == DataType.BINARY) { return data; } else if (_type == DataType.TEXT) { return decodeTextValue(data); } else if (_type == DataType.MONEY) { return readCurrencyValue(buffer); } else if (_type == DataType.OLE) { if (data.length > 0) { return readLongValue(data); } return null; } else if (_type == DataType.MEMO) { if (data.length > 0) { return readLongStringValue(data); } return null; } else if (_type == DataType.NUMERIC) { return readNumericValue(buffer); } else if (_type == DataType.GUID) { return readGUIDValue(buffer, order); } else if ((_type == DataType.UNKNOWN_0D) || (_type == DataType.UNKNOWN_11)) { // treat like "binary" data return data; } else if (_type == DataType.COMPLEX_TYPE) { return new ComplexValueForeignKey(this, buffer.getInt()); } else if (_type.isUnsupported()) { return rawDataWrapper(data); } else { throw new IOException("Unrecognized data type: " + _type); } }