List of usage examples for java.nio.ByteBuffer.getInt()
public abstract int getInt();
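As a quick orientation before the project excerpts below, here is a minimal, self-contained sketch (a hypothetical GetIntDemo class, not taken from any of the sources) of how the relative and absolute forms of getInt() behave: each relative call consumes four bytes at the current position and advances it, which is why buffers are typically filled with putInt(), flipped, and then drained.

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class GetIntDemo {
    public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(12);
        buf.putInt(1).putInt(2).putInt(3); // write three ints (4 bytes each)
        buf.flip();                        // switch from writing to reading
        while (buf.remaining() >= Integer.BYTES) {
            System.out.println(buf.getInt()); // relative read: advances position by 4
        }
        // getInt(int index) reads at an absolute offset without moving the position
        System.out.println(buf.getInt(4));    // prints 2 again
        // byte order defaults to BIG_ENDIAN and can be changed before reading
        buf.order(ByteOrder.LITTLE_ENDIAN);
    }
}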
From source file:hivemall.mf.OnlineMatrixFactorizationUDTF.java
protected final void runIterativeTraining(@Nonnegative final int iterations) throws HiveException {
    final ByteBuffer inputBuf = this.inputBuf;
    final NioFixedSegment fileIO = this.fileIO;
    assert (inputBuf != null);
    assert (fileIO != null);
    final long numTrainingExamples = count;
    final Reporter reporter = getReporter();
    final Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.mf.MatrixFactorization$Counter", "iteration");
    try {
        if (lastWritePos == 0) { // run iterations w/o temporary file
            if (inputBuf.position() == 0) {
                return; // no training example
            }
            inputBuf.flip();
            int iter = 2;
            for (; iter <= iterations; iter++) {
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);
                while (inputBuf.remaining() > 0) {
                    int user = inputBuf.getInt();
                    int item = inputBuf.getInt();
                    double rating = inputBuf.getDouble();
                    // invoke train
                    count++;
                    train(user, item, rating);
                }
                cvState.multiplyLoss(0.5d);
                if (cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
                inputBuf.rewind();
            }
            logger.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on memory (thus " + NumberUtils.formatNumber(count)
                    + " training updates in total) ");
        } else { // read training examples in the temporary file and invoke train for each example
            // write training examples in buffer to a temporary file
            if (inputBuf.position() > 0) {
                writeBuffer(inputBuf, fileIO, lastWritePos);
            } else if (lastWritePos == 0) {
                return; // no training example
            }
            try {
                fileIO.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e);
            }
            if (logger.isInfoEnabled()) {
                File tmpFile = fileIO.getFile();
                logger.info("Wrote " + numTrainingExamples
                        + " records to a temporary file for iterative training: "
                        + tmpFile.getAbsolutePath() + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }
            // run iterations
            int iter = 2;
            for (; iter <= iterations; iter++) {
                setCounterValue(iterCounter, iter);
                inputBuf.clear();
                long seekPos = 0L;
                while (true) {
                    reportProgress(reporter);
                    // TODO prefetch
                    // writes training examples to a buffer in the temporary file
                    final int bytesRead;
                    try {
                        bytesRead = fileIO.read(seekPos, inputBuf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + fileIO.getFile().getAbsolutePath(), e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;
                    seekPos += bytesRead;
                    // reads training examples from a buffer
                    inputBuf.flip();
                    int remain = inputBuf.remaining();
                    assert (remain > 0) : remain;
                    for (; remain >= RECORD_BYTES; remain -= RECORD_BYTES) {
                        int user = inputBuf.getInt();
                        int item = inputBuf.getInt();
                        double rating = inputBuf.getDouble();
                        // invoke train
                        count++;
                        train(user, item, rating);
                    }
                    inputBuf.compact();
                }
                cvState.multiplyLoss(0.5d);
                if (cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples using a secondary storage (thus "
                    + NumberUtils.formatNumber(count) + " training updates in total)");
        }
    } finally {
        // delete the temporary file and release resources
        try {
            fileIO.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e);
        }
        this.inputBuf = null;
        this.fileIO = null;
    }
}
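The excerpt above replays fixed-width records (an int user, an int item, and a double rating, 16 bytes per record) straight out of the buffer on every iteration, rewinding between passes. A stripped-down sketch of that replay pattern, with the Hivemall types removed and purely hypothetical record values, looks like this:

import java.nio.ByteBuffer;

public class RecordReplaySketch {
    private static final int RECORD_BYTES = 4 + 4 + 8; // user + item + rating

    public static void main(String[] args) {
        ByteBuffer inputBuf = ByteBuffer.allocate(3 * RECORD_BYTES);
        // fill the buffer once with hypothetical training examples
        inputBuf.putInt(1).putInt(10).putDouble(4.0);
        inputBuf.putInt(2).putInt(20).putDouble(3.5);
        inputBuf.putInt(3).putInt(30).putDouble(5.0);
        inputBuf.flip(); // switch to reading
        for (int iter = 1; iter <= 2; iter++) {
            while (inputBuf.remaining() >= RECORD_BYTES) {
                int user = inputBuf.getInt();
                int item = inputBuf.getInt();
                double rating = inputBuf.getDouble();
                System.out.printf("iter=%d user=%d item=%d rating=%.1f%n", iter, user, item, rating);
            }
            inputBuf.rewind(); // replay the same records on the next iteration
        }
    }
}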
From source file:edu.vu.isis.ammo.dash.provider.IncidentSyncAdaptor.java
protected long deserializer(File file, IMyWriter writer) {
    logger.debug("::deserializer");
    InputStream ins;
    try {
        ins = new FileInputStream(file);
    } catch (FileNotFoundException e1) {
        return -1;
    }
    BufferedInputStream bufferedInput = new BufferedInputStream(ins);
    byte[] buffer = new byte[1024];
    StringBuilder sb = new StringBuilder();
    long rowId = -1;
    String label = "";
    byte[] payloadSizeBuf = new byte[4];
    int payloadSize = 0;
    byte[] payloadBuf = null;
    int payloadPos = 0;
    try {
        int bytesBuffered = bufferedInput.read(buffer);
        int bufferPos = 0;
        int state = READING_META;
        boolean eod = false;
        while (bytesBuffered > -1) {
            if (bytesBuffered == bufferPos) {
                bytesBuffered = bufferedInput.read(buffer);
                bufferPos = 0; // reset buffer position
            }
            if (bytesBuffered < 0)
                eod = true;
            switch (state) {
            case READING_META:
                if (eod) {
                    writer.meta(sb);
                    break;
                }
                for (; bytesBuffered > bufferPos; bufferPos++) {
                    byte b = buffer[bufferPos];
                    if (b == '\0') {
                        bufferPos++;
                        state = READING_LABEL;
                        rowId = writer.meta(sb);
                        sb = new StringBuilder();
                        break;
                    }
                    sb.append((char) b);
                }
                break;
            case READING_LABEL:
                if (eod)
                    break;
                for (; bytesBuffered > bufferPos; bufferPos++) {
                    byte b = buffer[bufferPos];
                    if (b == '\0') {
                        label = sb.toString();
                        bufferPos++;
                        state = READING_PAYLOAD_SIZE;
                        payloadPos = 0;
                        break;
                    }
                    sb.append((char) b);
                }
                break;
            case READING_PAYLOAD_SIZE:
                if ((bytesBuffered - bufferPos) < (payloadSizeBuf.length - payloadPos)) {
                    // buffer doesn't contain the last byte of the length
                    for (; bytesBuffered > bufferPos; bufferPos++, payloadPos++) {
                        payloadSizeBuf[payloadPos] = buffer[bufferPos];
                    }
                } else {
                    // buffer contains the last byte of the length
                    for (; payloadSizeBuf.length > payloadPos; bufferPos++, payloadPos++) {
                        payloadSizeBuf[payloadPos] = buffer[bufferPos];
                    }
                    ByteBuffer dataSizeBuf = ByteBuffer.wrap(payloadSizeBuf);
                    dataSizeBuf.order(ByteOrder.LITTLE_ENDIAN);
                    payloadSize = dataSizeBuf.getInt();
                    payloadBuf = new byte[payloadSize];
                    payloadPos = 0;
                    state = READING_PAYLOAD;
                }
                break;
            case READING_PAYLOAD:
                if ((bytesBuffered - bufferPos) < (payloadSize - payloadPos)) {
                    for (; bytesBuffered > bufferPos; bufferPos++, payloadPos++) {
                        payloadBuf[payloadPos] = buffer[bufferPos];
                    }
                } else {
                    for (; payloadSize > payloadPos; bufferPos++, payloadPos++) {
                        payloadBuf[payloadPos] = buffer[bufferPos];
                    }
                    payloadPos = 0;
                    state = READING_PAYLOAD_CHECK;
                }
                break;
            case READING_PAYLOAD_CHECK:
                if ((bytesBuffered - bufferPos) < (payloadSizeBuf.length - payloadPos)) {
                    for (; bytesBuffered > bufferPos; bufferPos++, payloadPos++) {
                        payloadSizeBuf[payloadPos] = buffer[bufferPos];
                    }
                } else {
                    for (; payloadSizeBuf.length > payloadPos; bufferPos++, payloadPos++) {
                        payloadSizeBuf[payloadPos] = buffer[bufferPos];
                    }
                    ByteBuffer dataSizeBuf = ByteBuffer.wrap(payloadSizeBuf);
                    dataSizeBuf.order(ByteOrder.LITTLE_ENDIAN);
                    // read the trailer size once; a second getInt() would underflow the 4-byte buffer
                    int checkSize = dataSizeBuf.getInt();
                    if (payloadSize != checkSize) {
                        logger.error("message garbled {} {}", payloadSize, checkSize);
                        state = READING_LABEL;
                        break;
                    }
                    writer.payload(rowId, label, payloadBuf);
                    state = READING_LABEL;
                }
                break;
            }
        }
        bufferedInput.close();
    } catch (IOException e) {
        logger.error("could not read serialized file");
        return -1;
    }
    return rowId;
}
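The state machine above decodes its 4-byte payload-length field by wrapping the raw bytes in a ByteBuffer and switching the byte order before calling getInt(). In isolation, that decoding step is just the following (hypothetical bytes, hypothetical LengthPrefixSketch class):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class LengthPrefixSketch {
    public static void main(String[] args) {
        // hypothetical little-endian length prefix: 0x00000201 = 513
        byte[] payloadSizeBuf = { (byte) 0x01, (byte) 0x02, (byte) 0x00, (byte) 0x00 };
        int payloadSize = ByteBuffer.wrap(payloadSizeBuf)
                .order(ByteOrder.LITTLE_ENDIAN) // the sender wrote the length little-endian
                .getInt();
        System.out.println(payloadSize); // 513
    }
}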
From source file:hivemall.mf.BPRMatrixFactorizationUDTF.java
private final void runIterativeTraining(@Nonnegative final int iterations) throws HiveException {
    final ByteBuffer inputBuf = this.inputBuf;
    final NioFixedSegment fileIO = this.fileIO;
    assert (inputBuf != null);
    assert (fileIO != null);
    final long numTrainingExamples = count;
    final Reporter reporter = getReporter();
    final Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.mf.BPRMatrixFactorization$Counter", "iteration");
    try {
        if (lastWritePos == 0) { // run iterations w/o temporary file
            if (inputBuf.position() == 0) {
                return; // no training example
            }
            inputBuf.flip();
            int iter = 2;
            for (; iter <= iterations; iter++) {
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);
                while (inputBuf.remaining() > 0) {
                    int u = inputBuf.getInt();
                    int i = inputBuf.getInt();
                    int j = inputBuf.getInt();
                    // invoke train
                    count++;
                    train(u, i, j);
                }
                cvState.multiplyLoss(0.5d);
                cvState.logState(iter, eta());
                if (cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
                if (cvState.isLossIncreased()) {
                    etaEstimator.update(1.1f);
                } else {
                    etaEstimator.update(0.5f);
                }
                inputBuf.rewind();
            }
            LOG.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on memory (thus " + NumberUtils.formatNumber(count)
                    + " training updates in total) ");
        } else { // read training examples in the temporary file and invoke train for each example
            // write training examples in buffer to a temporary file
            if (inputBuf.position() > 0) {
                writeBuffer(inputBuf, fileIO, lastWritePos);
            } else if (lastWritePos == 0) {
                return; // no training example
            }
            try {
                fileIO.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e);
            }
            if (LOG.isInfoEnabled()) {
                File tmpFile = fileIO.getFile();
                LOG.info("Wrote " + numTrainingExamples
                        + " records to a temporary file for iterative training: "
                        + tmpFile.getAbsolutePath() + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }
            // run iterations
            int iter = 2;
            for (; iter <= iterations; iter++) {
                setCounterValue(iterCounter, iter);
                inputBuf.clear();
                long seekPos = 0L;
                while (true) {
                    reportProgress(reporter);
                    // TODO prefetch
                    // writes training examples to a buffer in the temporary file
                    final int bytesRead;
                    try {
                        bytesRead = fileIO.read(seekPos, inputBuf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + fileIO.getFile().getAbsolutePath(), e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;
                    seekPos += bytesRead;
                    // reads training examples from a buffer
                    inputBuf.flip();
                    int remain = inputBuf.remaining();
                    assert (remain > 0) : remain;
                    for (; remain >= RECORD_BYTES; remain -= RECORD_BYTES) {
                        int u = inputBuf.getInt();
                        int i = inputBuf.getInt();
                        int j = inputBuf.getInt();
                        // invoke train
                        count++;
                        train(u, i, j);
                    }
                    inputBuf.compact();
                }
                cvState.multiplyLoss(0.5d);
                cvState.logState(iter, eta());
                if (cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
                if (cvState.isLossIncreased()) {
                    etaEstimator.update(1.1f);
                } else {
                    etaEstimator.update(0.5f);
                }
            }
            LOG.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples using a secondary storage (thus "
                    + NumberUtils.formatNumber(count) + " training updates in total)");
        }
    } finally {
        // delete the temporary file and release resources
        try {
            fileIO.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e);
        }
        this.inputBuf = null;
        this.fileIO = null;
    }
}
From source file:com.koda.integ.hbase.blockcache.OffHeapBlockCache.java
/**
 * Read external with codec.
 *
 * @param blockName the block name
 * @return the cacheable
 * @throws IOException Signals that an I/O exception has occurred.
 */
@SuppressWarnings("unused")
private Cacheable readExternalWithCodec(String blockName, boolean repeat, boolean caching) throws IOException {
    if (overflowExtEnabled == false)
        return null;
    // Check if we have already this block in external storage cache
    try {
        // We use 16-byte hash for external storage cache
        byte[] hashed = Utils.hash128(blockName);
        StorageHandle handle = storage.newStorageHandle();
        byte[] data = (byte[]) extStorageCache.get(hashed);
        if (data == null) {
            if (repeat == false)
                extRefStats.miss(caching);
            return null;
        } else {
            extRefStats.hit(caching);
        }
        // Initialize handle
        handle.fromBytes(data);
        ByteBuffer buffer = extStorageCache.getLocalBufferWithAddress().getBuffer();
        SerDe serde = extStorageCache.getSerDe();
        Codec codec = extStorageCache.getCompressionCodec();
        buffer.clear();
        StorageHandle newHandle = storage.getData(handle, buffer);
        if (buffer.position() > 0)
            buffer.flip();
        int size = buffer.getInt();
        if (size == 0) {
            // BIGBASE-45
            // Remove reference from reference cache
            // reference is in L3-RAM cache but no object in L3-DISK cache was found
            // remove only if handle is invalid
            if (storage.isValid(handle) == false) {
                extStorageCache.remove(hashed);
            }
            return null;
        }
        // Skip key
        int keySize = buffer.getInt();
        buffer.position(8 + keySize);
        boolean inMemory = buffer.get() == (byte) 1;
        buffer.limit(size + 4);
        Cacheable obj = (Cacheable) serde.readCompressed(buffer/*, codec*/);
        offHeapCache.put(blockName, obj);
        if (newHandle.equals(handle) == false) {
            extStorageCache.put(hashed, newHandle.toBytes());
        }
        return obj;
    } catch (Throwable e) {
        fatalExternalReads.incrementAndGet();
        throw new IOException(e);
    }
}
From source file:com.att.aro.core.packetanalysis.impl.VideoUsageAnalysisImpl.java
/**
 * Parse mp4 chunk/segment that contains one moof and one mdat.
 *
 * @param content
 * @return Integer[] mdat payload length, time sequence
 */
private Integer[] parsePayload(byte[] content) {
    byte[] buf = new byte[4];
    int mdatSize = 0;
    ByteBuffer bbc = ByteBuffer.wrap(content);
    // get moof size
    double moofSize = bbc.getInt();
    bbc.get(buf);
    String moofName = new String(buf);
    int timeSequence = 0;
    if (moofName.equals("moof")) {
        // skip past mfhd
        double mfhdSize = bbc.getInt();
        bbc.get(buf);
        String mfhdName = new String(buf);
        if (mfhdName.equals("mfhd")) {
            bbc.position((int) mfhdSize + bbc.position() - 8);
            // parse into traf
            // double trafSize = bbc.getInt(); // skip over
            bbc.get(buf);
            String trafName = new String(buf);
            if (trafName.equals("traf")) {
                // skip tfhd
                double tfhdSize = bbc.getInt();
                bbc.get(buf);
                String tfhdName = new String(buf);
                if (tfhdName.equals("tfhd")) {
                    // skip past this atom
                    bbc.position((int) tfhdSize + bbc.position() - 8);
                }
                // parse tfdt
                // double tfdtSize = bbc.getInt(); // skip over
                bbc.get(buf);
                String tfdtName = new String(buf);
                if (tfdtName.equals("tfdt")) {
                    bbc.getInt(); // skip over always 16k
                    bbc.getInt(); // skip over always 0
                    timeSequence = bbc.getInt();
                }
            }
        }
    } else {
        return new Integer[] { 0, 0 };
    }
    // parse mdat
    bbc.position((int) moofSize);
    mdatSize = bbc.getInt();
    bbc.get(buf, 0, 4);
    String mdatName = new String(buf);
    if (mdatName.equals("mdat")) {
        mdatSize -= 8;
    } else {
        mdatSize = 0;
    }
    return new Integer[] { mdatSize, timeSequence };
}
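Each MP4 box the method walks starts with a 4-byte big-endian size followed by a 4-byte ASCII type, which is why it alternates bbc.getInt() with bbc.get(buf). A minimal sketch of reading one such box header, using a hypothetical in-memory buffer rather than real MP4 data:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class BoxHeaderSketch {
    public static void main(String[] args) {
        // hypothetical header of a 1000-byte "moof" box
        ByteBuffer bbc = ByteBuffer.allocate(8);
        bbc.putInt(1000).put("moof".getBytes(StandardCharsets.US_ASCII));
        bbc.flip();
        int boxSize = bbc.getInt(); // ByteBuffer defaults to big-endian, as MP4 requires
        byte[] type = new byte[4];
        bbc.get(type);
        String boxName = new String(type, StandardCharsets.US_ASCII);
        System.out.println(boxName + " size=" + boxSize); // moof size=1000
        // skipping a box: position(start of box + boxSize) jumps to the next header
    }
}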
From source file:hivemall.recommend.SlimUDTF.java
private void runIterativeTraining() throws HiveException {
    final ByteBuffer buf = this._inputBuf;
    final NioStatefulSegment dst = this._fileIO;
    assert (buf != null);
    assert (dst != null);
    final Reporter reporter = getReporter();
    final Counters.Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.recommend.slim$Counter", "iteration");
    try {
        if (dst.getPosition() == 0L) { // run iterations w/o temporary file
            if (buf.position() == 0) {
                return; // no training example
            }
            buf.flip();
            for (int iter = 2; iter < numIterations; iter++) {
                _cvState.next();
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);
                while (buf.remaining() > 0) {
                    int recordBytes = buf.getInt();
                    assert (recordBytes > 0) : recordBytes;
                    replayTrain(buf);
                }
                buf.rewind();
                if (_cvState.isConverged(_observedTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + _cvState.getCurrentIteration() + " iterations of "
                    + NumberUtils.formatNumber(_observedTrainingExamples)
                    + " training examples on memory (thus "
                    + NumberUtils.formatNumber(_observedTrainingExamples * _cvState.getCurrentIteration())
                    + " training updates in total) ");
        } else { // read training examples in the temporary file and invoke train for each example
            // write KNNi in buffer to a temporary file
            if (buf.remaining() > 0) {
                writeBuffer(buf, dst);
            }
            try {
                dst.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + dst.getFile().getAbsolutePath(), e);
            }
            if (logger.isInfoEnabled()) {
                File tmpFile = dst.getFile();
                logger.info("Wrote KNN entries of axis items to a temporary file for iterative training: "
                        + tmpFile.getAbsolutePath() + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }
            // run iterations
            for (int iter = 2; iter < numIterations; iter++) {
                _cvState.next();
                setCounterValue(iterCounter, iter);
                buf.clear();
                dst.resetPosition();
                while (true) {
                    reportProgress(reporter);
                    // load a KNNi to a buffer in the temporary file
                    final int bytesRead;
                    try {
                        bytesRead = dst.read(buf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + dst.getFile().getAbsolutePath(), e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;
                    // reads training examples from a buffer
                    buf.flip();
                    int remain = buf.remaining();
                    if (remain < SizeOf.INT) {
                        throw new HiveException("Illegal file format was detected");
                    }
                    while (remain >= SizeOf.INT) {
                        int pos = buf.position();
                        int recordBytes = buf.getInt();
                        remain -= SizeOf.INT;
                        if (remain < recordBytes) {
                            buf.position(pos);
                            break;
                        }
                        replayTrain(buf);
                        remain -= recordBytes;
                    }
                    buf.compact();
                }
                if (_cvState.isConverged(_observedTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + _cvState.getCurrentIteration() + " iterations of "
                    + NumberUtils.formatNumber(_observedTrainingExamples)
                    + " training examples on memory and KNNi data on secondary storage (thus "
                    + NumberUtils.formatNumber(_observedTrainingExamples * _cvState.getCurrentIteration())
                    + " training updates in total) ");
        }
    } catch (Throwable e) {
        throw new HiveException("Exception caused in the iterative training", e);
    } finally {
        // delete the temporary file and release resources
        try {
            dst.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + dst.getFile().getAbsolutePath(), e);
        }
        this._inputBuf = null;
        this._fileIO = null;
    }
}
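The read loop above peeks a length-prefixed record with getInt(), and when the rest of the record has not yet been loaded from disk it rolls the position back and compacts the buffer so the partial record survives into the next read. A small self-contained sketch of that roll-back step (hypothetical PartialRecordSketch class and record layout):

import java.nio.ByteBuffer;

public class PartialRecordSketch {
    // returns true if a whole length-prefixed record was consumed, false if it was only partial
    static boolean tryConsumeRecord(ByteBuffer buf) {
        if (buf.remaining() < Integer.BYTES) {
            return false;
        }
        int pos = buf.position();
        int recordBytes = buf.getInt();             // length prefix
        if (buf.remaining() < recordBytes) {
            buf.position(pos);                      // roll back: record is incomplete
            return false;
        }
        buf.position(buf.position() + recordBytes); // a real reader would parse the body here
        return true;
    }

    public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(16);
        buf.putInt(8).putLong(42L);                 // one record: 4-byte length + 8-byte body
        buf.flip();
        System.out.println(tryConsumeRecord(buf));  // true
        System.out.println(tryConsumeRecord(buf));  // false: nothing left
    }
}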
From source file:eu.stratosphere.nephele.services.iomanager.IOManagerPerformanceBenchmark.java
@SuppressWarnings("resource") private final void speedTestNIO(int bufferSize, boolean direct) throws IOException { final Channel.ID tmpChannel = ioManager.createChannel(); File tempFile = null;//from w w w. j a v a 2s. c om FileChannel fs = null; try { tempFile = new File(tmpChannel.getPath()); RandomAccessFile raf = new RandomAccessFile(tempFile, "rw"); fs = raf.getChannel(); ByteBuffer buf = direct ? ByteBuffer.allocateDirect(bufferSize) : ByteBuffer.allocate(bufferSize); long writeStart = System.currentTimeMillis(); int valsLeft = NUM_INTS_WRITTEN; while (valsLeft-- > 0) { if (buf.remaining() < 4) { buf.flip(); fs.write(buf); buf.clear(); } buf.putInt(valsLeft); } if (buf.position() > 0) { buf.flip(); fs.write(buf); } fs.close(); raf.close(); fs = null; long writeElapsed = System.currentTimeMillis() - writeStart; // ---------------------------------------------------------------- raf = new RandomAccessFile(tempFile, "r"); fs = raf.getChannel(); buf.clear(); long readStart = System.currentTimeMillis(); fs.read(buf); buf.flip(); valsLeft = NUM_INTS_WRITTEN; while (valsLeft-- > 0) { if (buf.remaining() < 4) { buf.compact(); fs.read(buf); buf.flip(); } if (buf.getInt() != valsLeft) { throw new IOException(); } } fs.close(); raf.close(); long readElapsed = System.currentTimeMillis() - readStart; LOG.info("NIO Channel with buffer " + bufferSize + ": write " + writeElapsed + " msecs, read " + readElapsed + " msecs."); } finally { // close if possible if (fs != null) { fs.close(); fs = null; } // try to delete the file if (tempFile != null) { tempFile.delete(); } } }
From source file:org.apache.carbondata.core.util.CarbonUtil.java
public static ValueEncoderMeta deserializeEncoderMetaNew(byte[] encodeMeta) {
    ByteBuffer buffer = ByteBuffer.wrap(encodeMeta);
    char measureType = buffer.getChar();
    ValueEncoderMeta valueEncoderMeta = new ValueEncoderMeta();
    valueEncoderMeta.setType(measureType);
    switch (measureType) {
    case CarbonCommonConstants.DOUBLE_MEASURE:
        valueEncoderMeta.setMaxValue(buffer.getDouble());
        valueEncoderMeta.setMinValue(buffer.getDouble());
        valueEncoderMeta.setUniqueValue(buffer.getDouble());
        break;
    case CarbonCommonConstants.BIG_DECIMAL_MEASURE:
        valueEncoderMeta.setMaxValue(0.0);
        valueEncoderMeta.setMinValue(0.0);
        valueEncoderMeta.setUniqueValue(0.0);
        break;
    case CarbonCommonConstants.BIG_INT_MEASURE:
        valueEncoderMeta.setMaxValue(buffer.getLong());
        valueEncoderMeta.setMinValue(buffer.getLong());
        valueEncoderMeta.setUniqueValue(buffer.getLong());
        break;
    default:
        throw new IllegalArgumentException("invalid measure type");
    }
    valueEncoderMeta.setDecimal(buffer.getInt());
    valueEncoderMeta.setDataTypeSelected(buffer.get());
    return valueEncoderMeta;
}
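The method above works only because the reads (getChar, three getDouble/getLong calls, getInt, get) follow exactly the order and widths used when the metadata was written. A minimal sketch of such a fixed-layout round trip, with a hypothetical header layout loosely mirroring the one above:

import java.nio.ByteBuffer;

public class MetaRoundTripSketch {
    public static void main(String[] args) {
        // write a hypothetical header: char type tag, max/min/unique doubles, decimal int, selector byte
        ByteBuffer out = ByteBuffer.allocate(2 + 3 * 8 + 4 + 1);
        out.putChar('n').putDouble(99.5).putDouble(0.5).putDouble(-1.0).putInt(2).put((byte) 1);
        // read it back in the same order it was written
        ByteBuffer in = ByteBuffer.wrap(out.array());
        char type = in.getChar();
        double max = in.getDouble(), min = in.getDouble(), unique = in.getDouble();
        int decimal = in.getInt(); // getInt() must line up with the putInt() offset
        byte selected = in.get();
        System.out.println(type + " " + max + " " + min + " " + unique + " " + decimal + " " + selected);
    }
}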
From source file:hivemall.fm.FactorizationMachineUDTF.java
protected void runTrainingIteration(int iterations) throws HiveException {
    final ByteBuffer inputBuf = this._inputBuf;
    final NioStatefullSegment fileIO = this._fileIO;
    assert (inputBuf != null);
    assert (fileIO != null);
    final long numTrainingExamples = _t;
    final boolean adaregr = _va_rand != null;
    final Reporter reporter = getReporter();
    final Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.fm.FactorizationMachines$Counter", "iteration");
    try {
        if (fileIO.getPosition() == 0L) { // run iterations w/o temporary file
            if (inputBuf.position() == 0) {
                return; // no training example
            }
            inputBuf.flip();
            int iter = 2;
            for (; iter <= iterations; iter++) {
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);
                while (inputBuf.remaining() > 0) {
                    int bytes = inputBuf.getInt();
                    assert (bytes > 0) : bytes;
                    int xLength = inputBuf.getInt();
                    final Feature[] x = new Feature[xLength];
                    for (int j = 0; j < xLength; j++) {
                        x[j] = instantiateFeature(inputBuf);
                    }
                    double y = inputBuf.getDouble();
                    // invoke train
                    ++_t;
                    train(x, y, adaregr);
                }
                if (_cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
                inputBuf.rewind();
            }
            LOG.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on memory (thus " + NumberUtils.formatNumber(_t)
                    + " training updates in total) ");
        } else { // read training examples in the temporary file and invoke train for each example
            // write training examples in buffer to a temporary file
            if (inputBuf.remaining() > 0) {
                writeBuffer(inputBuf, fileIO);
            }
            try {
                fileIO.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e);
            }
            if (LOG.isInfoEnabled()) {
                File tmpFile = fileIO.getFile();
                LOG.info("Wrote " + numTrainingExamples
                        + " records to a temporary file for iterative training: "
                        + tmpFile.getAbsolutePath() + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }
            // run iterations
            int iter = 2;
            for (; iter <= iterations; iter++) {
                setCounterValue(iterCounter, iter);
                inputBuf.clear();
                fileIO.resetPosition();
                while (true) {
                    reportProgress(reporter);
                    // TODO prefetch
                    // writes training examples to a buffer in the temporary file
                    final int bytesRead;
                    try {
                        bytesRead = fileIO.read(inputBuf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + fileIO.getFile().getAbsolutePath(), e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;
                    // reads training examples from a buffer
                    inputBuf.flip();
                    int remain = inputBuf.remaining();
                    if (remain < INT_BYTES) {
                        throw new HiveException("Illegal file format was detected");
                    }
                    while (remain >= INT_BYTES) {
                        int pos = inputBuf.position();
                        int recordBytes = inputBuf.getInt();
                        remain -= INT_BYTES;
                        if (remain < recordBytes) {
                            inputBuf.position(pos);
                            break;
                        }
                        final int xLength = inputBuf.getInt();
                        final Feature[] x = new Feature[xLength];
                        for (int j = 0; j < xLength; j++) {
                            x[j] = instantiateFeature(inputBuf);
                        }
                        double y = inputBuf.getDouble();
                        // invoke training
                        ++_t;
                        train(x, y, adaregr);
                        remain -= recordBytes;
                    }
                    inputBuf.compact();
                }
                if (_cvState.isConverged(iter, numTrainingExamples)) {
                    break;
                }
            }
            LOG.info("Performed " + Math.min(iter, iterations) + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on a secondary storage (thus "
                    + NumberUtils.formatNumber(_t) + " training updates in total)");
        }
    } finally {
        // delete the temporary file and release resources
        try {
            fileIO.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e);
        }
        this._inputBuf = null;
        this._fileIO = null;
    }
}
From source file:hivemall.GeneralLearnerBaseUDTF.java
protected final void runIterativeTraining(@Nonnegative final int iterations) throws HiveException {
    final ByteBuffer buf = this.inputBuf;
    final NioStatefulSegment dst = this.fileIO;
    assert (buf != null);
    assert (dst != null);
    final long numTrainingExamples = count;
    final Reporter reporter = getReporter();
    final Counters.Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.GeneralLearnerBase$Counter", "iteration");
    try {
        if (dst.getPosition() == 0L) { // run iterations w/o temporary file
            if (buf.position() == 0) {
                return; // no training example
            }
            buf.flip();
            for (int iter = 2; iter <= iterations; iter++) {
                cvState.next();
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);
                while (buf.remaining() > 0) {
                    int recordBytes = buf.getInt();
                    assert (recordBytes > 0) : recordBytes;
                    int featureVectorLength = buf.getInt();
                    final FeatureValue[] featureVector = new FeatureValue[featureVectorLength];
                    for (int j = 0; j < featureVectorLength; j++) {
                        featureVector[j] = readFeatureValue(buf, featureType);
                    }
                    float target = buf.getFloat();
                    train(featureVector, target);
                }
                buf.rewind();
                if (is_mini_batch) { // Update model with accumulated delta
                    batchUpdate();
                }
                if (cvState.isConverged(numTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + cvState.getCurrentIteration() + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on memory (thus "
                    + NumberUtils.formatNumber(numTrainingExamples * cvState.getCurrentIteration())
                    + " training updates in total) ");
        } else { // read training examples in the temporary file and invoke train for each example
            // write training examples in buffer to a temporary file
            if (buf.remaining() > 0) {
                writeBuffer(buf, dst);
            }
            try {
                dst.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + dst.getFile().getAbsolutePath(), e);
            }
            if (logger.isInfoEnabled()) {
                File tmpFile = dst.getFile();
                logger.info("Wrote " + numTrainingExamples
                        + " records to a temporary file for iterative training: "
                        + tmpFile.getAbsolutePath() + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }
            // run iterations
            for (int iter = 2; iter <= iterations; iter++) {
                cvState.next();
                setCounterValue(iterCounter, iter);
                buf.clear();
                dst.resetPosition();
                while (true) {
                    reportProgress(reporter);
                    // TODO prefetch
                    // writes training examples to a buffer in the temporary file
                    final int bytesRead;
                    try {
                        bytesRead = dst.read(buf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + dst.getFile().getAbsolutePath(), e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;
                    // reads training examples from a buffer
                    buf.flip();
                    int remain = buf.remaining();
                    if (remain < SizeOf.INT) {
                        throw new HiveException("Illegal file format was detected");
                    }
                    while (remain >= SizeOf.INT) {
                        int pos = buf.position();
                        int recordBytes = buf.getInt();
                        remain -= SizeOf.INT;
                        if (remain < recordBytes) {
                            buf.position(pos);
                            break;
                        }
                        int featureVectorLength = buf.getInt();
                        final FeatureValue[] featureVector = new FeatureValue[featureVectorLength];
                        for (int j = 0; j < featureVectorLength; j++) {
                            featureVector[j] = readFeatureValue(buf, featureType);
                        }
                        float target = buf.getFloat();
                        train(featureVector, target);
                        remain -= recordBytes;
                    }
                    buf.compact();
                }
                if (is_mini_batch) { // Update model with accumulated delta
                    batchUpdate();
                }
                if (cvState.isConverged(numTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + cvState.getCurrentIteration() + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on a secondary storage (thus "
                    + NumberUtils.formatNumber(numTrainingExamples * cvState.getCurrentIteration())
                    + " training updates in total)");
        }
    } catch (Throwable e) {
        throw new HiveException("Exception caused in the iterative training", e);
    } finally {
        // delete the temporary file and release resources
        try {
            dst.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + dst.getFile().getAbsolutePath(), e);
        }
        this.inputBuf = null;
        this.fileIO = null;
    }
}