List of usage examples for java.io.RandomAccessFile.readInt()
public final int readInt() throws IOException
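A minimal, self-contained sketch of the pattern most of the examples below rely on: write a 4-byte big-endian length prefix with writeInt, then read it back with readInt followed by readFully. The file name demo.bin and the payload are illustrative assumptions, not taken from the projects below.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;

public class ReadIntExample {
    public static void main(String[] args) throws IOException {
        // illustrative file name; any writable path works
        try (RandomAccessFile raf = new RandomAccessFile("demo.bin", "rw")) {
            byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);
            raf.writeInt(payload.length); // 4-byte big-endian length prefix
            raf.write(payload);           // record body

            raf.seek(0);                  // rewind to the start of the file
            int length = raf.readInt();   // reads exactly 4 bytes; throws EOFException if fewer remain
            byte[] buf = new byte[length];
            raf.readFully(buf);           // read the record body described by the prefix
            System.out.println(new String(buf, StandardCharsets.UTF_8)); // prints "hello"
        }
    }
}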
From source file: org.commoncrawl.service.listcrawler.HDFSFlusherThread.java
private long generateSequenceFileAndIndex(int itemFlushLimit, RandomAccessFile sourceLogFile, long startPos,
        long endPos, byte[] syncBytes, SequenceFile.Writer writer, DataOutput indexStreamOut,
        ArrayList<FingerprintAndOffsetTuple> tupleListOut) throws IOException {

    byte[] syncCheck = new byte[syncBytes.length];
    // and create a list to hold fingerprint / offset information
    Vector<FingerprintAndOffsetTuple> fpOffsetList = new Vector<FingerprintAndOffsetTuple>();
    long currentPos = startPos;

    LOG.info("Flushing Entries Starting up to offset:" + endPos);

    CacheItemHeader itemHeader = new CacheItemHeader();
    int itemsProcessed = 0;
    boolean ignoreFlushLimit = false;

    // start read
    while (currentPos < endPos) {
        if ((endPos - currentPos) < LocalLogFileHeader.SYNC_BYTES_SIZE)
            break;
        // seek to current position ...
        sourceLogFile.seek(currentPos);

        boolean headerLoadFailed = false;
        try {
            // read the item header ... assuming things are good so far ...
            itemHeader.readHeader(sourceLogFile);
        } catch (IOException e) {
            CacheManager.LOG.error("### Item Header Load At Position:" + currentPos + " Failed With Exception:"
                    + CCStringUtils.stringifyException(e));
            headerLoadFailed = true;
        }

        if (headerLoadFailed) {
            CacheManager.LOG.error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");
            currentPos += LocalLogFileHeader.SYNC_BYTES_SIZE;
        }

        // if header sync bytes don't match .. then seek to next sync position ...
        if (headerLoadFailed || !Arrays.equals(itemHeader._sync, syncBytes)) {
            CacheManager.LOG.error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");

            // reseek to current pos
            sourceLogFile.seek(currentPos);
            // read in a sync.length buffer amount
            sourceLogFile.readFully(syncCheck);

            int syncLen = syncBytes.length;

            // start scan for next sync position ...
            for (int i = 0; sourceLogFile.getFilePointer() < endPos; i++) {
                int j = 0;
                for (; j < syncLen; j++) {
                    if (syncBytes[j] != syncCheck[(i + j) % syncLen])
                        break;
                }
                if (j == syncLen) {
                    sourceLogFile.seek(sourceLogFile.getFilePointer() - LocalLogFileHeader.SYNC_BYTES_SIZE); // position before sync
                    break;
                }
                syncCheck[i % syncLen] = sourceLogFile.readByte();
            }

            // whatever happened, file pointer is at current pos
            currentPos = sourceLogFile.getFilePointer();

            if (currentPos < endPos) {
                CacheManager.LOG.info("### Item Loader Found another sync point at:" + currentPos);
            } else {
                CacheManager.LOG.error("### No more sync points found!");
            }
        } else {
            CacheManager.LOG.info("WritingItem with FP:" + itemHeader._fingerprint + " Pos Is:" + writer.getLength());

            // track offset information for index building purposes
            fpOffsetList.add(new FingerprintAndOffsetTuple(itemHeader._fingerprint, writer.getLength()));

            // read item data ...
            CacheItem cacheItem = new CacheItem();
            cacheItem.readFields(sourceLogFile);

            // now read content length
            int contentLength = sourceLogFile.readInt();
            // and if content present... allocate buffer
            if (contentLength != 0) {
                // allocate content buffer
                byte[] contentBuffer = new byte[contentLength];
                // read it from disk
                sourceLogFile.readFully(contentBuffer);
                // and set content into cache item
                cacheItem.setContent(new Buffer(contentBuffer));
            }

            CacheManager.LOG.info("Adding to Sequence File Item with URL:" + cacheItem.getUrl());
            // write to sequence file ...
            writer.append(new Text(cacheItem.getUrl()), cacheItem);

            // now seek past data
            currentPos += CacheItemHeader.SIZE + itemHeader._dataLength + CacheManager.ITEM_RECORD_TRAILING_BYTES;
            // increment item count
            itemsProcessed++;
        }

        if (!ignoreFlushLimit && itemsProcessed >= itemFlushLimit) {
            // ok this gets tricky now ...
            // figure out how many bytes of data were required to get to flush limit
            long approxCheckpointSize = currentPos - startPos;
            // compute a threshold number
            long bytesThreshold = (long) (approxCheckpointSize * .70);
            // compute bytes remaining in checkpoint file ...
            long bytesRemaining = endPos - currentPos;

            // ok if bytes remaining are less than threshold number then go ahead and gobble
            // everything up in a single pass (to prevent smaller subsequent index
            if (bytesRemaining <= bytesThreshold) {
                // ignore the flush limit and keep on rolling to the end ...
                ignoreFlushLimit = true;
                LOG.warn("*****Bytes Remaining:" + bytesRemaining + " less than % of last whole chkpt size:"
                        + approxCheckpointSize + ". Bypassing Flush Limit");
            } else {
                LOG.info("Reached Flush Item Limit:" + itemsProcessed + " Breaking Out");
                break;
            }
        }
    }

    LOG.info("Writing Index");
    // ok now build the index file ...
    HDFSFileIndex.writeIndex(fpOffsetList, indexStreamOut);
    LOG.info("Done Writing Index. Total Items Written:" + fpOffsetList.size());
    // copy offset list into tuple list
    tupleListOut.addAll(fpOffsetList);

    return currentPos;
}
From source file: edu.umass.cs.gigapaxos.SQLPaxosLogger.java
private static void compactLogfile(File file, PaxosPacketizer packetizer, MessageLogDiskMap msgLog,
        FileIDMap fidMap) throws IOException, JSONException {
    RandomAccessFile raf = null, rafTmp = null;
    File tmpFile = new File(file.toString() + TMP_FILE_SUFFIX);
    int tmpFileSize = 0;
    boolean compacted = false, neededAtAll = false;
    HashMap<String, ArrayList<LogIndexEntry>> logIndexEntries = new HashMap<String, ArrayList<LogIndexEntry>>();

    // quick delete
    if (fidMap.isRemovable(file.toString(), msgLog)) {
        deleteFile(file, msgLog);
        log.log(Level.INFO, "{0} quick-garbage-collected file {1}", new Object[] { msgLog.disk, file });
        return;
    } else
        log.log(Level.FINE, "{0} not quick-GCing file {1} because dependent paxosIDs = {2}",
                new Object[] { msgLog.disk, file, fidMap.fidMap.get(file.toString()) });

    if (System.currentTimeMillis() - file.lastModified() < LOGFILE_AGE_THRESHOLD * 1000)
        return;

    try {
        long t = System.currentTimeMillis();
        raf = new RandomAccessFile(file.toString(), "r");
        rafTmp = new RandomAccessFile(tmpFile.toString(), "rw");
        while (raf.getFilePointer() < raf.length()) {
            long offset = rafTmp.getFilePointer();
            int length = raf.readInt();
            byte[] msg = new byte[length];
            raf.readFully(msg);
            PaxosPacket pp = packetizer != null ? packetizer.stringToPaxosPacket(msg
                    // new String(msg, CHARSET)
                    ) : PaxosPacket.getPaxosPacket(new String(msg, CHARSET));
            if (!logIndexEntries.containsKey(pp.getPaxosID()))
                logIndexEntries.put(pp.getPaxosID(), new ArrayList<LogIndexEntry>());
            logIndexEntries.get(pp.getPaxosID()).add(new LogIndexEntry(getSlot(pp), getBallot(pp).ballotNumber,
                    getBallot(pp).coordinatorID, pp.getType().getInt(), file.toString(), offset, length));

            if (isLogMsgNeeded(pp, msgLog)) {
                ByteBuffer bbuf = ByteBuffer.allocate(length + 4);
                bbuf.putInt(length);
                bbuf.put(msg);
                rafTmp.write(bbuf.array());
                neededAtAll = true;
                tmpFileSize += bbuf.capacity();
            } else {
                compacted = true;
                log.log(Level.FINE, "From logfile {0} garbage collecting {1}",
                        new Object[] { file, pp.getSummary() });
            }
        }
        DelayProfiler.updateDelay("compact", t);
    } finally {
        if (raf != null)
            raf.close();
        if (rafTmp != null) {
            rafTmp.getChannel().force(true);
            rafTmp.close();
        }
    }

    assert (tmpFile.exists() && tmpFile.length() == tmpFileSize) : tmpFile.length() + " != " + tmpFileSize;

    if (compacted && neededAtAll)
        synchronized (msgLog) {
            modifyLogfileAndLogIndex(file, tmpFile, logIndexEntries, msgLog, fidMap);
            log.log(Level.INFO, "{0} compacted logfile {1}", new Object[] { msgLog, file });
        }
    else if (!neededAtAll) {
        log.log(Level.INFO, "Deleting logfile {0} as its log entries are no longer needed",
                new Object[] { file });
        deleteFile(file, msgLog);
    } else // !compacted
        log.log(Level.INFO, "Logfile {0} unchanged upon compaction attempt", new Object[] { file });

    assert (tmpFile.toString().endsWith(TMP_FILE_SUFFIX));
    if (tmpFile.exists())
        deleteFile(tmpFile, msgLog);
}
From source file: edu.umass.cs.gigapaxos.SQLPaxosLogger.java
private static void mergeLogfiles(File prev, File cur, PaxosPacketizer packetizer, MessageLogDiskMap msgLog,
        FileIDMap fidMap) throws IOException, JSONException {
    File tmpFile = new File(cur.toString() + TMP_FILE_SUFFIX);
    RandomAccessFile rafTmp = null, rafPrev = null, rafCur = null;
    long t = System.currentTimeMillis();
    try {
        rafTmp = new RandomAccessFile(tmpFile.toString(), "rw");
        rafPrev = new RandomAccessFile(prev.toString(), "r");
        rafCur = new RandomAccessFile(cur.toString(), "r");
        byte[] buf = new byte[1024];
        int numRead = 0;
        // copy prev file to tmp file
        while ((numRead = rafPrev.read(buf)) > 0)
            rafTmp.write(buf, 0, numRead);
        // copy cur file to tmp file
        while ((numRead = rafCur.read(buf)) > 0)
            rafTmp.write(buf, 0, numRead);
    } finally {
        if (rafTmp != null)
            rafTmp.close();
        if (rafPrev != null)
            rafPrev.close();
        if (rafCur != null)
            rafCur.close();
    }

    // copy tmp file index into memory
    HashMap<String, ArrayList<LogIndexEntry>> logIndexEntries = new HashMap<String, ArrayList<LogIndexEntry>>();
    try {
        rafTmp = new RandomAccessFile(tmpFile.toString(), "r");
        while (rafTmp.getFilePointer() < rafTmp.length()) {
            long offset = rafTmp.getFilePointer();
            int length = rafTmp.readInt();
            byte[] msg = new byte[length];
            rafTmp.readFully(msg);
            PaxosPacket pp = packetizer != null ? packetizer.stringToPaxosPacket(msg
                    // new String(msg, CHARSET)
                    ) : PaxosPacket.getPaxosPacket(new String(msg, CHARSET));
            assert (pp != null) : " read logged message " + new String(msg, CHARSET);
            if (!logIndexEntries.containsKey(pp.getPaxosID()))
                logIndexEntries.put(pp.getPaxosID(), new ArrayList<LogIndexEntry>());
            logIndexEntries.get(pp.getPaxosID()).add(new LogIndexEntry(getSlot(pp), getBallot(pp).ballotNumber,
                    getBallot(pp).coordinatorID, pp.getType().getInt(), cur.toString(), offset, length));
        }
    } finally {
        if (rafTmp != null)
            rafTmp.close();
    }

    // atomically copy tmpFile to cur, adjust log index, delete prev
    synchronized (msgLog) {
        modifyLogfileAndLogIndex(cur, tmpFile, logIndexEntries, msgLog, fidMap);
        if (prev.delete())
            fidMap.remove(prev.toString());
    }
    DelayProfiler.updateDelay("merge", t);
    log.log(Level.INFO, "{0} merged logfile {1} into {2}", new Object[] { msgLog, prev, cur });
}
From source file: edu.umass.cs.gigapaxos.SQLPaxosLogger.java
private byte[] getJournaledMessage(String logfile, long offset, int length, RandomAccessFile raf)
        throws IOException {
    assert (logfile != null);
    if (!new File(logfile).exists())
        return null;
    boolean locallyOpened = false;
    if (raf == null) {
        locallyOpened = true;
        raf = new RandomAccessFile(logfile, "r");
    }
    boolean error = false;
    String msg = null;
    byte[] buf = null;
    try {
        raf.seek(offset);
        assert (raf.length() > offset) : this + " " + raf.length() + " <= " + offset + " while reading logfile "
                + logfile;
        int readLength = raf.readInt();
        try {
            assert (readLength == length) : this + " : " + readLength + " != " + length;
        } catch (Error e) {
            error = true;
            log.severe(this + ": " + e);
            e.printStackTrace();
        }
        int bufLength = length;
        buf = new byte[bufLength];
        raf.readFully(buf);
        if (JOURNAL_COMPRESSION)
            buf = inflate(buf);
        msg = new String(buf, CHARSET);
    } catch (IOException | Error e) {
        log.log(Level.INFO, "{0} incurred IOException while retrieving journaled message {1}:{2}",
                new Object[] { this, logfile, offset + ":" + length });
        e.printStackTrace();
        if (locallyOpened)
            raf.close();
        throw e;
    }
    log.log(error ? Level.INFO : Level.FINEST, "{0} returning journaled message from {1}:{2} = [{3}]",
            new Object[] { this, logfile, offset + ":" + length, msg });
    return buf; // msg;
}