List of usage examples for java.io RandomAccessFile getFilePointer
public native long getFilePointer() throws IOException;
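getFilePointer() returns the current byte offset from the beginning of the file, i.e. the position at which the next read or write will occur. Before the examples below, here is a minimal, self-contained sketch (the file name "demo.dat" is purely illustrative) showing how the pointer advances as data is written and how a saved offset can be used with seek():

import java.io.IOException;
import java.io.RandomAccessFile;

public class FilePointerDemo {
    public static void main(String[] args) throws IOException {
        // "demo.dat" is a hypothetical path used only for this sketch
        try (RandomAccessFile raf = new RandomAccessFile("demo.dat", "rw")) {
            System.out.println(raf.getFilePointer()); // 0 at open

            raf.writeInt(42);                         // advances the pointer by 4 bytes
            long afterInt = raf.getFilePointer();     // 4

            raf.writeLong(123L);                      // advances the pointer by 8 more bytes
            System.out.println(raf.getFilePointer()); // 12

            // remember a position, then seek back to it and read what was written there
            raf.seek(afterInt);
            System.out.println(raf.readLong());       // 123
        }
    }
}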
From source file:org.commoncrawl.service.crawler.CrawlList.java
private static void appendTargetsToLogFile(File logFileName, IntrusiveList<CrawlTarget> list) throws IOException {

    LogFileHeader header = new LogFileHeader();

    boolean preExistingHeader = logFileName.exists();

    RandomAccessFile file = new RandomAccessFile(logFileName, "rw");

    try {
        long headerOffset = 0;

        if (preExistingHeader) {
            headerOffset = readLogFileHeader(file, header);

            if (header._writePos == 0) {
                file.seek(headerOffset);
            } else {
                // seek to appropriate write position
                file.seek(header._writePos);
            }
        } else {
            headerOffset = writeLogFileHeader(file, header);
        }

        CustomByteArrayOutputStream bufferOutputStream = new CustomByteArrayOutputStream(1 << 17);
        DataOutputStream dataOutputStream = new DataOutputStream(bufferOutputStream);
        CRC32 crc = new CRC32();

        for (CrawlTarget target : list) {
            PersistentCrawlTarget persistentTarget = target.createPersistentTarget();

            bufferOutputStream.reset();
            // write to intermediate stream ...
            persistentTarget.write(dataOutputStream);
            // and crc the data ...
            crc.reset();
            crc.update(bufferOutputStream.getBuffer(), 0, bufferOutputStream.size());
            // write out length first
            file.writeInt(bufferOutputStream.size());
            // crc next
            long computedValue = crc.getValue();
            // TODO: waste of space - write 32 bit values as long because having problems with java sign promotion rules during read...
            file.writeLong(computedValue);
            // and then the data
            file.write(bufferOutputStream.getBuffer(), 0, bufferOutputStream.size());
        }

        // now update header ...
        header._itemCount += list.size();
        header._writePos = file.getFilePointer();

        // now write out header anew ...
        writeLogFileHeader(file, header);

    } finally {
        if (file != null) {
            file.close();
        }
    }
}
From source file:org.commoncrawl.service.listcrawler.CacheManager.java
/**
 * loadCache - load local cache from disk
 * @param activeLogPath
 * @param logFileHeader
 * @throws IOException
 */
private synchronized void loadCache(File activeLogPath, LocalLogFileHeader logFileHeader) throws IOException {

    RandomAccessFile file = new RandomAccessFile(getActiveLogFilePath(), "rw");

    byte[] syncCheck = new byte[_header._sync.length];

    try {
        long lastValidPos = LocalLogFileHeader.SIZE;
        long currentPos = lastValidPos;
        long endPos = file.length();

        CacheItemHeader itemHeader = new CacheItemHeader();

        // start read
        while (currentPos < endPos) {
            if ((endPos - currentPos) < LocalLogFileHeader.SYNC_BYTES_SIZE)
                break;

            // seek to current position ...
            file.seek(currentPos);

            boolean headerLoadFailed = false;

            try {
                // read the item header ... assuming things are good so far ...
                itemHeader.readHeader(file);
            } catch (IOException e) {
                LOG.error("### Item Header Load Failed With Exception:" + CCStringUtils.stringifyException(e));
                headerLoadFailed = true;
            }

            if (headerLoadFailed) {
                LOG.error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");
                currentPos += LocalLogFileHeader.SYNC_BYTES_SIZE;
            }

            // if header sync bytes don't match ... then seek to next sync position ...
            if (headerLoadFailed || !Arrays.equals(itemHeader._sync, _header._sync)) {
                LOG.error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");

                // reseek to current pos
                file.seek(currentPos);
                // read in a sync.length buffer amount
                file.readFully(syncCheck);

                int syncLen = _header._sync.length;

                // start scan for next sync position ...
                for (int i = 0; file.getFilePointer() < endPos; i++) {
                    int j = 0;
                    for (; j < syncLen; j++) {
                        if (_header._sync[j] != syncCheck[(i + j) % syncLen])
                            break;
                    }
                    if (j == syncLen) {
                        file.seek(file.getFilePointer() - LocalLogFileHeader.SYNC_BYTES_SIZE); // position before sync
                        break;
                    }
                    syncCheck[i % syncLen] = file.readByte();
                }
                // whatever happened, the file pointer is now at the current position
                currentPos = file.getFilePointer();

                if (currentPos < endPos) {
                    LOG.info("### Item Loader Found another sync point at:" + currentPos);
                } else {
                    LOG.error("### No more sync points found!");
                }
            } else {
                // ok figure out next steps based on header ...

                // for now, just add item to our list ...
                _fingerprintToLocalLogPos.put(itemHeader._fingerprint, _localLogStartOffset + currentPos);

                // now seek past data
                currentPos += CacheItemHeader.SIZE + itemHeader._dataLength + ITEM_RECORD_TRAILING_BYTES;
            }
        }
    } finally {
        if (file != null) {
            file.close();
        }
    }
}
From source file:com.joey.software.MoorFLSI.RepeatImageTextReader.java
public void loadTextData(File file) {
    try {
        RandomAccessFile in = new RandomAccessFile(file, "r");

        // Skip header
        in.readLine();
        in.readLine();
        in.readLine();

        // Skip Subject Information
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();

        String startTimeInput = in.readLine();
        String commentsInput = in.readLine();

        String data = in.readLine();
        while (!data.startsWith("2) System Configuration")) {
            commentsInput += data;
            data = in.readLine();
        }

        // System configuration
        // in.readLine();
        in.readLine();
        String timeConstantInput = in.readLine();
        String cameraGainInput = in.readLine();
        String exposureTimeInput = in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        String resolutionInput = in.readLine();

        // Time Data
        in.readLine();
        String timeDataInput = in.readLine();
        String totalImagesInput = in.readLine();
        in.readLine();
        in.readLine();
        // in.readLine();
        // System.out.println(in.readLine());
        // in.readLine();

        // Parse image size
        high = (new Scanner(resolutionInput.split(":")[1])).nextInt();
        wide = (new Scanner(resolutionInput.split(",")[1])).nextInt();

        int tot = 1;
        try {
            tot = (new Scanner(totalImagesInput.split(":")[1])).nextInt();
        } catch (Exception e) {
        }
        System.out.println(wide + "," + high);

        // Parse time information
        SimpleDateFormat format = new SimpleDateFormat("hh:mm:ss (dd/MM/yy)");
        Date startTime = null;
        try {
            startTime = format.parse(startTimeInput.split(": ")[1]);
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        String[] frameTimeData = timeDataInput.split("information:")[1].split(",");

        Date[] timeInfo = new Date[tot];
        for (int i = 0; i < frameTimeData.length - 1; i++) {
            GregorianCalendar cal = new GregorianCalendar();
            cal.setTime(startTime);

            String dat = (frameTimeData[i]);
            String[] timeVals = dat.split(":");

            int hour = Integer.parseInt(StringOperations.removeNonNumber(timeVals[0]));
            int min = Integer.parseInt(StringOperations.removeNonNumber(timeVals[1]));
            int sec = Integer.parseInt(StringOperations.removeNonNumber(timeVals[2]));
            int msec = Integer.parseInt(StringOperations.removeNonNumber(timeVals[3]));

            cal.add(Calendar.HOUR_OF_DAY, hour);
            cal.add(Calendar.MINUTE, min);
            cal.add(Calendar.SECOND, sec);
            cal.add(Calendar.MILLISECOND, msec);

            timeInfo[i] = cal.getTime();
        }

        // Parse Image Data
        /*
         * Close the RandomAccessFile and switch to a Scanner: first store the
         * current position, then move the new stream to that point.
         */
        long pos = in.getFilePointer();
        in.close();

        FileInputStream fIn = new FileInputStream(file);
        fIn.skip(pos);

        BufferedInputStream bIn = new BufferedInputStream(fIn);
        Scanner sIn = new Scanner(bIn);

        short[][][] holder = new short[tot][wide][high];

        JFrame f = new JFrame();
        f.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);
        StatusBarPanel stat = new StatusBarPanel();
        stat.setMaximum(high);
        f.getContentPane().setLayout(new BorderLayout());
        f.getContentPane().add(stat, BorderLayout.CENTER);
        f.setSize(200, 60);
        f.setVisible(true);

        for (int i = 0; i < tot; i++) {
            // Skip over the heading values
            stat.setStatusMessage("Loading " + i + " of " + tot);
            sIn.useDelimiter("\n");
            sIn.next();
            sIn.next();
            sIn.next();
            if (i != 0) {
                sIn.next();
            }
            sIn.reset();
            for (int y = 0; y < high; y++) {
                stat.setValue(y);
                sIn.nextInt();
                for (int x = 0; x < wide; x++) {
                    holder[i][x][y] = sIn.nextShort();
                }
            }
            addData(timeInfo[i], holder[i]);
        }
        // FrameFactroy.getFrame(new DynamicRangeImage(data[0]));

        // Start Image Data
    } catch (FileNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
From source file:org.commoncrawl.service.crawler.CrawlList.java
private static int readTargetsFromLogFile(CrawlList domain, File logFileName, int desiredReadAmount,
        IntrusiveList<CrawlTarget> targetsOut) throws IOException {

    int itemsRead = 0;

    if (logFileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(logFileName, "rw");

        LogFileHeader header = new LogFileHeader();

        try {
            long headerOffset = readLogFileHeader(file, header);

            // seek to appropriate read position
            if (header._readPos != 0)
                file.seek(header._readPos);

            int itemsToRead = Math.min(desiredReadAmount, header._itemCount);

            PersistentCrawlTarget persistentTarget = new PersistentCrawlTarget();
            CRC32 crc = new CRC32();
            CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 16);

            for (int i = 0; i < itemsToRead; ++i) {
                // read length ...
                int urlDataLen = file.readInt();
                long urlDataCRC = file.readLong();

                buffer.reset();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                file.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    throw new IOException("Crawl Target Log File Corrupt");
                } else {
                    // populate a persistentTarget from the (in memory) data stream
                    DataInputStream bufferReader = new DataInputStream(
                            new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));

                    persistentTarget.clear();
                    persistentTarget.readFields(bufferReader);

                    // populate a new crawl target structure ...
                    CrawlTarget newTarget = new CrawlTarget(domain, persistentTarget);

                    targetsOut.addTail(newTarget);
                }
            }

            itemsRead = itemsToRead;

            // now update header ...
            header._itemCount -= itemsRead;
            // now if item count is non zero ...
            if (header._itemCount != 0) {
                // set read cursor to next record location
                header._readPos = file.getFilePointer();
            }
            // otherwise ...
            else {
                // reset both cursors ...
                header._readPos = 0;
                header._writePos = 0;
            }

            // now write out header anew ...
            writeLogFileHeader(file, header);

        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return itemsRead;
}
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
void writeInitialSubDomainMetadataToDisk() throws IOException {

    RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");

    try {
        file.writeByte(0); // version
        file.writeInt(_transientSubDomainStats.size());

        ArrayList<CrawlListMetadata> sortedMetadata = new ArrayList<CrawlListMetadata>();
        sortedMetadata.addAll(_transientSubDomainStats.values());
        _transientSubDomainStats = null;

        CrawlListMetadata metadataArray[] = sortedMetadata.toArray(new CrawlListMetadata[0]);
        Arrays.sort(metadataArray, new Comparator<CrawlListMetadata>() {

            @Override
            public int compare(CrawlListMetadata o1, CrawlListMetadata o2) {
                int result = ((Integer) o2.getUrlCount()).compareTo(o1.getUrlCount());
                if (result == 0) {
                    result = o1.getDomainName().compareTo(o2.getDomainName());
                }
                return result;
            }
        });

        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

        TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();

        for (CrawlListMetadata entry : metadataArray) {
            // reset output buffer
            outputBuffer.reset();
            // write item to disk
            entry.serialize(outputBuffer, new BinaryProtocol());

            if (outputBuffer.getLength() > CrawlListMetadata.Constants.FixedDataSize) {
                LOG.fatal("Metadata Serialization for List:" + getListId() + " SubDomain:" + entry.getDomainName());
                System.out.println("Metadata Serialization for List:" + getListId() + " SubDomain:" + entry.getDomainName());
            }
            // save offset
            idToOffsetMap.put(entry.getDomainHash(), (int) file.getFilePointer());
            // write out fixed data size
            file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
        }

        // write lookup table
        _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);

        for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
            _offsetLookupTable.writeLong(entry.getKey());
            _offsetLookupTable.writeInt(entry.getValue());
        }
    } finally {
        file.close();
    }
    _transientSubDomainStats = null;
}
From source file:edu.umass.cs.gigapaxos.SQLPaxosLogger.java
private static void compactLogfile(File file, PaxosPacketizer packetizer, MessageLogDiskMap msgLog,
        FileIDMap fidMap) throws IOException, JSONException {
    RandomAccessFile raf = null, rafTmp = null;
    File tmpFile = new File(file.toString() + TMP_FILE_SUFFIX);
    int tmpFileSize = 0;
    boolean compacted = false, neededAtAll = false;
    HashMap<String, ArrayList<LogIndexEntry>> logIndexEntries = new HashMap<String, ArrayList<LogIndexEntry>>();

    // quick delete
    if (fidMap.isRemovable(file.toString(), msgLog)) {
        deleteFile(file, msgLog);
        log.log(Level.INFO, "{0} quick-garbage-collected file {1}", new Object[] { msgLog.disk, file });
        return;
    } else
        log.log(Level.FINE, "{0} not quick-GCing file {1} because dependent paxosIDs = {2}",
                new Object[] { msgLog.disk, file, fidMap.fidMap.get(file.toString()) });

    if (System.currentTimeMillis() - file.lastModified() < LOGFILE_AGE_THRESHOLD * 1000)
        return;

    try {
        long t = System.currentTimeMillis();
        raf = new RandomAccessFile(file.toString(), "r");
        rafTmp = new RandomAccessFile(tmpFile.toString(), "rw");
        while (raf.getFilePointer() < raf.length()) {
            long offset = rafTmp.getFilePointer();
            int length = raf.readInt();
            byte[] msg = new byte[length];
            raf.readFully(msg);
            PaxosPacket pp = packetizer != null ? packetizer.stringToPaxosPacket(msg
            // new String(msg, CHARSET)
            ) : PaxosPacket.getPaxosPacket(new String(msg, CHARSET));
            if (!logIndexEntries.containsKey(pp.getPaxosID()))
                logIndexEntries.put(pp.getPaxosID(), new ArrayList<LogIndexEntry>());
            logIndexEntries.get(pp.getPaxosID()).add(new LogIndexEntry(getSlot(pp),
                    getBallot(pp).ballotNumber, getBallot(pp).coordinatorID, pp.getType().getInt(),
                    file.toString(), offset, length));

            if (isLogMsgNeeded(pp, msgLog)) {
                ByteBuffer bbuf = ByteBuffer.allocate(length + 4);
                bbuf.putInt(length);
                bbuf.put(msg);
                rafTmp.write(bbuf.array());
                neededAtAll = true;
                tmpFileSize += bbuf.capacity();
            } else {
                compacted = true;
                log.log(Level.FINE, "From logfile {0} garbage collecting {1}",
                        new Object[] { file, pp.getSummary() });
            }
        }
        DelayProfiler.updateDelay("compact", t);
    } finally {
        if (raf != null)
            raf.close();
        if (rafTmp != null) {
            rafTmp.getChannel().force(true);
            rafTmp.close();
        }
    }
    assert (tmpFile.exists() && tmpFile.length() == tmpFileSize) : tmpFile.length() + " != " + tmpFileSize;

    if (compacted && neededAtAll)
        synchronized (msgLog) {
            modifyLogfileAndLogIndex(file, tmpFile, logIndexEntries, msgLog, fidMap);
            log.log(Level.INFO, "{0} compacted logfile {1}", new Object[] { msgLog, file });
        }
    else if (!neededAtAll) {
        log.log(Level.INFO, "Deleting logfile {0} as its log entries are no longer needed",
                new Object[] { file });
        deleteFile(file, msgLog);
    } else // !compacted
        log.log(Level.INFO, "Logfile {0} unchanged upon compaction attempt", new Object[] { file });

    assert (tmpFile.toString().endsWith(TMP_FILE_SUFFIX));
    if (tmpFile.exists())
        deleteFile(tmpFile, msgLog);
}
From source file:edu.umass.cs.gigapaxos.SQLPaxosLogger.java
private static void mergeLogfiles(File prev, File cur, PaxosPacketizer packetizer, MessageLogDiskMap msgLog,
        FileIDMap fidMap) throws IOException, JSONException {
    File tmpFile = new File(cur.toString() + TMP_FILE_SUFFIX);
    RandomAccessFile rafTmp = null, rafPrev = null, rafCur = null;
    long t = System.currentTimeMillis();

    try {
        rafTmp = new RandomAccessFile(tmpFile.toString(), "rw");
        rafPrev = new RandomAccessFile(prev.toString(), "r");
        rafCur = new RandomAccessFile(cur.toString(), "r");
        byte[] buf = new byte[1024];
        int numRead = 0;
        // copy prev file to tmp file
        while ((numRead = rafPrev.read(buf)) > 0)
            rafTmp.write(buf, 0, numRead);
        // copy cur file to tmp file
        while ((numRead = rafCur.read(buf)) > 0)
            rafTmp.write(buf, 0, numRead);
    } finally {
        if (rafTmp != null)
            rafTmp.close();
        if (rafPrev != null)
            rafPrev.close();
        if (rafCur != null)
            rafCur.close();
    }

    // copy tmp file index into memory
    HashMap<String, ArrayList<LogIndexEntry>> logIndexEntries = new HashMap<String, ArrayList<LogIndexEntry>>();
    try {
        rafTmp = new RandomAccessFile(tmpFile.toString(), "r");
        while (rafTmp.getFilePointer() < rafTmp.length()) {
            long offset = rafTmp.getFilePointer();
            int length = rafTmp.readInt();
            byte[] msg = new byte[length];
            rafTmp.readFully(msg);
            PaxosPacket pp = packetizer != null ? packetizer.stringToPaxosPacket(msg
            // new String(msg, CHARSET)
            ) : PaxosPacket.getPaxosPacket(new String(msg, CHARSET));
            assert (pp != null) : " read logged message " + new String(msg, CHARSET);
            if (!logIndexEntries.containsKey(pp.getPaxosID()))
                logIndexEntries.put(pp.getPaxosID(), new ArrayList<LogIndexEntry>());
            logIndexEntries.get(pp.getPaxosID()).add(new LogIndexEntry(getSlot(pp),
                    getBallot(pp).ballotNumber, getBallot(pp).coordinatorID, pp.getType().getInt(),
                    cur.toString(), offset, length));
        }
    } finally {
        if (rafTmp != null)
            rafTmp.close();
    }

    // atomically copy tmpFile to cur, adjust log index, delete prev
    synchronized (msgLog) {
        modifyLogfileAndLogIndex(cur, tmpFile, logIndexEntries, msgLog, fidMap);
        if (prev.delete())
            fidMap.remove(prev.toString());
    }
    DelayProfiler.updateDelay("merge", t);
    log.log(Level.INFO, "{0} merged logfile {1} into {2}", new Object[] { msgLog, prev, cur });
}