List of usage examples for java.io.RandomAccessFile.getFilePointer()
public native long getFilePointer() throws IOException;
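getFilePointer() reports the current offset, in bytes, from the start of the file: it starts at 0, advances with every read or write, and can be moved directly with seek(). A minimal sketch of that behavior (example.dat is a placeholder name for any file at least 17 bytes long):

import java.io.IOException;
import java.io.RandomAccessFile;

public class FilePointerDemo {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the file even if an exception is thrown
        try (RandomAccessFile raf = new RandomAccessFile("example.dat", "r")) {
            System.out.println(raf.getFilePointer()); // 0 - pointer starts at the beginning
            raf.seek(16);                             // move the pointer directly
            System.out.println(raf.getFilePointer()); // 16
            raf.read();                               // each read advances the pointer
            System.out.println(raf.getFilePointer()); // 17 (unchanged if read() hit EOF)
        }
    }
}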
From source file: big.BigZip.java
/**
 * Version 2 that permits to extract the text from a compressed file without
 * creating any file on the disk.
 * @param startPosition Offset where the file begins
 * @param endPosition   Offset where the file ends
 * @return The source code of the compressed file
 */
public String extractBytesToRAM(final long startPosition, final long endPosition) {
    String result = null;
    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // create a byte array
        ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();
        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            if (data == -1) {
                break; // unexpected end of file
            }
            byteOutput.write(data);
        }
        // flush data at this point
        byteOutput.flush();
        // now convert the stream from input into an output (to feed the zip stream)
        ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
        // where we place the decompressed bytes
        ByteArrayOutputStream textOutput = new ByteArrayOutputStream();
        // create the zip streamer
        final ArchiveInputStream archiveStream;
        archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput);
        // position the stream at the first (and only) entry
        final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry();
        // copy all bytes from one location to the other (and decompress the data)
        IOUtils.copy(archiveStream, textOutput);
        // flush the results
        textOutput.flush();
        // we've got the result right here!
        result = textOutput.toString();
        // now close all the streams that we have open
        dataBIG.close();
        byteOutput.close();
        byteInput.close();
        textOutput.close();
        archiveStream.close();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (ArchiveException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
    }
    return result;
}
From source file: org.carewebframework.api.logging.LogFileTailer.java
/**
 * Typically executed via a <code>new Thread(FileTailer).start()</code>
 */
@Override
public void run() {
    // The file pointer keeps track of where we are in the file
    long filePointer = 0;
    final long startTime = new Date().getTime();
    // Determine start point
    if (this.startAtBeginning) {
        filePointer = 0;
    } else {
        filePointer = this.file.length();
    }
    try {
        // Start tailing
        this.tailing = true;
        RandomAccessFile file = new RandomAccessFile(this.file, "r");
        while (isTailing()) {
            // check to see if maxActiveInterval has been exceeded
            if (new Date().getTime() - startTime > this.maxActiveInterval) {
                if (log.isWarnEnabled()) {
                    log.warn("FileTailer exceeded maxActiveInterval: " + this.maxActiveInterval);
                }
                stopTailing();
                fireMaxActiveIntervalExceeded();
            }
            try {
                // Compare the length of the file to the file pointer
                final long fileLength = this.file.length();
                if (fileLength < filePointer) {
                    // file must have been rotated or deleted;
                    // close the stale handle, then reopen the file and reset the file pointer
                    file.close();
                    file = new RandomAccessFile(this.file, "r");
                    filePointer = 0;
                }
                if (fileLength > filePointer) {
                    // There is data to read
                    file.seek(filePointer);
                    String line = file.readLine();
                    while (line != null) {
                        fireNewFileLine(line);
                        line = file.readLine();
                    }
                    filePointer = file.getFilePointer();
                }
                // Sleep for the specified interval
                Thread.sleep(this.interval);
            } catch (final Exception e) {
                log.error(e.getMessage(), e);
            }
        }
        // Close the file that we are tailing
        file.close();
    } catch (final Exception e) {
        log.error(e.getMessage(), e);
    }
}
From source file: org.commoncrawl.service.crawler.CrawlList.java
private static long writeLogFileHeader(RandomAccessFile file, LogFileHeader header) throws IOException {
    // set the position at zero ..
    file.seek(0);
    // and write header to disk ...
    header.writeHeader(file);
    // took sync out because it was becoming a severe bottleneck
    // file.getFD().sync();
    return file.getFilePointer();
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java
public static void dumpUnCrawledItems(File dataDir, long listId, File outputFilePath,
        boolean includeRobotsExcludedItems) throws IOException {

    File fixedDataFile = new File(dataDir, LIST_VALUE_MAP_PREFIX + Long.toString(listId));
    File variableDataFile = new File(dataDir, LIST_STRING_MAP_PREFIX + Long.toString(listId));

    LOG.info("FixedDataFile is:" + fixedDataFile);
    LOG.info("VariableDataFile is:" + variableDataFile);

    RandomAccessFile fixedDataReader = new RandomAccessFile(fixedDataFile, "r");
    RandomAccessFile stringDataReader = new RandomAccessFile(variableDataFile, "r");

    JsonWriter writer = new JsonWriter(new BufferedWriter(new FileWriter(outputFilePath), 1024 * 1024 * 10));
    writer.setIndent(" ");

    try {
        writer.beginObject();
        writer.name("urls");
        writer.beginArray();
        try {
            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            URLFP fingerprint = new URLFP();

            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                long position = fixedDataReader.getFilePointer();
                item.deserialize(fixedDataReader);
                // seek to string data
                stringDataReader.seek(item._stringsOffset);
                // and skip buffer length
                WritableUtils.readVInt(stringDataReader);
                // and read primary string
                String url = stringDataReader.readUTF();
                // setup fingerprint
                fingerprint.setDomainHash(item._domainHash);
                fingerprint.setUrlHash(item._urlFingerprint);

                // any item that has not been crawled needs to be queued
                boolean queueItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

                // if item is not queued, check to see if we need to retry the item
                if (!queueItem && item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                    if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                        queueItem = (item._redirectStatus != 0);
                        if (!queueItem) {
                            if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                                queueItem = true;
                            }
                        }
                    } else {
                        queueItem = (item._crawlStatus != 0);
                        if (!queueItem) {
                            if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                                queueItem = true;
                            }
                        }
                    }
                }

                if (queueItem) {
                    // ok if queue item is set ...
                    writer.beginObject();
                    writer.name("url");
                    writer.value(url);
                    writer.name("redirected");
                    writer.value(item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS));
                    writer.name("lastStatus");
                    if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                        if (item._redirectStatus == 0) {
                            writer.value("HTTP-" + item._redirectHttpResult);
                        } else {
                            writer.value(CrawlURL.FailureReason.toString(item._redirectHttpResult));
                        }
                    } else {
                        if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                            if (item._crawlStatus == 0) {
                                writer.value("HTTP-" + item._httpResultCode);
                            } else {
                                writer.value(CrawlURL.FailureReason.toString(item._crawlStatus));
                            }
                        } else {
                            writer.value("UNCRAWLED");
                        }
                    }
                    writer.name("updateTime");
                    writer.value(item._updateTimestamp);
                    writer.endObject();
                }
            }
        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        writer.endArray();
        writer.endObject();
    } catch (Exception e) {
        LOG.error(CCStringUtils.stringifyException(e));
        throw new IOException(e);
    } finally {
        writer.flush();
        writer.close();
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlHistoryManager.java
/**
 * seek out next instance of sync bytes in the file input stream
 *
 * @param file
 * @return true if a sync byte sequence was found before end of file
 * @throws IOException
 */
private boolean seekToNextSyncBytesPos(RandomAccessFile file) throws IOException {
    // read in a sync.length buffer amount
    file.read(_syncByteBuffer);

    int syncLen = _header._sync.length;

    // start scan for next sync position ...
    for (int i = 0; file.getFilePointer() < _header._fileSize; i++) {
        int j = 0;
        for (; j < syncLen; j++) {
            if (_header._sync[j] != _syncByteBuffer[(i + j) % syncLen])
                break;
        }
        if (j == syncLen) {
            // found matching sync bytes - reset file pos to before the sync bytes
            file.seek(file.getFilePointer() - LocalLogFileHeader.SYNC_BYTES_SIZE);
            return true;
        }
        _syncByteBuffer[i % syncLen] = file.readByte();
    }
    return false;
}
From source file: org.commoncrawl.service.crawler.CrawlLog.java
public static void walkCrawlLogFile(File crawlLogPath, long startOffset) throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");
        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);
        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);
        System.out.println("Header ItemCount:" + header._itemCount + " FileSize:" + header._fileSize);

        if (startOffset != 0L) {
            System.out.println("Preseeking to:" + startOffset);
            reader.seek(startOffset);
        }

        Configuration conf = new Configuration();

        // read a crawl url from the stream ...
        long recordCount = 0;
        while (inputStream.getFilePointer() < header._fileSize) {
            // System.out.println("PRE-SYNC SeekPos:" + inputStream.getFilePointer());
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {
                // System.out.println("POST-SYNC SeekPos:" + inputStream.getFilePointer());
                lastReadPosition = inputStream.getFilePointer();
                // skip sync
                inputStream.skipBytes(SYNC_BYTES_SIZE);
                // read length ...
                int urlDataLen = reader.readInt();
                long urlDataCRC = reader.readLong();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                // readFully guarantees the whole record is read (a plain read may return early)
                reader.readFully(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                            + crawlLogPath.getAbsolutePath() + " FilePosition:" + lastReadPosition);
                    inputStream.seek(lastReadPosition + 1);
                } else {
                    if (recordCount++ % 10000 == 0) {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);
                        System.out.println("Record:" + recordCount + " At:" + lastReadPosition + " URL:"
                                + url.getUrl() + " BuffSize:" + urlDataLen + " ContentLen:"
                                + url.getContentRaw().getCount() + " LastModified:"
                                + new Date(url.getLastAttemptTime()).toString());
                    }
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlHistoryManager.java
/**
 * @return a sorted map of urlfp to item
 * @throws IOException
 */
TreeMap<URLFP, ProxyCrawlHistoryItem> loadLocalLogItemMap() throws IOException {

    TreeMap<URLFP, ProxyCrawlHistoryItem> itemMap = new TreeMap<URLFP, ProxyCrawlHistoryItem>();

    LOG.info("Reading Local Log File");

    RandomAccessFile file = new RandomAccessFile(getActiveLogFilePath(), "rw");

    // valid length indicator ...
    long validLength = 0;

    try {
        // skip header ...
        file.seek(LocalLogFileHeader.SIZE);
        validLength = file.getFilePointer();

        // ok walk n items ...
        for (int itemIdx = 0; itemIdx < _header._itemCount
                && file.getChannel().position() <= _header._fileSize; ++itemIdx) {
            try {
                ProxyCrawlHistoryItem item = readItem(file);
                // update valid length ...
                validLength = file.getFilePointer();
                // ok compute fingerprint for item ...
                URLFP fingerprintObject = URLUtils.getURLFPFromURL(item.getOriginalURL(), true);
                if (fingerprintObject == null) {
                    LOG.error("Could not compute fingerprint for URL:" + item.getOriginalURL());
                } else {
                    itemMap.put(fingerprintObject, item);
                }
            } catch (IOException e) {
                LOG.error(CCStringUtils.stringifyException(e));
                try {
                    if (!seekToNextSyncBytesPos(file)) {
                        LOG.error("Hit EOF While Seeking for next SyncByte Sequence!");
                        break;
                    } else {
                        LOG.info("Seek to Next SyncByte Succeeded! Continuing Load");
                    }
                } catch (IOException e2) {
                    LOG.error(CCStringUtils.stringifyException(e2));
                    LOG.error("Got IO Exception Reading SyncBytes - Bailing!");
                    break;
                }
            }
        }
    } finally {
        if (file.length() > validLength) {
            LOG.warn("File Length is:" + file.length() + " Truncating Length to:" + validLength);
            file.setLength(validLength);
        }
        file.close();
    }
    LOG.info("Done Reading Local Log File");
    return itemMap;
}
From source file: big.BigZip.java
/**
 * Version 2 that permits to extract the text from a compressed file without
 * creating any file on the disk.
 * @param filePosition Offset where the compressed file entry begins
 * @return The source code of the compressed file
 */
public String extractBytesToRAM(final long filePosition) {
    String result = null;
    try {
        // add the signature bytes to our start position
        long startPosition = filePosition + magicSignature.length();
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // create a byte array
        ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();

        // get the end of this file entry (by brute-force). The scan reads one
        // byte at a time; read() returns -1 at end of file, whereas the
        // original readChar() consumed two bytes and could never equal -1
        int test;
        long endPosition = -1;
        while ((test = dataBIG.read()) != -1) {
            // if the magic devil number was found..
            if (test == 66) {
                // read the next value for confirmation
                byte value = dataBIG.readByte();
                if (value != 73) {
                    continue;
                }
                // we found the next entry; rewind to just before its two marker bytes
                endPosition = dataBIG.getFilePointer() - 2;
                break;
            }
        }
        // no following marker found: assume the entry runs to the end of the archive
        if (endPosition == -1) {
            endPosition = dataBIG.length();
        }

        // rewind back to the start position
        dataBIG.seek(startPosition);
        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            byteOutput.write(data);
        }
        // flush data at this point
        byteOutput.flush();
        // now convert the stream from input into an output (to feed the zip stream)
        ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
        // where we place the decompressed bytes
        ByteArrayOutputStream textOutput = new ByteArrayOutputStream();
        // create the zip streamer
        final ArchiveInputStream archiveStream;
        archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput);
        // position the stream at the first (and only) entry
        final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry();
        // copy all bytes from one location to the other (and decompress the data)
        IOUtils.copy(archiveStream, textOutput);
        // flush the results
        textOutput.flush();
        // we've got the result right here!
        result = textOutput.toString();
        // now close all the streams that we have open
        dataBIG.close();
        byteOutput.close();
        byteInput.close();
        textOutput.close();
        archiveStream.close();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (ArchiveException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
    }
    return result;
}
From source file: com.joey.software.MoorFLSI.RepeatImageTextReader.java
public void loadTextDataFluxSingle(File file) {
    try {
        RandomAccessFile in = new RandomAccessFile(file, "r");

        // Skip header
        in.readLine();
        in.readLine();
        in.readLine();

        // Skip Subject Information
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();

        String startTimeInput = in.readLine();
        String commentsInput = in.readLine();
        String data = in.readLine();
        while (!data.startsWith("2) System Configuration")) {
            commentsInput += data;
            data = in.readLine();
        }

        // System configuration
        in.readLine();
        String timeConstantInput = in.readLine();
        String cameraGainInput = in.readLine();
        String exposureTimeInput = in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        String resolutionInput = in.readLine();

        // Parse important Size
        high = (new Scanner(resolutionInput.split(":")[1])).nextInt();
        wide = (new Scanner(resolutionInput.split(",")[1])).nextInt();

        int tot = 1;
        while (!data.startsWith("3) Flux Image Data")) {
            System.out.println(data);
            data = in.readLine();
        }
        in.readLine();

        // Parse Image Data
        /*
         * Close the random access file and switch to a Scanner: first store
         * the current position, then move the new stream to that point.
         */
        long pos = in.getFilePointer();
        in.close();

        FileInputStream fIn = new FileInputStream(file);
        fIn.skip(pos);
        BufferedInputStream bIn = new BufferedInputStream(fIn);
        Scanner sIn = new Scanner(bIn);

        short[][] holder = new short[wide][high];

        JFrame f = new JFrame();
        f.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);
        StatusBarPanel stat = new StatusBarPanel();
        stat.setMaximum(high);
        f.getContentPane().setLayout(new BorderLayout());
        f.getContentPane().add(stat, BorderLayout.CENTER);
        f.setSize(200, 60);
        f.setVisible(true);

        // Skip over the heading values
        sIn.reset();
        for (int y = 0; y < high; y++) {
            System.out.println(sIn.nextInt());
            try {
                for (int x = 0; x < wide; x++) {
                    holder[x][y] = sIn.nextShort();
                }
            } catch (Throwable e) {
            }
        }
        addData(new Date(), holder);
        FrameFactroy.getFrame(new DynamicRangeImage(holder));
        // Start Image Data
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java
private void dumpFixedDataFile() {
    try {
        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        try {
            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            int index = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                item.deserialize(fixedDataReader);
                LOG.info("Item at Index:" + index++ + " Domain:" + item._domainHash + " URLFP:"
                        + item._urlFingerprint);
            }
        } finally {
            fixedDataReader.close();
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
}