Example usage for java.io RandomAccessFile getFilePointer

Introduction

On this page you can find example usages of java.io.RandomAccessFile.getFilePointer().

Prototype

public native long getFilePointer() throws IOException;

Document

Returns the current offset in this file.
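
Before the full examples below, here is a minimal sketch of the call itself (the file name is a placeholder): the pointer starts at 0, advances with every read or write, and a saved value can be restored with seek().

import java.io.IOException;
import java.io.RandomAccessFile;

public class FilePointerDemo {
    public static void main(String[] args) throws IOException {
        // "example.dat" is a placeholder for any existing readable file
        try (RandomAccessFile raf = new RandomAccessFile("example.dat", "r")) {
            System.out.println(raf.getFilePointer()); // 0 right after opening

            raf.read();                               // consume one byte
            System.out.println(raf.getFilePointer()); // now 1

            long mark = raf.getFilePointer();         // remember this spot
            raf.seek(raf.length());                   // jump to the end
            raf.seek(mark);                           // and come back
        }
    }
}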

Usage

From source file:big.BigZip.java

/**
 * Version 2 that permits extracting the text from a compressed file without
 * creating any file on the disk.
 * @param startPosition Offset where the file begins
 * @param endPosition   Offset where the file ends
 * @return      The source code of the compressed file
 */
public String extractBytesToRAM(final long startPosition, final long endPosition) {

    String result = null;

    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // create a byte array
        ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();

        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            byteOutput.write(data);
        }
        // flush data at this point
        byteOutput.flush();
        // now convert the stream from input into an output (to feed the zip stream)
        ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
        // where we place the decompressed bytes
        ByteArrayOutputStream textOutput = new ByteArrayOutputStream();
        // create the zip streamer
        final ArchiveInputStream archiveStream;
        archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput);
        final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry();
        // copy all bytes from one location to the other (and decompress the data)
        IOUtils.copy(archiveStream, textOutput);
        // flush the results
        textOutput.flush();
        // we've got the result right here!
        result = textOutput.toString();
        // now close all the streams that we have open
        dataBIG.close();
        byteOutput.close();
        byteInput.close();
        textOutput.close();
        archiveStream.close();

    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (ArchiveException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
    }

    return result;
}
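
A hypothetical call just passes the entry's byte offsets; the names below are illustrative, not part of the class:

// the offsets would normally come from the archive's entry index
String sourceCode = bigZip.extractBytesToRAM(entryStartOffset, entryEndOffset);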

From source file:org.carewebframework.api.logging.LogFileTailer.java

/**
 * Typically executed via a <code>new Thread(FileTailer).start()</code>
 */
@Override
public void run() {
    // The file pointer keeps track of where we are in the file
    long filePointer = 0;
    final long startTime = new Date().getTime();

    // Determine start point
    if (this.startAtBeginning) {
        filePointer = 0;
    } else {
        filePointer = this.file.length();
    }

    try {
        // Start tailing
        this.tailing = true;
        RandomAccessFile file = new RandomAccessFile(this.file, "r");
        while (isTailing()) {
            //check to see if maxActiveInterval has been exceeded
            if (new Date().getTime() - startTime > this.maxActiveInterval) {
                if (log.isWarnEnabled()) {
                    log.warn("FileTailer exceeded maxActiveInterval: " + this.maxActiveInterval);
                }
                stopTailing();
                fireMaxActiveIntervalExceeded();
            }
            try {
                // Compare the length of the file to the file pointer
                final long fileLength = this.file.length();
                if (fileLength < filePointer) {
                    // file must have been rotated or deleted;
                    // close the stale handle, then reopen and reset the file pointer
                    file.close();
                    file = new RandomAccessFile(this.file, "r");
                    filePointer = 0;
                }

                if (fileLength > filePointer) {
                    // There is data to read
                    file.seek(filePointer);
                    String line = file.readLine();
                    while (line != null) {
                        fireNewFileLine(line);
                        line = file.readLine();
                    }
                    filePointer = file.getFilePointer();
                }

                // Sleep for the specified interval
                Thread.sleep(this.interval);
            } catch (final Exception e) {
                log.error(e.getMessage(), e);
            }
        }

        // Close the file that we are tailing
        file.close();
    } catch (final Exception e) {
        log.error(e.getMessage(), e);
    }
}
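
Stripped of the timeout bookkeeping, the tail-follow pattern above reduces to a few lines. A minimal sketch (path and poll interval are arbitrary placeholders) that reopens the file on each poll for simplicity:

import java.io.IOException;
import java.io.RandomAccessFile;

public class MiniTailer {
    public static void tail(String path, long pollMillis) throws IOException, InterruptedException {
        long filePointer = 0;
        while (true) {
            try (RandomAccessFile raf = new RandomAccessFile(path, "r")) {
                if (raf.length() < filePointer) {
                    // file was rotated or truncated; start from the beginning
                    filePointer = 0;
                }
                raf.seek(filePointer);
                String line;
                while ((line = raf.readLine()) != null) {
                    System.out.println(line); // handle the new line
                }
                // remember where reading stopped; the next poll resumes here
                filePointer = raf.getFilePointer();
            }
            Thread.sleep(pollMillis);
        }
    }
}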

From source file:org.commoncrawl.service.crawler.CrawlList.java

private static long writeLogFileHeader(RandomAccessFile file, LogFileHeader header) throws IOException {

    // set the position at zero .. 
    file.seek(0);
    // and write header to disk ... 
    header.writeHeader(file);

    //took sync out because it was becoming a severe bottleneck
    // file.getFD().sync();

    return file.getFilePointer();
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

public static void dumpUnCrawledItems(File dataDir, long listId, File outputFilePath,
        boolean includeRobotsExcludedItems) throws IOException {

    File fixedDataFile = new File(dataDir, LIST_VALUE_MAP_PREFIX + Long.toString(listId));
    File variableDataFile = new File(dataDir, LIST_STRING_MAP_PREFIX + Long.toString(listId));

    LOG.info("FixedDataFile is:" + fixedDataFile);
    LOG.info("VariableDataFile is:" + variableDataFile);

    RandomAccessFile fixedDataReader = new RandomAccessFile(fixedDataFile, "r");
    RandomAccessFile stringDataReader = new RandomAccessFile(variableDataFile, "r");

    JsonWriter writer = new JsonWriter(new BufferedWriter(new FileWriter(outputFilePath), 1024 * 1024 * 10));

    writer.setIndent(" ");

    try {
        writer.beginObject();
        writer.name("urls");
        writer.beginArray();
        try {

            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            URLFP fingerprint = new URLFP();

            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {

                long position = fixedDataReader.getFilePointer();

                item.deserialize(fixedDataReader);

                // seek to string data 
                stringDataReader.seek(item._stringsOffset);
                // and skip buffer length 
                WritableUtils.readVInt(stringDataReader);
                // and read primary string 
                String url = stringDataReader.readUTF();
                // setup fingerprint 
                fingerprint.setDomainHash(item._domainHash);
                fingerprint.setUrlHash(item._urlFingerprint);

                // any item that has not been crawled needs to be queued 
                boolean queueItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

                // if item is not queued, check to see if we need to retry the item 
                if (!queueItem && item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {

                    if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {

                        queueItem = (item._redirectStatus != 0);

                        if (!queueItem) {
                            if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                                queueItem = true;
                            }
                        }
                    } else {
                        queueItem = (item._crawlStatus != 0);

                        if (!queueItem) {
                            if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                                queueItem = true;
                            }
                        }
                    }
                }

                if (queueItem) {
                    // ok if queue item is set ... 
                    writer.beginObject();
                    writer.name("url");
                    writer.value(url);
                    writer.name("redirected");
                    writer.value((boolean) item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS));
                    writer.name("lastStatus");
                    if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                        if (item._redirectStatus == 0) {
                            writer.value("HTTP-" + item._redirectHttpResult);
                        } else {
                            writer.value(CrawlURL.FailureReason.toString(item._redirectHttpResult));
                        }
                    } else {
                        if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                            if (item._crawlStatus == 0) {
                                writer.value("HTTP-" + item._httpResultCode);
                            } else {
                                writer.value(CrawlURL.FailureReason.toString(item._crawlStatus));
                            }
                        } else {
                            writer.value("UNCRAWLED");
                        }
                    }
                    writer.name("updateTime");
                    writer.value(item._updateTimestamp);
                    writer.endObject();
                }
            }
        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }

        writer.endArray();
        writer.endObject();
    } catch (Exception e) {
        LOG.error(CCStringUtils.stringifyException(e));
        throw new IOException(e);
    } finally {
        writer.flush();
        writer.close();
    }

}

From source file:org.commoncrawl.service.listcrawler.CrawlHistoryManager.java

/**
 * seek out next instance of sync bytes in the file input stream
 *
 * @param file
 * @throws IOException
 */
private boolean seekToNextSyncBytesPos(RandomAccessFile file) throws IOException {
    // read in a sync.length buffer amount
    file.read(_syncByteBuffer);

    int syncLen = _header._sync.length;

    // start scan for next sync position ...
    for (int i = 0; file.getFilePointer() < _header._fileSize; i++) {
        int j = 0;
        for (; j < syncLen; j++) {
            if (_header._sync[j] != _syncByteBuffer[(i + j) % syncLen])
                break;
        }
        if (j == syncLen) {
            // found matching sync bytes - reset file pos to before sync bytes
            file.seek(file.getFilePointer() - LocalLogFileHeader.SYNC_BYTES_SIZE); // position before sync
            return true;
        }
        _syncByteBuffer[i % syncLen] = file.readByte();
    }
    return false;
}
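
The scan treats _syncByteBuffer as a circular window over the stream: on each failed comparison one more byte is read into the slot that just rotated out (index i % syncLen), so the search advances one byte at a time without re-reading the whole window.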

From source file:org.commoncrawl.service.crawler.CrawlLog.java

public static void walkCrawlLogFile(File crawlLogPath, long startOffset) throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");

        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        System.out.println("Header ItemCount:" + header._itemCount + " FileSize:" + header._fileSize);

        if (startOffset != 0L) {
            System.out.println("Preseeking to:" + startOffset);
            reader.seek(startOffset);
        }

        Configuration conf = new Configuration();

        // read a crawl url from the stream...

        long recordCount = 0;
        while (inputStream.getFilePointer() < header._fileSize) {

            // System.out.println("PRE-SYNC SeekPos:"+
            // inputStream.getFilePointer());
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {

                // System.out.println("POST-SYNC SeekPos:"+
                // inputStream.getFilePointer());

                lastReadPosition = inputStream.getFilePointer();

                // skip sync
                inputStream.skipBytes(SYNC_BYTES_SIZE);

                // read length ...
                int urlDataLen = reader.readInt();
                long urlDataCRC = reader.readLong();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                reader.readFully(buffer.getBuffer(), 0, urlDataLen); // readFully guards against short reads
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                            + crawlLogPath.getAbsolutePath() + " FilePosition:" + lastReadPosition);
                    inputStream.seek(lastReadPosition + 1);
                } else {
                    if (recordCount++ % 10000 == 0) {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);

                        System.out.println("Record:" + recordCount + " At:" + lastReadPosition + " URL:"
                                + url.getUrl() + " BuffSize:" + urlDataLen + " ContentLen:"
                                + url.getContentRaw().getCount() + " LastModified:"
                                + new Date(url.getLastAttemptTime()).toString());
                    }
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
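
Note the recovery strategy on a CRC mismatch: instead of aborting, the walker seeks to lastReadPosition + 1 and lets the next sync-byte scan find a record boundary, which is what makes a partially corrupted log still walkable.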

From source file:org.commoncrawl.service.listcrawler.CrawlHistoryManager.java

/**
 * @return a sorted map of urlfp to item
 * @throws IOException
 */
TreeMap<URLFP, ProxyCrawlHistoryItem> loadLocalLogItemMap() throws IOException {

    TreeMap<URLFP, ProxyCrawlHistoryItem> itemMap = new TreeMap<URLFP, ProxyCrawlHistoryItem>();

    LOG.info("Reading Local Log File");
    RandomAccessFile file = new RandomAccessFile(getActiveLogFilePath(), "rw");

    // valid length indicator ...
    long validLength = 0;

    try {
        // skip header ...
        file.seek(LocalLogFileHeader.SIZE);
        validLength = file.getFilePointer();
        // ok walk n items ...
        for (int itemIdx = 0; itemIdx < _header._itemCount
                && file.getChannel().position() <= _header._fileSize; ++itemIdx) {
            try {
                ProxyCrawlHistoryItem item = readItem(file);
                // update valid length ...
                validLength = file.getFilePointer();
                // ok compute fingerprint for item ...
                URLFP fingerprintObject = URLUtils.getURLFPFromURL(item.getOriginalURL(), true);
                if (fingerprintObject == null) {
                    LOG.error("Could not compute fingerprint for URL:" + item.getOriginalURL());
                } else {
                    itemMap.put(fingerprintObject, item);
                }
            } catch (IOException e) {
                LOG.error(CCStringUtils.stringifyException(e));
                try {
                    if (!seekToNextSyncBytesPos(file)) {
                        LOG.error("Hit EOF While Seeking for next SyncByte Sequence!");
                        break;
                    } else {
                        LOG.info("Seek to Next SyncByte Succeeded! Continuing Load");
                    }
                } catch (IOException e2) {
                    LOG.error(CCStringUtils.stringifyException(e2));
                    LOG.error("Got IO Exception Reading SyncBytes - Bailing!");
                    break;
                }
            }
        }
    } finally {
        if (file.length() > validLength) {
            LOG.warn("File Length is:" + file.length() + " Truncating Length to:" + validLength);
            file.setLength(validLength);
        }

        file.close();
    }
    LOG.info("Done Reading Local Log File");

    return itemMap;
}
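
The validLength bookkeeping pays off in the finally block: validLength is refreshed via getFilePointer() after each successful read, so if a torn or partial record is hit the file is simply truncated back to the last known-good offset.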

From source file:big.BigZip.java

/**
 * Version 2 that permits extracting the text from a compressed file without
 * creating any file on the disk.
 * @param filePosition Offset where the file entry begins
 * @return      The source code of the compressed file
 */
public String extractBytesToRAM(final long filePosition) {

    String result = null;

    try {

        // add the signature bytes to our start position
        long startPosition = filePosition + magicSignature.length();

        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // create a byte array
        ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();

        // get the end of this file entry (by brute-force scan for the
        // next entry's marker bytes)
        int test;
        long endPosition = -1;
        while ((test = dataBIG.read()) != -1) {
            // if the first marker byte ('B') was found..
            if (test == 66) {
                // read the next byte for confirmation ('I')
                int value = dataBIG.read();
                if (value != 73) {
                    continue;
                }
                // we found the next entry; the marker started two bytes back
                endPosition = dataBIG.getFilePointer() - 2;
                break;
            }
        }

        // rewind back to the start position
        dataBIG.seek(startPosition);

        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            byteOutput.write(data);
        }
        // flush data at this point
        byteOutput.flush();
        // now convert the stream from input into an output (to feed the zip stream)
        ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
        // where we place the decompressed bytes
        ByteArrayOutputStream textOutput = new ByteArrayOutputStream();
        // create the zip streamer
        final ArchiveInputStream archiveStream;
        archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput);
        final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry();
        // copy all bytes from one location to the other (and decompress the data)
        IOUtils.copy(archiveStream, textOutput);
        // flush the results
        textOutput.flush();
        // we've got the result right here!
        result = textOutput.toString();
        // now close all the streams that we have open
        dataBIG.close();
        byteOutput.close();
        byteInput.close();
        textOutput.close();
        archiveStream.close();

    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (ArchiveException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
    }

    return result;
}

From source file:com.joey.software.MoorFLSI.RepeatImageTextReader.java

public void loadTextDataFluxSingle(File file) {
    try {
        RandomAccessFile in = new RandomAccessFile(file, "r");

        // Skip header
        in.readLine();
        in.readLine();
        in.readLine();

        // Skip Subject Information
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        String startTimeInput = in.readLine();
        String commentsInput = in.readLine();

        String data = in.readLine();
        while (!data.startsWith("2) System Configuration")) {
            commentsInput += data;
            data = in.readLine();
        }
        // System configuration

        // in.readLine();
        in.readLine();
        String timeCounstantInput = in.readLine();
        String cameraGainInput = in.readLine();
        String exposureTimeInput = in.readLine();
        in.readLine();
        in.readLine();
        in.readLine();
        String resolutionInput = in.readLine();

        // in.readLine();
        // System.out.println(in.readLine());
        // in.readLine();

        // Parse important Size

        high = (new Scanner(resolutionInput.split(":")[1])).nextInt();
        wide = (new Scanner(resolutionInput.split(",")[1])).nextInt();
        int tot = 1;
        while (!data.startsWith("3) Flux Image Data")) {
            System.out.println(data);
            data = in.readLine();
        }
        in.readLine();
        // Parse Image Data
        /*
         * Close the RandomAccessFile and switch to a Scanner: first store
         * the position, then move the new stream to the correct point.
         */
        long pos = in.getFilePointer();
        in.close();

        FileInputStream fIn = new FileInputStream(file);
        fIn.skip(pos);

        BufferedInputStream bIn = new BufferedInputStream(fIn);
        Scanner sIn = new Scanner(bIn);

        short[][] holder = new short[wide][high];

        JFrame f = new JFrame();
        f.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);

        StatusBarPanel stat = new StatusBarPanel();
        stat.setMaximum(high);
        f.getContentPane().setLayout(new BorderLayout());
        f.getContentPane().add(stat, BorderLayout.CENTER);
        f.setSize(200, 60);
        f.setVisible(true);

        // Skip over the heading values

        sIn.reset();

        for (int y = 0; y < high; y++) {
            System.out.println(sIn.nextInt());
            try {
                for (int x = 0; x < wide; x++) {
                    holder[x][y] = sIn.nextShort();
                }
            } catch (Throwable e) {
                // ignore an incomplete final row
            }

        }
        addData(new Date(), holder);

        FrameFactroy.getFrame(new DynamicRangeImage(holder));
        // Start Image Data

    } catch (FileNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
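
The getFilePointer() call here serves a hand-off: the offset is captured just before the RandomAccessFile is closed, and a fresh FileInputStream then skip()s to the same offset, letting line-oriented header parsing and Scanner-based bulk reading each use the stream type that suits them.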

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

private void dumpFixedDataFile() {
    try {
        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");

        try {
            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            int index = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                item.deserialize(fixedDataReader);
                LOG.info("Item at Index:" + index++ + " Domain:" + item._domainHash + " URLFP:"
                        + item._urlFingerprint);
            }
        } finally {
            fixedDataReader.close();
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }

}