List of usage examples for java.io.RandomAccessFile.seek
public void seek(long pos) throws IOException
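Before the project examples, here is a minimal self-contained sketch of the seek contract: seek(pos) moves the file pointer to the absolute byte offset pos, the next read or write starts there, and the pointer may be set beyond the end of the file without changing the file length until a write occurs. The file name below is a hypothetical scratch file.

import java.io.IOException;
import java.io.RandomAccessFile;

public class SeekDemo {
    public static void main(String[] args) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile("demo.bin", "rw")) { // hypothetical file
            raf.writeInt(42);                   // occupies bytes 0-3
            raf.writeInt(7);                    // occupies bytes 4-7
            raf.seek(4);                        // jump back to the second int
            System.out.println(raf.readInt());  // prints 7
            raf.seek(0);                        // rewind and overwrite in place
            raf.writeInt(99);
        }
    }
}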
From source file: au.org.ala.layers.intersect.Grid.java

/** Buffered single-byte read on top of RandomAccessFile. */
private byte getByte(RandomAccessFile raf, byte[] buffer, Long bufferOffset, long seekTo) throws IOException {
    long relativePos = seekTo - bufferOffset;
    if (relativePos < 0) {
        // target lies before the buffered window: seek and refill
        raf.seek(seekTo);
        bufferOffset = seekTo;
        raf.read(buffer);
        return buffer[0];
    } else if (relativePos < buffer.length) {
        // target is inside the buffered window: serve it from memory
        return buffer[(int) relativePos];
    } else if (relativePos - buffer.length < buffer.length) {
        // target is in the window that follows: read it sequentially
        bufferOffset += buffer.length;
        raf.read(buffer);
        return buffer[(int) (relativePos - buffer.length)];
    } else {
        // target is far ahead: seek and refill
        raf.seek(seekTo);
        bufferOffset = seekTo;
        raf.read(buffer);
        return buffer[0];
    }
}
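The pattern above exists to amortize seeks: repeated reads that fall inside the current window are served from memory, and the file is only touched when the window misses. A minimal standalone sketch of the same idea, assuming requested offsets stay within the file length; the names are illustrative, not Grid's API:

// Refill the window only when the requested offset falls outside it.
// windowStart is a one-element array so the updated offset is visible
// to the caller (reassigning a boxed Long parameter would not be).
static byte bufferedByteAt(RandomAccessFile raf, byte[] window, long[] windowStart, long offset)
        throws IOException {
    long rel = offset - windowStart[0];
    if (rel < 0 || rel >= window.length) {
        raf.seek(offset);
        int len = (int) Math.min(window.length, raf.length() - offset);
        raf.readFully(window, 0, len);
        windowStart[0] = offset;
        rel = 0;
    }
    return window[(int) rel];
}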
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

/**
 * Serialize metadata to disk.
 * @throws IOException
 */
void writeMetadataToDisk() throws IOException {
    synchronized (_metadata) {
        RandomAccessFile file = new RandomAccessFile(_listMetadataFile, "rw");
        try {
            file.seek(0);
            _metadata.serialize(file, new BinaryProtocol());
        } finally {
            file.close();
        }
    }
}
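Note that a newly opened RandomAccessFile already has its file pointer at offset 0, so the seek(0) above is defensive: it makes explicit that the metadata record is overwritten in place at the start of the file rather than appended.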
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

private ProxyCrawlHistoryItem getHistoryItemFromOnDiskItem(OnDiskCrawlHistoryItem item) throws IOException {
    ProxyCrawlHistoryItem itemOut = new ProxyCrawlHistoryItem();
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS) != 0)
        itemOut.setCrawlStatus(item._crawlStatus);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_ORIGINAL_RESULT_CODE) != 0)
        itemOut.setHttpResultCode(item._httpResultCode);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS) != 0)
        itemOut.setRedirectStatus(item._redirectStatus);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_RESULT_CODE) != 0)
        itemOut.setRedirectHttpResult(item._redirectHttpResult);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_LASTMODIFIED_TIME) != 0)
        itemOut.setLastModifiedTime(item._updateTimestamp);
    // now attempt to get the string offset
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        // seek to string data
        stringDataReader.seek(item._stringsOffset);
        // and skip buffer length
        WritableUtils.readVInt(stringDataReader);
        // now populate original url ...
        itemOut.setOriginalURL(stringDataReader.readUTF());
        // now if redirect url is present
        if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_URL) != 0) {
            itemOut.setRedirectURL(stringDataReader.readUTF());
        }
    } finally {
        stringDataReader.close();
    }
    return itemOut;
}
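The method works because readUTF reads back exactly what a matching writeUTF call wrote at a recorded offset (here with a Hadoop VInt length prefix in between, skipped via WritableUtils.readVInt). A minimal round-trip of the seek-then-readUTF pattern, using a hypothetical file name:

try (RandomAccessFile raf = new RandomAccessFile("strings.dat", "rw")) { // hypothetical file
    long offset = raf.length();
    raf.seek(offset);                       // append the string at the end
    raf.writeUTF("http://example.com/page");
    // ... later, given the recorded offset:
    raf.seek(offset);
    String url = raf.readUTF();             // reads back the same string
}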
From source file: org.apache.jackrabbit.oak.plugins.segment.file.TarReader.java

/**
 * Scans through the tar file, looking for all segment entries.
 *
 * @throws IOException if the tar file could not be read
 */
private static void recoverEntries(File file, RandomAccessFile access, LinkedHashMap<UUID, byte[]> entries)
        throws IOException {
    byte[] header = new byte[BLOCK_SIZE];
    while (access.getFilePointer() + BLOCK_SIZE <= access.length()) {
        // read the tar header block
        access.readFully(header);

        // compute the header checksum
        int sum = 0;
        for (int i = 0; i < BLOCK_SIZE; i++) {
            sum += header[i] & 0xff;
        }

        // identify possible zero block
        if (sum == 0 && access.getFilePointer() + 2 * BLOCK_SIZE == access.length()) {
            return; // found the zero blocks at the end of the file
        }

        // replace the actual stored checksum with spaces for comparison
        for (int i = 148; i < 148 + 8; i++) {
            sum -= header[i] & 0xff;
            sum += ' ';
        }
        byte[] checkbytes = String.format("%06o\0 ", sum).getBytes(UTF_8);
        for (int i = 0; i < checkbytes.length; i++) {
            if (checkbytes[i] != header[148 + i]) {
                log.warn("Invalid entry checksum at offset {} in tar file {}, skipping...",
                        access.getFilePointer() - BLOCK_SIZE, file);
            }
        }

        // the header checksum passes, so read the entry name and size
        ByteBuffer buffer = ByteBuffer.wrap(header);
        String name = readString(buffer, 100);
        buffer.position(124);
        int size = readNumber(buffer, 12);
        if (access.getFilePointer() + size > access.length()) {
            // checksum was correct, so the size field should be accurate
            log.warn("Partial entry {} in tar file {}, ignoring...", name, file);
            return;
        }

        Matcher matcher = NAME_PATTERN.matcher(name);
        if (matcher.matches()) {
            UUID id = UUID.fromString(matcher.group(1));
            String checksum = matcher.group(3);
            if (checksum != null || !entries.containsKey(id)) {
                byte[] data = new byte[size];
                access.readFully(data);

                // skip possible padding to stay at block boundaries
                long position = access.getFilePointer();
                long remainder = position % BLOCK_SIZE;
                if (remainder != 0) {
                    access.seek(position + (BLOCK_SIZE - remainder));
                }

                if (checksum != null) {
                    CRC32 crc = new CRC32();
                    crc.update(data);
                    if (crc.getValue() != Long.parseLong(checksum, 16)) {
                        log.warn("Checksum mismatch in entry {} of tar file {}, skipping...", name, file);
                        continue;
                    }
                }
                entries.put(id, data);
            }
        } else if (!name.equals(file.getName() + ".idx")) {
            log.warn("Unexpected entry {} in tar file {}, skipping...", name, file);
            long position = access.getFilePointer() + size;
            long remainder = position % BLOCK_SIZE;
            if (remainder != 0) {
                position += BLOCK_SIZE - remainder;
            }
            access.seek(position);
        }
    }
}
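Both seek calls above serve the same purpose: tar entries are padded to 512-byte blocks, so after reading an entry's data (or deciding to skip it) the reader rounds the file pointer up to the next block boundary. The alignment step in isolation:

// Round the file pointer up to the next block boundary (512 bytes for tar).
static void alignToBlock(RandomAccessFile access, int blockSize) throws IOException {
    long position = access.getFilePointer();
    long remainder = position % blockSize;
    if (remainder != 0) {
        access.seek(position + (blockSize - remainder)); // skip the padding bytes
    }
}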
From source file: au.org.ala.layers.intersect.Grid.java

/** Buffered multi-byte read on top of RandomAccessFile. */
private Long getBytes(RandomAccessFile raf, byte[] buffer, Long bufferOffset, long seekTo, byte[] dest)
        throws IOException {
    long relativePos = seekTo - bufferOffset;
    if (relativePos < 0) {
        // target lies before the buffered window: clamp, seek and refill
        if (seekTo < 0) {
            seekTo = 0;
        }
        raf.seek(seekTo);
        bufferOffset = seekTo;
        raf.read(buffer);
        System.arraycopy(buffer, 0, dest, 0, dest.length);
    } else if (relativePos < buffer.length) {
        // target is inside the buffered window: copy from memory
        System.arraycopy(buffer, (int) relativePos, dest, 0, dest.length);
    } else if (relativePos - buffer.length < buffer.length) {
        // target is in the window that follows: read it sequentially
        bufferOffset += buffer.length;
        raf.read(buffer);
        int offset = (int) (relativePos - buffer.length);
        System.arraycopy(buffer, offset, dest, 0, dest.length);
    } else {
        // target is far ahead: seek and refill
        raf.seek(seekTo);
        bufferOffset = seekTo;
        raf.read(buffer);
        System.arraycopy(buffer, 0, dest, 0, dest.length);
    }
    return bufferOffset;
}
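Unlike getByte above, getBytes returns the possibly updated bufferOffset. Java passes object references by value, so reassigning the boxed Long parameter inside the method is invisible to the caller; returning the new offset is what keeps the caller's window position in sync across calls.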
From source file: big.BigZip.java

/**
 * Given a position inside our knowledge base, retrieve the data up to
 * the next file indicator.
 * @param targetFile The new file that will be created
 * @param startPosition The position from where we start to read the data
 * @param endPosition The position where the data ends
 * @return true if the extraction succeeded, false otherwise
 */
public boolean extractBytes(final File targetFile, final long startPosition, final Long endPosition) {
    /**
     * This is a tricky method. We will be extracting data from the BIG
     * archive onto a new file somewhere on disk. The biggest challenge here
     * is to find exactly where the data for the file ends and still do the
     * file copy with good performance.
     */
    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // if the target file exists, try to delete it
        if (targetFile.exists()) {
            targetFile.delete();
            if (targetFile.exists()) {
                // we failed completely
                System.out.println("BIG405 - Failed to delete: " + targetFile.getAbsolutePath());
                return false;
            }
        }
        // we need to create a temporary zip file holder
        File fileZip = new File("temp.zip");
        // delete the zip file if it already exists
        if (fileZip.exists()) {
            fileZip.delete();
            if (fileZip.exists()) {
                // we failed completely
                System.out.println("BIG416 - Failed to delete: " + fileZip.getAbsolutePath());
                return false;
            }
        }
        // create a new file
        RandomAccessFile dataNew = new RandomAccessFile(fileZip, "rw");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            // write the same byte on the target file
            dataNew.write(data);
        }
        // close the file streams
        dataBIG.close();
        dataNew.close();
        // extract the file
        zip.extract(fileZip, new File("."));
        // delete the temp zip file
        fileZip.delete();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }
    return true;
}
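Copying with the no-argument read() issues one I/O call per byte. The same seek-bounded copy is usually done in chunks for speed; a hedged sketch under the same start/end semantics, with illustrative names:

static void copyRange(RandomAccessFile source, RandomAccessFile target,
        long startPosition, long endPosition) throws IOException {
    byte[] chunk = new byte[64 * 1024];
    source.seek(startPosition);
    long remaining = endPosition - startPosition;
    while (remaining > 0) {
        // never read past endPosition
        int read = source.read(chunk, 0, (int) Math.min(chunk.length, remaining));
        if (read < 0) {
            break; // unexpected end of file
        }
        target.write(chunk, 0, read);
        remaining -= read;
    }
}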
From source file: org.apache.jackrabbit.oak.segment.file.TarReader.java

The recoverEntries method in this newer TarReader is identical to the org.apache.jackrabbit.oak.plugins.segment.file.TarReader version shown above, except that it statically imports ByteBuffer.wrap and calls wrap(header) directly.
From source file: FileBaseDataMap.java

/**
 * put Method.<br>
 *
 * @param key
 * @param value
 * @param hashCode This is a key value hash code
 */
public void put(String key, String value, int hashCode) {
    try {
        File file = dataFileList[hashCode % numberOfDataFiles];
        StringBuffer buf = new StringBuffer(this.fillCharacter(key, keyDataLength));
        buf.append(this.fillCharacter(value, oneDataLength));

        CacheContainer accessor = (CacheContainer) innerCache.get(file.getAbsolutePath());
        RandomAccessFile raf = null;
        BufferedWriter wr = null;

        if (accessor == null || accessor.isClosed == true) {
            raf = new RandomAccessFile(file, "rwd");
            wr = new BufferedWriter(new FileWriter(file, true));
            accessor = new CacheContainer();
            accessor.raf = raf;
            accessor.wr = wr;
            accessor.file = file;
            innerCache.put(file.getAbsolutePath(), accessor);
        } else {
            raf = accessor.raf;
            wr = accessor.wr;
        }

        // KeyData Write File
        for (int tryIdx = 0; tryIdx < 2; tryIdx++) {
            try {
                // look up the line number of an existing record for this key
                long dataLineNo = this.getLinePoint(key, raf);
                if (dataLineNo == -1) {
                    // new key: append the record
                    wr.write(buf.toString());
                    wr.flush();
                    // the size of an increment
                    this.totalSize.getAndIncrement();
                } else {
                    // existing record: increment the count only if the key had no live value
                    boolean increMentFlg = false;
                    if (this.get(key, hashCode) == null)
                        increMentFlg = true;
                    // overwrite the fixed-length record in place
                    raf.seek(dataLineNo * (lineDataSize));
                    raf.write(buf.toString().getBytes(), 0, lineDataSize);
                    if (increMentFlg)
                        this.totalSize.getAndIncrement();
                }
                break;
            } catch (IOException ie) {
                // on IOException, reopen the file handles and retry once
                if (tryIdx == 1)
                    throw ie;
                try {
                    if (raf != null)
                        raf.close();
                    if (wr != null)
                        wr.close();
                    raf = new RandomAccessFile(file, "rwd");
                    wr = new BufferedWriter(new FileWriter(file, true));
                    accessor = new CacheContainer();
                    accessor.raf = raf;
                    accessor.wr = wr;
                    accessor.file = file;
                    innerCache.put(file.getAbsolutePath(), accessor);
                } catch (Exception e) {
                    throw e;
                }
            }
        }
    } catch (Exception e2) {
        e2.printStackTrace();
    }
}
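The overwrite branch works because the file stores fixed-length records: record n starts at byte n * lineDataSize, so an existing entry can be replaced in place without rewriting the rest of the file. The core step in isolation, with illustrative names:

// Overwrite record recordIndex in a file of fixed-size records.
static void overwriteRecord(RandomAccessFile raf, long recordIndex, byte[] record, int recordSize)
        throws IOException {
    raf.seek(recordIndex * recordSize); // fixed-width records make the offset a simple product
    raf.write(record, 0, recordSize);
}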
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

/**
 * Serialize subdomain metadata to disk.
 * @throws IOException
 */
void writeSubDomainMetadataToDisk(CrawlListMetadata subDomainData) throws IOException {
    DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

    subDomainData.serialize(outputBuffer, new BinaryProtocol());

    if (outputBuffer.getLength() > CrawlListMetadata.Constants.FixedDataSize) {
        LOG.error("ListMetadata Serialize for List:" + subDomainData.getDomainName() + " > FixedDataSize!!!");
        outputBuffer.reset();
        subDomainData.setDomainName("<<CORRUPT>>");
        subDomainData.serialize(outputBuffer, new BinaryProtocol());
    }

    synchronized (_subDomainMetadataFile) {
        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        try {
            if (subDomainData.getSubDomainDataOffset() == 0) {
                throw new IOException("Data Offset Zero during write!");
            }
            file.seek(subDomainData.getSubDomainDataOffset());
            file.write(outputBuffer.getData(), 0, outputBuffer.getLength());
        } finally {
            file.close();
        }
    }
}
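The size check before the write exists because each subdomain record occupies a fixed-size slot of CrawlListMetadata.Constants.FixedDataSize bytes at its recorded offset; writing a longer serialization there would spill into the next slot, so an oversized record is replaced with a marker that fits.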
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

/**
 * Update the list state of a recently crawled item.
 *
 * @param fingerprint - the fingerprint of the updated item
 * @param newData - the updated crawl history data for the given item
 * @throws IOException
 */
@Override
public void updateItemState(URLFP fingerprint, ProxyCrawlHistoryItem newData) throws IOException {
    if (_listState == LoadState.LOADED) {
        // check for membership ...
        if (_bloomFilter.isPresent(fingerprint)) {
            // extract existing item from disk
            OnDiskCrawlHistoryItem originalItem = loadOnDiskItemForURLFP(fingerprint);
            // if present (null if false cache hit)
            if (originalItem != null) {
                // build an on disk item data structure for any potential changes ...
                OnDiskCrawlHistoryItem newItem = onDiskItemFromHistoryItem(fingerprint, newData);
                // set initial offset information
                newItem._fileOffset = originalItem._fileOffset;
                newItem._stringsOffset = originalItem._stringsOffset;
                // compare the two items ...
                if (!newItem.equals(originalItem)) {
                    // ok items do not match ... figure out if strings are different ...
                    if (newItem._stringsCRC != originalItem._stringsCRC) {
                        RandomAccessFile stringsFile = new RandomAccessFile(_variableDataFile, "rw");
                        try {
                            // seek to end
                            stringsFile.seek(stringsFile.length());
                            // update offset info
                            newItem._stringsOffset = stringsFile.length();
                            // write out string data length
                            WritableUtils.writeVInt(stringsFile, _stringBuffer1.getLength());
                            // write strings to log file
                            stringsFile.write(_stringBuffer1.getData(), 0, _stringBuffer1.getLength());
                        } finally {
                            stringsFile.close();
                        }
                    }
                    // otherwise take the offset from old item
                    else {
                        newItem._stringsOffset = originalItem._stringsOffset;
                    }
                    // ok, different paths depending on whether this is an in memory update or not ...
                    boolean wroteToMemory = false;
                    synchronized (this) {
                        if (_tempFixedDataBuffer != null) {
                            wroteToMemory = true;
                            // reset output buffer
                            _tempOutputBuffer.reset();
                            // serialize to output buffer
                            newItem.serialize(_tempOutputBuffer);
                            // copy to appropriate location
                            System.arraycopy(_tempOutputBuffer.getData(), 0, _tempFixedDataBuffer,
                                    (int) originalItem._fileOffset, OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                        }
                    }
                    if (!wroteToMemory) {
                        // write to disk
                        RandomAccessFile file = new RandomAccessFile(_fixedDataFile, "rw");
                        try {
                            while (true) {
                                try {
                                    // lock just this record's byte range before writing
                                    FileLock lock = file.getChannel().tryLock(originalItem._fileOffset,
                                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);
                                    try {
                                        file.seek(originalItem._fileOffset);
                                        newItem.serialize(file);
                                        break;
                                    } finally {
                                        lock.release();
                                    }
                                } catch (OverlappingFileLockException e) {
                                    LOG.error("###LockConflict(RETRY):" + CCStringUtils.stringifyException(e));
                                }
                            }
                        } finally {
                            file.close();
                        }
                    }
                    // ok now update metadata ...
                    synchronized (_metadata) {
                        int updateFlags = calculateUpdateFlags(originalItem, newItem);
                        if (updateFlags != 0) {
                            int metadataDirtyFlags = updateMetadata(newItem, _metadata, 0);
                            // only write metadata to disk if temp data buffer is null
                            if (metadataDirtyFlags != 0 && !wroteToMemory) {
                                if ((metadataDirtyFlags & MetadataUpdateFlag_ModifiedCrawlStatus) != 0) {
                                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() - 1);
                                }
                                writeMetadataToDisk();
                            }
                            // if not writing to memory then update subdomain metadata
                            if (!wroteToMemory) {
                                synchronized (_subDomainMetadataFile) {
                                    CrawlListMetadata subDomainMetadata = getSubDomainMetadataByURL(
                                            newData.getOriginalURL());
                                    int subDomainMetadataDirtyFlags = updateMetadata(newItem, subDomainMetadata,
                                            processFileOffsets);
                                    if (subDomainMetadataDirtyFlags != 0 && !wroteToMemory) {
                                        if ((subDomainMetadataDirtyFlags & MetadataUpdateFlag_ModifiedCrawlStatus) != 0) {
                                            subDomainMetadata.setQueuedItemCount(
                                                    subDomainMetadata.getQueuedItemCount() - 1);
                                        }
                                        writeSubDomainMetadataToDisk(subDomainMetadata);
                                    }
                                }
                            }
                        }
                    }
                    synchronized (this) {
                        if (_eventListener != null) {
                            _eventListener.itemUpdated(fingerprint);
                        }
                    }
                }
            }
        }
    }
}
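The disk path above pairs seek with a region lock so that concurrent updaters of the same fixed-size record cannot interleave their writes. A minimal sketch of the lock-seek-write pattern with illustrative names; note that FileLock coordinates between processes, while OverlappingFileLockException signals a conflicting lock held within the same JVM:

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;

static void lockedWrite(RandomAccessFile file, long offset, byte[] record) throws IOException {
    while (true) {
        try {
            // exclusive lock over just this record's byte range
            FileLock lock = file.getChannel().tryLock(offset, record.length, false);
            if (lock == null) {
                continue; // another process holds the lock; retry
            }
            try {
                file.seek(offset);
                file.write(record);
                return;
            } finally {
                lock.release();
            }
        } catch (OverlappingFileLockException e) {
            // an overlapping lock is held in this JVM; retry
        }
    }
}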