List of usage examples for java.io RandomAccessFile read
public int read() throws IOException
From source file:big.BigZip.java
/** * Version 2 that permits to extract the text from a compressed file without * creating any file on the disk.//from w w w.ja v a2 s. co m * @param startPosition Offset where the file begins * @param endPosition Offset where the file ends * @return The source code of the compressed file */ public String extractBytesToRAM(final long startPosition, final Long endPosition) { String result = null; try { // enable random access to the BIG file (fast as heck) RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r"); // jump directly to the position where the file is positioned dataBIG.seek(startPosition); // create a byte array ByteArrayOutputStream byteOutput = new ByteArrayOutputStream(); // now we start reading bytes during the mentioned interval while (dataBIG.getFilePointer() < endPosition) { // read a byte from our BIG archive int data = dataBIG.read(); byteOutput.write(data); } // flush data at this point byteOutput.flush(); // now convert the stream from input into an output (to feed the zip stream) ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray()); // where we place the decompressed bytes ByteArrayOutputStream textOutput = new ByteArrayOutputStream(); // create the zip streamer final ArchiveInputStream archiveStream; archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput); final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry(); // copy all bytes from one location to the other (and decompress the data) IOUtils.copy(archiveStream, textOutput); // flush the results textOutput.flush(); // we've got the result right here! 
result = textOutput.toString(); // now close all the streams that we have open dataBIG.close(); byteOutput.close(); byteInput.close(); textOutput.close(); archiveStream.close(); } catch (FileNotFoundException ex) { Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex); return null; } catch (IOException ex) { Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex); return null; } catch (ArchiveException ex) { Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex); } return result; }
From source file:big.BigZip.java
/** * Version 2 that permits to extract the text from a compressed file without * creating any file on the disk./*from w w w . j a va 2 s . c o m*/ * @param filePosition * @return The source code of the compressed file */ public String extractBytesToRAM(final long filePosition) { String result = null; try { // add the signature bytes to our start position long startPosition = filePosition + magicSignature.length(); // enable random access to the BIG file (fast as heck) RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r"); // jump directly to the position where the file is positioned dataBIG.seek(startPosition); // create a byte array ByteArrayOutputStream byteOutput = new ByteArrayOutputStream(); // get the end of this file entry (by brute-force) char test = 0; long endPosition = -1; while (test != -1) { test = dataBIG.readChar(); // if the magic devil number was found.. if (test == 66) { // read the next value for confirmation byte value = dataBIG.readByte(); if (value != 73) { continue; } // we found the next entry endPosition = dataBIG.getFilePointer() - 1; break; } } // rewind back to the start position dataBIG.seek(startPosition); // now we start reading bytes during the mentioned interval while (dataBIG.getFilePointer() < endPosition) { // read a byte from our BIG archive int data = dataBIG.read(); byteOutput.write(data); } // flush data at this point byteOutput.flush(); // now convert the stream from input into an output (to feed the zip stream) ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray()); // where we place the decompressed bytes ByteArrayOutputStream textOutput = new ByteArrayOutputStream(); // create the zip streamer final ArchiveInputStream archiveStream; archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput); final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry(); // copy all bytes from one location to the other (and decompress the data) 
IOUtils.copy(archiveStream, textOutput); // flush the results textOutput.flush(); // we've got the result right here! result = textOutput.toString(); // now close all the streams that we have open dataBIG.close(); byteOutput.close(); byteInput.close(); textOutput.close(); archiveStream.close(); } catch (FileNotFoundException ex) { Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex); return null; } catch (IOException ex) { Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex); return null; } catch (ArchiveException ex) { Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex); } return result; }
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Rewrites every fixed-size subdomain metadata record in the metadata file,
 * clearing all queued counters while preserving the url count, the
 * first/last record offsets, the domain name and the domain hash.
 * Each record is read, cleared, partially restored and written back in place.
 *
 * @throws IOException if the metadata file cannot be opened, read or rewritten
 */
void resetSubDomainCounts() throws IOException {
    LOG.info("*** LIST:" + getListId() + " Reset SubDomain Queued Counts.");
    if (_subDomainMetadataFile.exists()) {
        LOG.info("*** LIST:" + getListId() + " FILE EXISTS .");
        // opened read/write: records are rewritten in place below
        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        // pre-sized to hold exactly one fixed-size record
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);
        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();
            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);
            CrawlListMetadata newMetadata = new CrawlListMetadata();
            for (int i = 0; i < itemCount; ++i) {
                // remember where this record starts so it can be rewritten in place
                long orignalPos = file.getFilePointer();
                // read the raw record bytes (reusing outputBuffer's backing array as scratch)
                file.readFully(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
                inputBuffer.reset(outputBuffer.getData(), CrawlListMetadata.Constants.FixedDataSize);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    // a corrupt record is logged and still rewritten from whatever
                    // state newMetadata holds — NOTE(review): confirm this is intended
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                // ok reset everything except hashes and first/last url pointers
                int urlCount = newMetadata.getUrlCount();
                long firstRecordOffset = newMetadata.getFirstRecordOffset();
                long lastRecordOffset = newMetadata.getLastRecordOffset();
                String domainName = newMetadata.getDomainName();
                long domainHash = newMetadata.getDomainHash();
                // reset
                newMetadata.clear();
                // restore the preserved fields
                newMetadata.setUrlCount(urlCount);
                newMetadata.setFirstRecordOffset(firstRecordOffset);
                newMetadata.setLastRecordOffset(lastRecordOffset);
                newMetadata.setDomainName(domainName);
                newMetadata.setDomainHash(domainHash);
                // serialize it ...
                outputBuffer.reset();
                newMetadata.serialize(outputBuffer, new BinaryProtocol());
                // write it back to disk at the exact position it was read from
                file.seek(orignalPos);
                // and rewrite it ...
                file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE RESETTIGN SUBDOMAIN METADATA QUEUE COUNTS");
    }
}
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Reads one page of per-subdomain summaries from the subdomain metadata file.
 *
 * @param offset index of the first record to return
 * @param count  maximum number of records to return
 * @return up to {@code count} subdomain summaries starting at {@code offset};
 *         empty when the offset is past the end or an I/O error occurs
 */
public ArrayList<CrawlListDomainItem> getSubDomainList(int offset, int count) {
    synchronized (_metadata) {
        ArrayList<CrawlListDomainItem> itemsOut = new ArrayList<CrawlListDomainItem>();
        try {
            // serialize access to the shared metadata file
            synchronized (_subDomainMetadataFile) {
                RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
                DataInputBuffer inputBuffer = new DataInputBuffer();
                byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];
                try {
                    // skip version
                    file.read();
                    // read item count
                    int itemCount = file.readInt();
                    int i = offset;
                    int end = Math.min(i + count, itemCount);
                    LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);
                    if (i < itemCount) {
                        // file header is 5 bytes (1 version byte + 4-byte item count);
                        // records are fixed-size, so seek straight to the requested page
                        file.seek(5 + (CrawlListMetadata.Constants.FixedDataSize * offset));
                        CrawlListMetadata newMetadata = new CrawlListMetadata();
                        for (; i < end; ++i) {
                            long orignalPos = file.getFilePointer();
                            file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                            inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                            newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                            itemsOut.add(buildSubDomainSummary(newMetadata.getDomainName(), newMetadata));
                        }
                    }
                } finally {
                    file.close();
                }
            }
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
        return itemsOut;
    }
}
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Loads the per-subdomain metadata into the in-memory offset lookup table.
 * When the metadata file exists, each fixed-size record is read and its
 * (domainHash -> file offset) pair is recorded. When it does not exist, the
 * metadata is rebuilt from scratch by walking the fixed/variable data files
 * and is then persisted back to disk.
 *
 * @throws IOException if the metadata file cannot be opened or read
 */
void loadSubDomainMetadataFromDisk() throws IOException {
    LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ... ");
    if (_subDomainMetadataFile.exists()) {
        LOG.info("*** LIST:" + getListId() + " FILE EXISTS LOADING SUBDOMAIN DATA FROM DISK.");
        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];
        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();
            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);
            CrawlListMetadata newMetadata = new CrawlListMetadata();
            // TreeMap keeps the lookup table sorted by domain hash
            TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
            for (int i = 0; i < itemCount; ++i) {
                // record where this entry starts; that offset goes into the table
                long orignalPos = file.getFilePointer();
                file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                idToOffsetMap.put(newMetadata.getDomainHash(), (int) orignalPos);
            }
            // write lookup table: fixed-width (long hash, int offset) entries
            _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);
            for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
                _offsetLookupTable.writeLong(entry.getKey());
                _offsetLookupTable.writeInt(entry.getValue());
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
    } else {
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! LOADING FROM SCRATCH");
        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
        try {
            // ok rebuild top level metadata as well
            _metadata.clear();
            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            int processedCount = 0;
            // walk every fixed-size item record until end of file
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                long position = fixedDataReader.getFilePointer();
                // store offset in item
                item._fileOffset = position;
                // load from disk
                item.deserialize(fixedDataReader);
                try {
                    // seek to this item's string data in the variable-size file
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();
                    // get metadata object for subdomain
                    CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url);
                    // increment url count
                    subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1);
                    // increment top level metadata count
                    _metadata.setUrlCount(_metadata.getUrlCount() + 1);
                    // update top level metadata ..
                    updateMetadata(item, _metadata, 0);
                    // update sub-domain metadata object from item data
                    updateMetadata(item, subDomainMetadata, 0);
                    ++processedCount;
                } catch (IOException e) {
                    // a bad string record is skipped; the scan continues with the next item
                    LOG.error("Exception Reading String Data For Item:" + (processedCount + 1));
                    LOG.error("Exception:" + CCStringUtils.stringifyException(e));
                    LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                            + stringDataReader.getFilePointer());
                }
                if (processedCount % 10000 == 0) {
                    LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items");
                }
            }
            // ok commit top level metadata to disk as well
            writeMetadataToDisk();
        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
            LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                    + stringDataReader.getFilePointer());
            _queueState = QueueState.QUEUED;
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITING TO DISK");
        // write metadata to disk
        writeInitialSubDomainMetadataToDisk();
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITE COMPLETE");
    }
}
From source file:big.BigZip.java
/**
 * Given a position inside our knowledge base, retrieve the data up to
 * the next file indicator and expand it onto disk.
 *
 * The compressed bytes between the two offsets are copied into a temporary
 * zip file, that zip is extracted into the current directory, and the
 * temporary file is removed again.
 *
 * @param targetFile    The new file that will be created
 * @param startPosition The position from where we start to read the data
 * @param endPosition   The position where the data for this file ends
 * @return true on success, false otherwise
 */
public boolean extractBytes(final File targetFile, final long startPosition, final Long endPosition) {
    // temporary holder for the zipped bytes of the requested entry
    final File fileZip = new File("temp.zip");
    // try-with-resources guarantees the archive handle is released on all paths
    try (RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r")) {
        // if the target file exists, try to delete it
        if (targetFile.exists()) {
            targetFile.delete();
            if (targetFile.exists()) {
                // we failed completely
                System.out.println("BIG405 - Failed to delete: " + targetFile.getAbsolutePath());
                return false;
            }
        }
        // delete the zip file if it already exists (stale from a previous run)
        if (fileZip.exists()) {
            fileZip.delete();
            if (fileZip.exists()) {
                // we failed completely
                System.out.println("BIG416 - Failed to delete: " + fileZip.getAbsolutePath());
                return false;
            }
        }
        // create the temporary zip file and copy the entry's bytes into it
        try (RandomAccessFile dataNew = new RandomAccessFile(fileZip, "rw")) {
            // jump directly to the position where the file is positioned
            dataBIG.seek(startPosition);
            // buffered copy (a byte-at-a-time read()/write() on a
            // RandomAccessFile issues one syscall per byte)
            final byte[] buffer = new byte[8192];
            long remaining = endPosition - startPosition;
            while (remaining > 0) {
                int read = dataBIG.read(buffer, 0, (int) Math.min(buffer.length, remaining));
                if (read == -1) {
                    // archive ended before the expected end position
                    break;
                }
                dataNew.write(buffer, 0, read);
                remaining -= read;
            }
        }
        // extract the file into the current directory
        zip.extract(fileZip, new File("."));
    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    } finally {
        // always remove the temp zip file, also on the failure paths
        fileZip.delete();
    }
    return true;
}
From source file:org.apache.james.mailrepository.file.MBoxMailRepository.java
/**
 * Parse the mbox file.
 *
 * Scans the file for RFC 4155 style "From sender hh:mm:ss" separator lines,
 * accumulates the text of each message, and hands every complete message to
 * {@code messAct}. Two scanning modes exist: line-based (BUFFERING) and
 * character-based.
 *
 * @param ins
 *            The random access file to load. Note that the file may or may
 *            not start at offset 0 in the file
 * @param messAct
 *            The action to take when a message is found
 * @return the action's result as soon as it reports completion, the result
 *         for the trailing message, or null when nothing matched or an I/O
 *         error occurred
 */
private MimeMessage parseMboxFile(RandomAccessFile ins, MessageAction messAct) {
    if ((getLogger().isDebugEnabled())) {
        String logBuffer = this.getClass().getName() + " Start parsing " + mboxFile;
        getLogger().debug(logBuffer);
    }
    try {
        // mbox message separator: "From <sender> <hh>:<mm>:<ss>"
        // NOTE(review): compiled on every call — could be a static final field
        Pattern sepMatchPattern = Pattern.compile("^From (.*) (.*):(.*):(.*)$");
        int c;
        boolean inMessage = false;
        StringBuffer messageBuffer = new StringBuffer();
        String previousMessageSeparator = null;
        boolean foundSep;
        // offset of the message currently being accumulated
        long prevMessageStart = ins.getFilePointer();
        if (BUFFERING) {
            // line-based scan
            String line;
            while ((line = ins.readLine()) != null) {
                foundSep = sepMatchPattern.matcher(line).matches();
                if (foundSep && inMessage) {
                    // a new separator ends the previous message — dispatch it
                    MimeMessage endResult = messAct.messageAction(previousMessageSeparator,
                            messageBuffer.toString(), prevMessageStart);
                    if (messAct.isComplete()) {
                        // I've got what I want so just exit
                        return endResult;
                    }
                    previousMessageSeparator = line;
                    // NOTE(review): offset math ignores the line terminator
                    // length consumed by readLine() — confirm intended
                    prevMessageStart = ins.getFilePointer() - line.length();
                    messageBuffer = new StringBuffer();
                    inMessage = true;
                }
                // Only done at the start (first header)
                if (foundSep && !inMessage) {
                    previousMessageSeparator = line;
                    inMessage = true;
                }
                if (!foundSep && inMessage) {
                    messageBuffer.append(line).append("\n");
                }
            }
        } else {
            // character-based scan: build each line manually, split on LF
            StringBuffer line = new StringBuffer();
            while ((c = ins.read()) != -1) {
                if (c == 10) {
                    // end of line reached — test it against the separator
                    foundSep = sepMatchPattern.matcher(line).matches();
                    if (foundSep && inMessage) {
                        MimeMessage endResult = messAct.messageAction(previousMessageSeparator,
                                messageBuffer.toString(), prevMessageStart);
                        if (messAct.isComplete()) {
                            // I've got what I want so just exit
                            return endResult;
                        }
                        previousMessageSeparator = line.toString();
                        prevMessageStart = ins.getFilePointer() - line.length();
                        messageBuffer = new StringBuffer();
                        inMessage = true;
                    }
                    // Only done at the start (first header)
                    if (foundSep && !inMessage) {
                        previousMessageSeparator = line.toString();
                        inMessage = true;
                    }
                    // NOTE(review): unlike the BUFFERING branch, this appends
                    // even when !inMessage — confirm the asymmetry is intended
                    if (!foundSep) {
                        messageBuffer.append(line).append((char) c);
                    }
                    line = new StringBuffer(); // Reset buffer
                } else {
                    line.append((char) c);
                }
            }
        }
        if (messageBuffer.length() != 0) {
            // process last message (no trailing separator follows it)
            return messAct.messageAction(previousMessageSeparator, messageBuffer.toString(), prevMessageStart);
        }
    } catch (IOException ioEx) {
        getLogger().error("Unable to write file (General I/O problem) " + mboxFile, ioEx);
    } catch (PatternSyntaxException e) {
        getLogger().error("Bad regex passed " + mboxFile, e);
    } finally {
        if ((getLogger().isDebugEnabled())) {
            String logBuffer = this.getClass().getName() + " Finished parsing " + mboxFile;
            getLogger().debug(logBuffer);
        }
    }
    return null;
}