Example usage for java.io RandomAccessFile read

List of usage examples for java.io RandomAccessFile read

Introduction

On this page you can find example usages of java.io.RandomAccessFile.read().

Prototype

public int read() throws IOException 

Document

Reads a byte of data from this file.
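
Before the real-world examples, a minimal sketch of the read() contract (the file name data.bin and class name ReadExample are placeholders): the method returns the next byte as an int in the range 0 to 255, or -1 once the end of the file has been reached, which is why the idiomatic loop tests the int result against -1.

import java.io.IOException;
import java.io.RandomAccessFile;

public class ReadExample {
    public static void main(String[] args) throws IOException {
        try (RandomAccessFile file = new RandomAccessFile("data.bin", "r")) {
            int b;
            // read() returns the next byte as 0..255, or -1 at end of file
            while ((b = file.read()) != -1) {
                System.out.printf("%02x ", b);
            }
        }
    }
}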

Usage

From source file:big.BigZip.java

/**
 * Version 2, which extracts the text from a compressed file without
 * creating any file on the disk.
 * @param startPosition Offset where the file begins
 * @param endPosition   Offset where the file ends
 * @return      The source code of the compressed file
 */
public String extractBytesToRAM(final long startPosition, final Long endPosition) {

    String result = null;

    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // create a byte array
        ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();

        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            byteOutput.write(data);
        }
        // flush data at this point
        byteOutput.flush();
        // now convert the stream from input into an output (to feed the zip stream)
        ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
        // where we place the decompressed bytes
        ByteArrayOutputStream textOutput = new ByteArrayOutputStream();
        // create the zip streamer
        final ArchiveInputStream archiveStream;
        archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput);
        final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry();
        // copy all bytes from one location to the other (and decompress the data)
        IOUtils.copy(archiveStream, textOutput);
        // flush the results
        textOutput.flush();
        // we've got the result right here!
        result = textOutput.toString();
        // now close all the streams that we have open
        dataBIG.close();
        byteOutput.close();
        byteInput.close();
        textOutput.close();
        archiveStream.close();

    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (ArchiveException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
    }

    return result;
}
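
RandomAccessFile is unbuffered, so the loop above issues one system call per byte. Since the interval is known up front here, a single readFully() into a byte array does the same work in one call, and the array can then feed the ByteArrayInputStream directly, making the intermediate ByteArrayOutputStream unnecessary. A sketch under that assumption (the RegionReader class and readRegion method are illustrative names, not part of BigZip):

import java.io.IOException;
import java.io.RandomAccessFile;

final class RegionReader {
    /**
     * Reads the bytes in [startPosition, endPosition) in a single call
     * instead of issuing one unbuffered read() per byte.
     */
    static byte[] readRegion(RandomAccessFile file, long startPosition, long endPosition)
            throws IOException {
        byte[] region = new byte[(int) (endPosition - startPosition)];
        file.seek(startPosition);
        // readFully() loops internally until the whole array is filled,
        // or throws EOFException if the file ends first
        file.readFully(region);
        return region;
    }
}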

From source file:big.BigZip.java

/**
 * Version 2, which extracts the text from a compressed file without
 * creating any file on the disk.
 * @param filePosition  Offset where this file's entry begins inside the archive
 * @return      The source code of the compressed file
 */
public String extractBytesToRAM(final long filePosition) {

    String result = null;

    try {

        // add the signature bytes to our start position
        long startPosition = filePosition + magicSignature.length();

        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // create a byte array
        ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();

        // get the end of this file entry by brute force: scan for the
        // two-byte signature 'B' (66) 'I' (73) that marks the next entry
        int test;
        long endPosition = -1;
        // keep the result of read() as an int so the -1 end-of-file test works
        while ((test = dataBIG.read()) != -1) {
            // if the first signature byte was found..
            if (test == 66) {
                // read the next value for confirmation
                byte value = dataBIG.readByte();
                if (value != 73) {
                    continue;
                }
                // we found the next entry: step back over the two signature
                // bytes so endPosition marks where this entry's data ends
                endPosition = dataBIG.getFilePointer() - 2;
                break;
            }
        }

        // rewind back to the start position
        dataBIG.seek(startPosition);

        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            byteOutput.write(data);
        }
        // flush data at this point
        byteOutput.flush();
        // now convert the stream from input into an output (to feed the zip stream)
        ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
        // where we place the decompressed bytes
        ByteArrayOutputStream textOutput = new ByteArrayOutputStream();
        // create the zip streamer
        final ArchiveInputStream archiveStream;
        archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput);
        final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry();
        // copy all bytes from one location to the other (and decompress the data)
        IOUtils.copy(archiveStream, textOutput);
        // flush the results
        textOutput.flush();
        // we've got the result right here!
        result = textOutput.toString();
        // now close all the streams that we have open
        dataBIG.close();
        byteOutput.close();
        byteInput.close();
        textOutput.close();
        archiveStream.close();

    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    } catch (ArchiveException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
    }

    return result;
}
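
Two details make a brute-force scan like the one above fragile: the end-of-file test only works on the int returned by read(), and the scan consumes the byte after each 66 ('B') even when it is not a 73 ('I'), so a signature whose 'B' directly follows a mismatched 'B' would be missed. A self-contained sketch of the same scan with both cases handled (SignatureScanner and findSignature are illustrative names):

import java.io.IOException;
import java.io.RandomAccessFile;

final class SignatureScanner {
    /**
     * Scans forward from the current file pointer for the two-byte
     * signature { 'B', 'I' } and returns the offset where it starts,
     * or -1 if the end of the file is reached first.
     */
    static long findSignature(RandomAccessFile file) throws IOException {
        int b;
        // keep the result of read() as an int so the -1 test is meaningful
        while ((b = file.read()) != -1) {
            if (b != 'B') {
                continue;
            }
            int next = file.read();
            if (next == 'I') {
                return file.getFilePointer() - 2;
            }
            if (next == -1) {
                return -1;
            }
            // the byte after 'B' was not 'I'; step back one byte so it
            // is re-examined as a possible 'B' on the next pass
            file.seek(file.getFilePointer() - 1);
        }
        return -1;
    }
}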

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

void resetSubDomainCounts() throws IOException {

    LOG.info("*** LIST:" + getListId() + " Reset SubDomain Queued Counts.");

    if (_subDomainMetadataFile.exists()) {

        LOG.info("*** LIST:" + getListId() + " FILE EXISTS .");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

        try {
            // skip version 
            file.read();
            // read item count 
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            for (int i = 0; i < itemCount; ++i) {

                long originalPos = file.getFilePointer();
                file.readFully(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
                inputBuffer.reset(outputBuffer.getData(), CrawlListMetadata.Constants.FixedDataSize);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                // ok reset everything except hashes and first/last url pointers 
                int urlCount = newMetadata.getUrlCount();
                long firstRecordOffset = newMetadata.getFirstRecordOffset();
                long lastRecordOffset = newMetadata.getLastRecordOffset();
                String domainName = newMetadata.getDomainName();
                long domainHash = newMetadata.getDomainHash();

                // reset 
                newMetadata.clear();
                // restore 
                newMetadata.setUrlCount(urlCount);
                newMetadata.setFirstRecordOffset(firstRecordOffset);
                newMetadata.setLastRecordOffset(lastRecordOffset);
                newMetadata.setDomainName(domainName);
                newMetadata.setDomainHash(domainHash);

                // serialize it ... 
                outputBuffer.reset();
                newMetadata.serialize(outputBuffer, new BinaryProtocol());
                // write it back to disk 
                file.seek(originalPos);
                // and rewrite it ... 
                file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE RESETTIGN SUBDOMAIN METADATA QUEUE COUNTS");
    }
}
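
The single read() call at the top of the method skips a one-byte version field; together with the four-byte item count that follows, the file is a 5-byte header plus itemCount fixed-size records, which is what makes the in-place rewrite with seek() possible. A minimal sketch of that layout (FixedRecordFile and RECORD_SIZE are illustrative; CrawlListMetadata.Constants.FixedDataSize plays the record-size role above):

import java.io.IOException;
import java.io.RandomAccessFile;

final class FixedRecordFile {
    // illustrative record size; any fixed value works as long as
    // reader and writer agree on it
    static final int RECORD_SIZE = 128;

    /**
     * Rewrites record i in place. The header is 1 version byte plus a
     * 4-byte item count, so record i starts at 5 + i * RECORD_SIZE.
     */
    static void rewriteRecord(RandomAccessFile file, int i, byte[] record) throws IOException {
        if (record.length != RECORD_SIZE) {
            throw new IllegalArgumentException("record must be exactly " + RECORD_SIZE + " bytes");
        }
        file.seek(5 + (long) i * RECORD_SIZE);
        file.write(record);
    }
}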

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

public ArrayList<CrawlListDomainItem> getSubDomainList(int offset, int count) {
    synchronized (_metadata) {

        ArrayList<CrawlListDomainItem> itemsOut = new ArrayList<CrawlListDomainItem>();

        try {
            synchronized (_subDomainMetadataFile) {
                RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
                DataInputBuffer inputBuffer = new DataInputBuffer();
                byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

                try {
                    // skip version 
                    file.read();
                    // read item count 
                    int itemCount = file.readInt();

                    int i = offset;
                    int end = Math.min(i + count, itemCount);

                    LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

                    if (i < itemCount) {

                        // skip the 5-byte header (1 version byte + 4-byte item count)
                        file.seek(5 + (CrawlListMetadata.Constants.FixedDataSize * offset));

                        CrawlListMetadata newMetadata = new CrawlListMetadata();

                        for (; i < end; ++i) {

                            long originalPos = file.getFilePointer();
                            file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                            inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                            newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                            itemsOut.add(buildSubDomainSummary(newMetadata.getDomainName(), newMetadata));
                        }
                    }
                } finally {
                    file.close();
                }
            }
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");

        return itemsOut;
    }
}
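
The literal 5 in the seek() call is that same header size: the version byte consumed by read() plus the four bytes of the item count. A hedged, self-contained sketch of the paging pattern (RecordPager and readPage are illustrative names):

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;

final class RecordPager {
    /**
     * Reads records [offset, offset + count) from a file laid out as
     * 1 version byte + 4-byte item count + fixed-size records.
     */
    static List<byte[]> readPage(RandomAccessFile file, int recordSize, int offset, int count)
            throws IOException {
        List<byte[]> page = new ArrayList<>();
        file.seek(0);
        file.read();                    // skip the version byte
        int itemCount = file.readInt(); // total number of records
        int end = Math.min(offset + count, itemCount);
        if (offset < end) {
            // one seek positions the reader for the whole contiguous page
            file.seek(5 + (long) recordSize * offset);
            for (int i = offset; i < end; i++) {
                byte[] record = new byte[recordSize];
                file.readFully(record);
                page.add(record);
            }
        }
        return page;
    }
}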

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

void loadSubDomainMetadataFromDisk() throws IOException {
    LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ...  ");
    if (_subDomainMetadataFile.exists()) {

        LOG.info("*** LIST:" + getListId() + " FILE EXISTS LOADING SUBDOMAIN DATA FROM DISK.");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

        try {
            // skip version 
            file.read();
            // read item count 
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
            for (int i = 0; i < itemCount; ++i) {

                long originalPos = file.getFilePointer();
                file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                idToOffsetMap.put(newMetadata.getDomainHash(), (int) originalPos);
            }

            // write lookup table 
            _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);
            for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
                _offsetLookupTable.writeLong(entry.getKey());
                _offsetLookupTable.writeInt(entry.getValue());
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
    } else {

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! LOADING FROM SCRATCH");

        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");

        try {

            //ok rebuild top level metadata as well 
            _metadata.clear();

            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();

            int processedCount = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {

                long position = fixedDataReader.getFilePointer();

                // store offset in item 
                item._fileOffset = position;
                // load from disk 
                item.deserialize(fixedDataReader);
                try {
                    // seek to string data 
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length 
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string 
                    String url = stringDataReader.readUTF();

                    // get metadata object for subdomain 
                    CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url);

                    // increment url count 
                    subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1);

                    // increment top level metadata count 
                    _metadata.setUrlCount(_metadata.getUrlCount() + 1);

                    // update top level metadata ..
                    updateMetadata(item, _metadata, 0);

                    // update sub-domain metadata object  from item data
                    updateMetadata(item, subDomainMetadata, 0);

                    ++processedCount;
                } catch (IOException e) {
                    LOG.error("Exception Reading String Data For Item:" + (processedCount + 1));
                    LOG.error("Exception:" + CCStringUtils.stringifyException(e));
                    LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                            + stringDataReader.getFilePointer());
                }

                if (processedCount % 10000 == 0) {
                    LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items");
                }
            }

            // ok commit top level metadata to disk as well 
            writeMetadataToDisk();

        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
            LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                    + stringDataReader.getFilePointer());
            _queueState = QueueState.QUEUED;
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITING TO DISK");

        // write metadata to disk 
        writeInitialSubDomainMetadataToDisk();

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITE COMPLETE");
    }
}
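
The lookup table built at the end of the first branch is just the sorted (domainHash, offset) pairs flattened to 12 bytes per entry (an 8-byte long plus a 4-byte int), which is presumably what OFFSET_TABLE_ENTRY_SIZE accounts for; sorting by key means the flat table can later be binary searched. A sketch of the same packing using plain java.io streams in place of Hadoop's DataOutputBuffer (OffsetTableSketch is an illustrative name):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;

final class OffsetTableSketch {
    /** Packs a (domainHash -> record offset) map into a flat, sorted table. */
    static byte[] pack(TreeMap<Long, Integer> idToOffsetMap) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream(idToOffsetMap.size() * 12);
        DataOutputStream out = new DataOutputStream(bytes);
        // TreeMap iterates in ascending key order, so entries come out sorted
        for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
            out.writeLong(entry.getKey());  // 8-byte domain hash
            out.writeInt(entry.getValue()); // 4-byte offset into the metadata file
        }
        return bytes.toByteArray();
    }
}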

From source file:big.BigZip.java

/**
 * Given a position inside our knowledge base, retrieve the data up to
 * the next file indicator.
 * @param targetFile    The new file that will be created
 * @param startPosition The position from where we start to read the data
 * @param endPosition   The position where the data for this file ends
 * @return true if the file was extracted successfully, false otherwise
 */
public boolean extractBytes(final File targetFile, final long startPosition, final Long endPosition) {
    /**
     * This is a tricky method. We will be extracting data from the BIG
     * archive onto a new file somewhere on disk. The biggest challenge here
     * is to find exactly when the data for the file ends and still do the
     * file copy with a wonderful performance.
     */
    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // if the target file exists, try to delete it
        if (targetFile.exists()) {
            targetFile.delete();
            if (targetFile.exists()) {
                // we failed completely
                System.out.println("BIG405 - Failed to delete: " + targetFile.getAbsolutePath());
                return false;
            }
        }
        // we need to create a temporary zip file holder
        File fileZip = new File("temp.zip");
        // delete the zip file if it already exists
        if (fileZip.exists()) {
            fileZip.delete();
            if (fileZip.exists()) {
                // we failed completely
                System.out.println("BIG416 - Failed to delete: " + fileZip.getAbsolutePath());
                return false;
            }
        }

        // create a new file
        RandomAccessFile dataNew = new RandomAccessFile(fileZip, "rw");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            // write the same byte on the target file
            dataNew.write(data);
        }

        // close the file streams
        dataBIG.close();
        dataNew.close();

        // extract the file
        zip.extract(fileZip, new File("."));
        // delete the temp zip file
        fileZip.delete();

    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }

    return true;
}
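
As in the earlier extraction methods, the read()/write() loop costs two system calls per byte. When both endpoints are files, FileChannel.transferTo() can hand the whole copy to the operating system; a sketch of that alternative (RegionCopy and copyRegion are illustrative names, not part of BigZip):

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;

final class RegionCopy {
    /** Copies the bytes in [startPosition, endPosition) of source into target. */
    static void copyRegion(File source, File target, long startPosition, long endPosition)
            throws IOException {
        try (RandomAccessFile in = new RandomAccessFile(source, "r");
                FileChannel inChannel = in.getChannel();
                FileChannel outChannel = new FileOutputStream(target).getChannel()) {
            long position = startPosition;
            long remaining = endPosition - startPosition;
            // transferTo() may copy fewer bytes than requested, so loop
            while (remaining > 0) {
                long copied = inChannel.transferTo(position, remaining, outChannel);
                if (copied == 0) {
                    break; // reached end of source before endPosition
                }
                position += copied;
                remaining -= copied;
            }
        }
    }
}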

From source file:org.apache.james.mailrepository.file.MBoxMailRepository.java

/**
 * Parse the mbox file.
 * 
 * @param ins
 *            The random access file to load. Note that the file may or may
 *            not start at offset 0 in the file
 * @param messAct
 *            The action to take when a message is found
 */
private MimeMessage parseMboxFile(RandomAccessFile ins, MessageAction messAct) {
    if (getLogger().isDebugEnabled()) {
        String logBuffer = this.getClass().getName() + " Start parsing " + mboxFile;

        getLogger().debug(logBuffer);
    }
    try {

        Pattern sepMatchPattern = Pattern.compile("^From (.*) (.*):(.*):(.*)$");

        int c;
        boolean inMessage = false;
        StringBuffer messageBuffer = new StringBuffer();
        String previousMessageSeparator = null;
        boolean foundSep;

        long prevMessageStart = ins.getFilePointer();
        if (BUFFERING) {
            String line;
            while ((line = ins.readLine()) != null) {
                foundSep = sepMatchPattern.matcher(line).matches();

                if (foundSep && inMessage) {
                    // if ((DEEP_DEBUG) && (getLogger().isDebugEnabled())) {
                    // getLogger().debug(this.getClass().getName() +
                    // " Invoking " + messAct.getClass() + " at " +
                    // prevMessageStart);
                    // }
                    MimeMessage endResult = messAct.messageAction(previousMessageSeparator,
                            messageBuffer.toString(), prevMessageStart);
                    if (messAct.isComplete()) {
                        // I've got what I want so just exit
                        return endResult;
                    }
                    previousMessageSeparator = line;
                    prevMessageStart = ins.getFilePointer() - line.length();
                    messageBuffer = new StringBuffer();
                    inMessage = true;
                }
                // Only done at the start (first header)
                if (foundSep && !inMessage) {
                    previousMessageSeparator = line;
                    inMessage = true;
                }
                if (!foundSep && inMessage) {
                    messageBuffer.append(line).append("\n");
                }
            }
        } else {
            StringBuffer line = new StringBuffer();
            while ((c = ins.read()) != -1) {
                if (c == 10) { // 10 is '\n': end of the current line
                    foundSep = sepMatchPattern.matcher(line).matches();
                    if (foundSep && inMessage) {
                        // if ((DEEP_DEBUG) &&
                        // (getLogger().isDebugEnabled())) {
                        // getLogger().debug(this.getClass().getName() +
                        // " Invoking " + messAct.getClass() + " at " +
                        // prevMessageStart);
                        // }
                        MimeMessage endResult = messAct.messageAction(previousMessageSeparator,
                                messageBuffer.toString(), prevMessageStart);
                        if (messAct.isComplete()) {
                            // I've got what I want so just exit
                            return endResult;
                        }
                        previousMessageSeparator = line.toString();
                        prevMessageStart = ins.getFilePointer() - line.length();
                        messageBuffer = new StringBuffer();
                        inMessage = true;
                    }
                    // Only done at the start (first header)
                    if (foundSep && !inMessage) {
                        previousMessageSeparator = line.toString();
                        inMessage = true;
                    }
                    if (!foundSep) {
                        messageBuffer.append(line).append((char) c);
                    }
                    line = new StringBuffer(); // Reset buffer
                } else {
                    line.append((char) c);
                }
            }
        }

        if (messageBuffer.length() != 0) {
            // process last message
            return messAct.messageAction(previousMessageSeparator, messageBuffer.toString(), prevMessageStart);
        }
    } catch (IOException ioEx) {
        getLogger().error("Unable to write file (General I/O problem) " + mboxFile, ioEx);
    } catch (PatternSyntaxException e) {
        getLogger().error("Bad regex passed " + mboxFile, e);
    } finally {
        if (getLogger().isDebugEnabled()) {
            String logBuffer = this.getClass().getName() + " Finished parsing " + mboxFile;

            getLogger().debug(logBuffer);
        }
    }
    return null;
}
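
One closing note on the unbuffered branch: casting each read() result to char, as this parser does, silently assumes a single-byte encoding such as US-ASCII or ISO-8859-1; multi-byte UTF-8 text would be mangled. A small sketch that makes the assumption explicit (ByteLineReader and readAsciiLine are illustrative names):

import java.io.IOException;
import java.io.RandomAccessFile;

final class ByteLineReader {
    /**
     * Reads one line of single-byte-encoded text, mirroring the
     * (c = ins.read()) != -1 loop in the parser above.
     */
    static String readAsciiLine(RandomAccessFile file) throws IOException {
        StringBuilder line = new StringBuilder();
        int c;
        while ((c = file.read()) != -1) {
            if (c == 10) {             // 10 == '\n', end of the current line
                return line.toString();
            }
            line.append((char) c);     // safe only for single-byte charsets
        }
        return line.length() > 0 ? line.toString() : null; // null at end of file
    }
}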