Example usage for the java.util.zip CheckedInputStream(InputStream, Checksum) constructor

Introduction

This page collects usage examples for the java.util.zip CheckedInputStream(InputStream, Checksum) constructor.

Prototype

public CheckedInputStream(InputStream in, Checksum cksum) 

Document

Creates an input stream using the specified Checksum.
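
A minimal, self-contained sketch of the constructor in use (the file name is a placeholder): wrap any InputStream in a CheckedInputStream, drain the bytes, then read the accumulated value from the Checksum you supplied.

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;

public class ChecksumDemo {
    public static void main(String[] args) throws IOException {
        // "data.bin" is a placeholder; point this at any existing file
        try (InputStream raw = new BufferedInputStream(new FileInputStream("data.bin"));
                CheckedInputStream in = new CheckedInputStream(raw, new CRC32())) {
            byte[] buffer = new byte[8192];
            // every byte read through the stream updates the checksum
            while (in.read(buffer) != -1) {
                // discard the data; only the checksum matters here
            }
            System.out.println("CRC32: " + Long.toHexString(in.getChecksum().getValue()));
        }
    }
}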

Usage

From source file:org.apache.isis.objectstore.nosql.db.file.server.FileServer.java

private void syncConnection(final Socket connection, final int readTimeout) {
    try {
        final CRC32 crc32 = new CRC32();
        final DataOutput output = new DataOutputStream(connection.getOutputStream());
        final DataInput input = new DataInputStream(new CheckedInputStream(connection.getInputStream(), crc32));

        if (input.readByte() != INIT) {
            return;
        }

        final LogRange logFileRange = Util.logFileRange();
        final long lastId = logFileRange.noLogFile() ? -1 : logFileRange.getLast();
        output.writeLong(lastId);
        do {
            if (input.readByte() != RECOVERY_LOG) {
                return;
            }
            crc32.reset();
            final long logId = input.readLong();
            final File file = Util.tmpLogFile(logId);
            LOG.info("syncing recovery file: " + file.getName());
            final BufferedOutputStream fileOutput = new BufferedOutputStream(new FileOutputStream(file));

            final byte[] buffer = new byte[8092];
            int length;
            while ((length = input.readInt()) > 0) {
                input.readFully(buffer, 0, length);
                fileOutput.write(buffer, 0, length);
            }
            fileOutput.close();

            final long calculatedChecksum = crc32.getValue();
            final long sentChecksum = input.readLong();
            if (calculatedChecksum != sentChecksum) {
                throw new NoSqlStoreException("Checksum didn't match during download of " + file.getName());
            }

            recover(file);
            final File renameTo = Util.logFile(logId);
            file.renameTo(renameTo);
        } while (true);
    } catch (final NoSqlStoreException e) {
        LOG.error("file server failure", e);
    } catch (final IOException e) {
        LOG.error("networking failure", e);
    } catch (final RuntimeException e) {
        LOG.error("request failure", e);
    } finally {
        try {
            connection.close();
        } catch (final IOException e) {
            LOG.warn("failure to close connection", e);
        }
    }

    // TODO restart
}
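
The example above shows a recurring wire-protocol idiom: one Checksum instance is shared with the CheckedInputStream, reset() is called before each record, and the value computed while reading is compared against a trailing checksum sent by the peer. Below is a stripped-down sketch of that per-record pattern; the framing (a 4-byte length prefix and an 8-byte CRC32 trailer) is assumed for illustration and is not the FileServer protocol itself.

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;

public class RecordVerifier {

    /** Reads one length-prefixed record and verifies its trailing CRC32. */
    static byte[] readVerifiedRecord(InputStream raw) throws IOException {
        final CRC32 crc = new CRC32();
        final DataInputStream in = new DataInputStream(new CheckedInputStream(raw, crc));

        crc.reset(); // start a fresh checksum for this record
        final int length = in.readInt(); // assumed framing: 4-byte payload length
        final byte[] payload = new byte[length];
        in.readFully(payload); // the length prefix and the payload both feed the CRC

        final long computed = crc.getValue(); // snapshot before the trailer is read
        final long sent = in.readLong(); // assumed framing: 8-byte CRC32 trailer
        if (computed != sent) {
            throw new IOException("checksum mismatch: computed=" + computed + ", sent=" + sent);
        }
        return payload;
    }
}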

From source file:org.apache.nifi.processors.standard.TailFile.java

/**
 * Updates member variables to reflect the "expected recovery checksum" and
 * seek to the appropriate location in the tailed file, updating our
 * checksum, so that we are ready to proceed with the
 * {@link #onTrigger(ProcessContext, ProcessSession)} call.
 *
 * @param context the ProcessContext
 * @param stateValues the values that were recovered from state that was
 * previously stored. This Map should be populated with the keys defined in
 * {@link TailFileState.StateKeys}.
 * @param filePath the path of the file for which state must be recovered
 * @throws IOException if unable to seek to the appropriate location in the
 * tailed file.
 */
private void recoverState(final ProcessContext context, final Map<String, String> stateValues,
        final String filePath) throws IOException {

    final String prefix = MAP_PREFIX + states.get(filePath).getFilenameIndex() + '.';

    if (!stateValues.containsKey(prefix + TailFileState.StateKeys.FILENAME)) {
        resetState(filePath);
        return;
    }
    if (!stateValues.containsKey(prefix + TailFileState.StateKeys.POSITION)) {
        resetState(filePath);
        return;
    }
    if (!stateValues.containsKey(prefix + TailFileState.StateKeys.TIMESTAMP)) {
        resetState(filePath);
        return;
    }
    if (!stateValues.containsKey(prefix + TailFileState.StateKeys.LENGTH)) {
        resetState(filePath);
        return;
    }

    final String checksumValue = stateValues.get(prefix + TailFileState.StateKeys.CHECKSUM);
    final boolean checksumPresent = (checksumValue != null);
    final String storedStateFilename = stateValues.get(prefix + TailFileState.StateKeys.FILENAME);
    final long position = Long.parseLong(stateValues.get(prefix + TailFileState.StateKeys.POSITION));
    final long timestamp = Long.parseLong(stateValues.get(prefix + TailFileState.StateKeys.TIMESTAMP));
    final long length = Long.parseLong(stateValues.get(prefix + TailFileState.StateKeys.LENGTH));

    FileChannel reader = null;
    File tailFile = null;

    if (checksumPresent && filePath.equals(storedStateFilename)) {
        states.get(filePath).setExpectedRecoveryChecksum(Long.parseLong(checksumValue));

        // We have an expected checksum and the currently configured filename is the same as the state file.
        // We need to check if the existing file is the same as the one referred to in the state file based on
        // the checksum.
        final Checksum checksum = new CRC32();
        final File existingTailFile = new File(storedStateFilename);
        if (existingTailFile.length() >= position) {
            try (final InputStream tailFileIs = new FileInputStream(existingTailFile);
                    final CheckedInputStream in = new CheckedInputStream(tailFileIs, checksum)) {
                StreamUtils.copy(in, new NullOutputStream(), states.get(filePath).getState().getPosition());

                final long checksumResult = in.getChecksum().getValue();
                if (checksumResult == states.get(filePath).getExpectedRecoveryChecksum()) {
                    // Checksums match. This means that we want to resume reading from where we left off.
                    // So we will populate the reader object so that it will be used in onTrigger. If the
                    // checksums do not match, then we will leave the reader object null, so that the next
                    // call to onTrigger will result in a new Reader being created and starting at the
                    // beginning of the file.
                    getLogger().debug(
                            "When recovering state, checksum of tailed file matches the stored checksum. Will resume where left off.");
                    tailFile = existingTailFile;
                    reader = FileChannel.open(tailFile.toPath(), StandardOpenOption.READ);
                    getLogger().debug("Created FileChannel {} for {} in recoverState",
                            new Object[] { reader, tailFile });

                    reader.position(position);
                } else {
                    // we don't seek the reader to the position, so our reader will start at beginning of file.
                    getLogger().debug(
                            "When recovering state, checksum of tailed file does not match the stored checksum. Will begin tailing current file from beginning.");
                }
            }
        } else {
            // fewer bytes than our position, so we know we weren't already reading from this file. Keep reader at a position of 0.
            getLogger().debug(
                    "When recovering state, existing file to tail is only {} bytes but position flag is {}; "
                            + "this indicates that the file has rotated. Will begin tailing current file from beginning.",
                    new Object[] { existingTailFile.length(), position });
        }

        states.get(filePath).setState(new TailFileState(filePath, tailFile, reader, position, timestamp, length,
                checksum, ByteBuffer.allocate(65536)));
    } else {
        resetState(filePath);
    }

    getLogger().debug("Recovered state {}", new Object[] { states.get(filePath).getState() });
}
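
The recovery logic above boils down to one idea: re-read the first position bytes of the candidate file through a CheckedInputStream and compare the result with the checksum stored in state. Here is a plain-Java sketch of that prefix check, without the NiFi utility classes (the method name and arguments are illustrative):

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;

public class PrefixCheck {

    /** Returns true if the first 'position' bytes of the file hash to expectedChecksum. */
    static boolean prefixMatches(String path, long position, long expectedChecksum) throws IOException {
        try (InputStream raw = new BufferedInputStream(new FileInputStream(path));
                CheckedInputStream in = new CheckedInputStream(raw, new CRC32())) {
            byte[] buffer = new byte[8192];
            long remaining = position;
            while (remaining > 0) {
                int read = in.read(buffer, 0, (int) Math.min(buffer.length, remaining));
                if (read == -1) {
                    // shorter than the recorded position: this cannot be the same file
                    return false;
                }
                remaining -= read;
            }
            return in.getChecksum().getValue() == expectedChecksum;
        }
    }
}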

From source file:org.apache.nifi.processors.standard.TailFile.java

private void processTailFile(final ProcessContext context, final ProcessSession session,
        final String tailFile) {
    // If user changes the file that is being tailed, we need to consume the already-rolled-over data according
    // to the Initial Start Position property
    boolean rolloverOccurred;
    TailFileObject tfo = states.get(tailFile);

    if (tfo.isTailFileChanged()) {
        rolloverOccurred = false;
        final String recoverPosition = context.getProperty(START_POSITION).getValue();

        if (START_BEGINNING_OF_TIME.getValue().equals(recoverPosition)) {
            recoverRolledFiles(context, session, tailFile, tfo.getExpectedRecoveryChecksum(),
                    tfo.getState().getTimestamp(), tfo.getState().getPosition());
        } else if (START_CURRENT_FILE.getValue().equals(recoverPosition)) {
            cleanup();
            tfo.setState(new TailFileState(tailFile, null, null, 0L, 0L, 0L, null, tfo.getState().getBuffer()));
        } else {
            final String filename = tailFile;
            final File file = new File(filename);

            try {
                final FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ);
                getLogger().debug("Created FileChannel {} for {}", new Object[] { fileChannel, file });

                final Checksum checksum = new CRC32();
                final long position = file.length();
                final long timestamp = file.lastModified();

                try (final InputStream fis = new FileInputStream(file);
                        final CheckedInputStream in = new CheckedInputStream(fis, checksum)) {
                    StreamUtils.copy(in, new NullOutputStream(), position);
                }

                fileChannel.position(position);
                cleanup();
                tfo.setState(new TailFileState(filename, file, fileChannel, position, timestamp, file.length(),
                        checksum, tfo.getState().getBuffer()));
            } catch (final IOException ioe) {
                getLogger().error(
                        "Attempted to position Reader at current position in file {} but failed to do so due to {}",
                        new Object[] { file, ioe.toString() }, ioe);
                context.yield();
                return;
            }
        }

        tfo.setTailFileChanged(false);
    } else {
        // Recover any data that may have rolled over since the last time that this processor ran.
        // If expectedRecoveryChecksum != null, that indicates that this is the first iteration since processor was started, so use whatever checksum value
        // was present when the state was last persisted. In this case, we must then null out the value so that the next iteration won't keep using the "recovered"
        // value. If the value is null, then we know that either the processor has already recovered that data, or there was no state persisted. In either case,
        // use whatever checksum value is currently in the state.
        Long expectedChecksumValue = tfo.getExpectedRecoveryChecksum();
        if (expectedChecksumValue == null) {
            expectedChecksumValue = tfo.getState().getChecksum() == null ? null
                    : tfo.getState().getChecksum().getValue();
        }

        rolloverOccurred = recoverRolledFiles(context, session, tailFile, expectedChecksumValue,
                tfo.getState().getTimestamp(), tfo.getState().getPosition());
        tfo.setExpectedRecoveryChecksum(null);
    }

    // initialize local variables from state object; this is done so that we can easily change the values throughout
    // the onTrigger method and then create a new state object after we finish processing the files.
    TailFileState state = tfo.getState();
    File file = state.getFile();
    FileChannel reader = state.getReader();
    Checksum checksum = state.getChecksum();
    if (checksum == null) {
        checksum = new CRC32();
    }
    long position = state.getPosition();
    long timestamp = state.getTimestamp();
    long length = state.getLength();

    // Create a reader if necessary.
    if (file == null || reader == null) {
        file = new File(tailFile);
        reader = createReader(file, position);
        if (reader == null) {
            context.yield();
            return;
        }
    }

    final long startNanos = System.nanoTime();

    // Check if file has rotated
    if (rolloverOccurred || (timestamp <= file.lastModified() && length > file.length())
            || (timestamp < file.lastModified() && length >= file.length())) {

        // Since file has rotated, we close the reader, create a new one, and then reset our state.
        try {
            reader.close();
            getLogger().debug("Closed FileChannel {}", new Object[] { reader, reader });
        } catch (final IOException ioe) {
            getLogger().warn("Failed to close reader for {} due to {}", new Object[] { file, ioe });
        }

        reader = createReader(file, 0L);
        position = 0L;
        checksum.reset();
    }

    if (file.length() == position || !file.exists()) {
        // no data to consume so rather than continually running, yield to allow other processors to use the thread.
        getLogger().debug("No data to consume; created no FlowFiles");
        tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum,
                state.getBuffer()));
        persistState(tfo, context);
        context.yield();
        return;
    }

    // If there is data to consume, read as much as we can.
    final TailFileState currentState = state;
    final Checksum chksum = checksum;
    // data has been written to file. Stream it to a new FlowFile.
    FlowFile flowFile = session.create();

    final FileChannel fileReader = reader;
    final AtomicLong positionHolder = new AtomicLong(position);
    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream rawOut) throws IOException {
            try (final OutputStream out = new BufferedOutputStream(rawOut)) {
                positionHolder.set(readLines(fileReader, currentState.getBuffer(), out, chksum));
            }
        }
    });

    // If there ended up being no data, just remove the FlowFile
    if (flowFile.getSize() == 0) {
        session.remove(flowFile);
        getLogger().debug("No data to consume; removed created FlowFile");
    } else {
        // determine filename for FlowFile by using <base filename of log file>.<initial offset>-<final offset>.<extension>
        final String tailFilename = file.getName();
        final String baseName = StringUtils.substringBeforeLast(tailFilename, ".");
        final String flowFileName;
        if (baseName.length() < tailFilename.length()) {
            flowFileName = baseName + "." + position + "-" + positionHolder.get() + "."
                    + StringUtils.substringAfterLast(tailFilename, ".");
        } else {
            flowFileName = baseName + "." + position + "-" + positionHolder.get();
        }

        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), flowFileName);
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tailFile);
        flowFile = session.putAllAttributes(flowFile, attributes);

        session.getProvenanceReporter().receive(flowFile, file.toURI().toString(),
                "FlowFile contains bytes " + position + " through " + positionHolder.get() + " of source file",
                TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
        session.transfer(flowFile, REL_SUCCESS);
        position = positionHolder.get();

        // Set timestamp to the latest of when the file was modified and the current timestamp stored in the state.
        // We do this because when we read a file that has been rolled over, we set the state to 1 millisecond later than the last mod date
        // in order to avoid ingesting that file again. If we then read from this file during the same second (or millisecond, depending on the
        // operating system file last mod precision), then we could set the timestamp to a smaller value, which could result in reading in the
        // rotated file a second time.
        timestamp = Math.max(state.getTimestamp(), file.lastModified());
        length = file.length();
        getLogger().debug("Created {} and routed to success", new Object[] { flowFile });
    }

    // Create a new state object to represent our current position, timestamp, etc.
    tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum,
            state.getBuffer()));

    // We must commit session before persisting state in order to avoid data loss on restart
    session.commit();
    persistState(tfo, context);
}

From source file:org.apache.nifi.processors.standard.TailFile.java

/**
 * Finds any files that have rolled over and have not yet been ingested by
 * this Processor. Each of these files that is found will be ingested as its
 * own FlowFile. If a file is found that has been partially ingested, the
 * rest of the file will be ingested as a single FlowFile but the data that
 * already has been ingested will not be ingested again.
 *
 * @param context the ProcessContext to use in order to obtain Processor
 * configuration.
 * @param session the ProcessSession to use in order to interact with
 * FlowFile creation and content.
 * @param tailFile the path of the file being tailed
 * @param rolledOffFiles the files that have rolled over and may still need
 * to be recovered
 * @param expectedChecksum the checksum value that is expected for the
 * oldest file from offset 0 through &lt;position&gt;.
 * @param timestamp the latest Last Modified timestamp that has been
 * consumed. Any data that was written before this timestamp will not be
 * ingested.
 * @param position the byte offset in the file being tailed, where tailing
 * last left off.
 *
 * @return <code>true</code> if the file being tailed has rolled over, false
 * otherwise
 */
private boolean recoverRolledFiles(final ProcessContext context, final ProcessSession session,
        final String tailFile, final List<File> rolledOffFiles, final Long expectedChecksum,
        final long timestamp, final long position) {
    try {
        getLogger().debug("Recovering Rolled Off Files; total number of files rolled off = {}",
                new Object[] { rolledOffFiles.size() });
        TailFileObject tfo = states.get(tailFile);

        // For first file that we find, it may or may not be the file that we were last reading from.
        // As a result, we have to read up to the position we stored, while calculating the checksum. If the checksums match,
        // then we know we've already processed this file. If the checksums do not match, then we have not
        // processed this file and we need to seek back to position 0 and ingest the entire file.
        // For all other files that have been rolled over, we need to just ingest the entire file.
        boolean rolloverOccurred = !rolledOffFiles.isEmpty();
        if (rolloverOccurred && expectedChecksum != null && rolledOffFiles.get(0).length() >= position) {
            final File firstFile = rolledOffFiles.get(0);

            final long startNanos = System.nanoTime();
            if (position > 0) {
                try (final InputStream fis = new FileInputStream(firstFile);
                        final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
                    StreamUtils.copy(in, new NullOutputStream(), position);

                    final long checksumResult = in.getChecksum().getValue();
                    if (checksumResult == expectedChecksum) {
                        getLogger().debug("Checksum for {} matched expected checksum. Will skip first {} bytes",
                                new Object[] { firstFile, position });

                        // This is the same file that we were reading when we shutdown. Start reading from this point on.
                        rolledOffFiles.remove(0);
                        FlowFile flowFile = session.create();
                        flowFile = session.importFrom(in, flowFile);
                        if (flowFile.getSize() == 0L) {
                            session.remove(flowFile);
                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(
                                    new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L,
                                            firstFile.length(), null, tfo.getState().getBuffer()));
                        } else {
                            final Map<String, String> attributes = new HashMap<>(3);
                            attributes.put(CoreAttributes.FILENAME.key(), firstFile.getName());
                            attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
                            attributes.put("tailfile.original.path", tailFile);
                            flowFile = session.putAllAttributes(flowFile, attributes);

                            session.getProvenanceReporter().receive(flowFile, firstFile.toURI().toString(),
                                    "FlowFile contains bytes 0 through " + position + " of source file",
                                    TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().debug("Created {} from rolled over file {} and routed to success",
                                    new Object[] { flowFile, firstFile });

                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(
                                    new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L,
                                            firstFile.length(), null, tfo.getState().getBuffer()));

                            // must ensure that we do session.commit() before persisting state in order to avoid data loss.
                            session.commit();
                            persistState(tfo, context);
                        }
                    } else {
                        getLogger().debug(
                                "Checksum for {} did not match expected checksum. Checksum for file was {} but expected {}. Will consume entire file",
                                new Object[] { firstFile, checksumResult, expectedChecksum });
                    }
                }
            }
        }

        // For each file that we found that matches our Rollover Pattern, and has a last modified date later than the timestamp
        // that we recovered from the state file, we need to consume the entire file. The only exception to this is the file that
        // we were reading when we last stopped, as it may already have been partially consumed. That is taken care of in the
        // above block of code.
        for (final File file : rolledOffFiles) {
            tfo.setState(consumeFileFully(file, context, session, tfo));
        }

        return rolloverOccurred;
    } catch (final IOException e) {
        getLogger().error("Failed to recover files that have rolled over due to {}", new Object[] { e });
        return false;
    }
}

From source file:org.broadleafcommerce.common.util.StringUtil.java

public static long getChecksum(String test) {
    try {
        byte buffer[] = test.getBytes();
        ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
        CheckedInputStream cis = new CheckedInputStream(bais, new Adler32());
        byte readBuffer[] = new byte[buffer.length];
        // a single read(byte[]) drains a ByteArrayInputStream, so every byte
        // passes through the stream and updates the Adler-32 value
        cis.read(readBuffer);
        return cis.getChecksum().getValue();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
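
A quick, hypothetical use of the helper above: Adler-32 is deterministic, so equal strings always yield equal values, which makes the checksum a cheap change detector.

long before = StringUtil.getChecksum("<div>cached fragment</div>");
long after = StringUtil.getChecksum("<div>cached fragment!</div>");
if (before != after) {
    // content changed; rebuild whatever was derived from the old string
}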

From source file:org.cloudata.core.commitlog.CommitLogServer.java

public long getChecksum(String dirName) throws IOException {
    File logFile = getLogFile(dirName);

    long fileLength = logFile.length();

    CheckedInputStream cksumIn = new CheckedInputStream(new FileInputStream(logFile), new CRC32());
    BufferedInputStream in = new BufferedInputStream(cksumIn, 8192);

    // read byte by byte; BufferedInputStream keeps this reasonably efficient while
    // every byte still flows through the CheckedInputStream and updates the CRC
    for (long i = 0; i < fileLength; i++) {
        in.read();
    }

    return cksumIn.getChecksum().getValue();
}

From source file:org.codice.ddf.checksum.impl.Adler32ChecksumProvider.java

@Override
public String calculateChecksum(InputStream inputStream) throws IOException, NoSuchAlgorithmException {

    if (inputStream == null) {
        throw new IllegalArgumentException("InputStream cannot be null");
    }

    long checksumValue = 0L;

    try (CheckedInputStream cis = new CheckedInputStream(inputStream, new Adler32())) {
        IOUtils.toByteArray(cis); // drain the stream; only the checksum side effect matters
        checksumValue = cis.getChecksum().getValue();
    }

    return Long.toHexString(checksumValue);
}

From source file:org.commoncrawl.hadoop.io.deprecated.ArcFileReader.java

private boolean readHeader() throws IOException {

    if (!_eosReached) {

        CheckedInputStream in = new CheckedInputStream(this.in, _crc);

        _crc.reset();

        try {
            // Check header magic
            if (readUShort(in) != GZIP_MAGIC) {
                throw new IOException("Not in GZIP format");
            }
            // Check compression method
            if (readUByte(in) != 8) {
                throw new IOException("Unsupported compression method");
            }
            // Read flags
            int flg = readUByte(in);
            // Skip MTIME, XFL, and OS fields
            skipBytes(in, 6);
            // Skip optional extra field
            if ((flg & FEXTRA) == FEXTRA) {
                skipBytes(in, readUShort(in));
            }
            // Skip optional file name
            if ((flg & FNAME) == FNAME) {
                while (readUByte(in) != 0)
                    ;
            }
            // Skip optional file comment
            if ((flg & FCOMMENT) == FCOMMENT) {
                while (readUByte(in) != 0)
                    ;
            }
            // Check optional header CRC
            if ((flg & FHCRC) == FHCRC) {
                int v = (int) _crc.getValue() & 0xffff;
                if (readUShort(in) != v) {
                    throw new IOException("Corrupt GZIP header");
                }
            }
            return true;
        } catch (EOFException e) {
            // EOF while reading the header means no further entries; fall through
        }
    }
    return false;
}
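
The FHCRC branch above relies on a detail of RFC 1952: the optional header CRC16 is the low 16 bits of the CRC32 of every header byte that precedes it, stored little-endian. A small round-trip sketch of that check; the header bytes are hand-built for illustration:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;

public class HeaderCrcDemo {
    public static void main(String[] args) throws IOException {
        // minimal gzip header: magic (1f 8b), deflate method (8), FHCRC flag (0x02),
        // MTIME (4 bytes), XFL, OS
        byte[] header = { 0x1f, (byte) 0x8b, 8, 0x02, 0, 0, 0, 0, 0, 0 };
        CRC32 crc = new CRC32();
        crc.update(header);
        int expected = (int) crc.getValue() & 0xffff; // CRC16 = low 16 bits of CRC32

        // append the CRC16 little-endian, as RFC 1952 specifies
        byte[] withCrc = new byte[header.length + 2];
        System.arraycopy(header, 0, withCrc, 0, header.length);
        withCrc[header.length] = (byte) (expected & 0xff);
        withCrc[header.length + 1] = (byte) ((expected >>> 8) & 0xff);

        // re-read the header through a CheckedInputStream and validate it
        CRC32 readCrc = new CRC32();
        try (CheckedInputStream in = new CheckedInputStream(new ByteArrayInputStream(withCrc), readCrc)) {
            byte[] buf = new byte[header.length];
            if (in.read(buf) != header.length) {
                throw new IOException("short read");
            }
            int computed = (int) readCrc.getValue() & 0xffff; // snapshot before reading the CRC16 itself
            int stored = (in.read() & 0xff) | ((in.read() & 0xff) << 8);
            System.out.println(computed == stored ? "header CRC OK" : "corrupt GZIP header");
        }
    }
}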

From source file:org.commoncrawl.service.listcrawler.CacheManager.java

/**
 * loadCacheItemFromDisk - load a single cache item from disk
 *
 * @param file the cache file stream, positioned at the record to read
 * @param optTargetURL the URL to match, or an empty string to accept any record
 * @param location the record's offset within the cache file (used for logging)
 * @return the deserialized CacheItem, or null if validation fails
 * @throws IOException
 */
private CacheItem loadCacheItemFromDisk(FileInputStream file, String optTargetURL, long location)
        throws IOException {

    long timeStart = System.currentTimeMillis();

    // and read out the Item Header ...  
    CacheItemHeader itemHeader = new CacheItemHeader();
    itemHeader.readHeader(new DataInputStream(file));
    // see if it is valid ... 
    if (!Arrays.equals(itemHeader._sync, _header._sync)) {
        LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                + " failed - corrupt sync bytes detected!!!");
    } else {
        CRC32 crc32 = new CRC32();
        // ok deserialize the bytes ... 
        CacheItem item = new CacheItem();
        CheckedInputStream checkedStream = new CheckedInputStream(file, crc32);
        DataInputStream itemStream = new DataInputStream(checkedStream);
        item.readFields(itemStream);
        // read the content buffer length 
        int contentBufferLen = itemStream.readInt();
        if (contentBufferLen != 0) {
            byte data[] = new byte[contentBufferLen];
            itemStream.readFully(data); // readFully: a plain read() may deliver fewer bytes than requested
            item.setContent(new Buffer(data));
        }

        // cache crc 
        long crcValueComputed = crc32.getValue();
        // read disk crc 
        long crcValueOnDisk = itemStream.readLong();
        // validate 
        if (crcValueComputed == crcValueOnDisk) {
            String canonicalURL = URLUtils.canonicalizeURL(item.getUrl(), true);
            if (optTargetURL.length() == 0 || optTargetURL.equals(canonicalURL)) {
                if (isValidCacheItem(item)) {
                    LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                            + " completed in:" + (System.currentTimeMillis() - timeStart));
                    return item;
                } else {
                    LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                            + " failed with invalid result code");
                }

            } else {
                LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                        + " failed with url mismatch. record url:" + item.getUrl());
            }
        } else {
            LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                    + " failed - crc mismatch!!!");
        }
    }
    return null;
}

From source file:org.commoncrawl.util.StreamingArcFileReader.java

/**
 * Constructs a new StreamingArcFileReader object
 */

public StreamingArcFileReader(boolean hasArcFileHeader) {

    // setup the proper stream...
    _rawInput = new InputStream() {

        byte oneByteArray[] = new byte[1];

        @Override
        public synchronized int available() throws IOException {
            return _bytesAvailable;
        }

        @Override
        public int read() throws IOException {
            // read(byte[], int, int) already advances _streamPos, so do not
            // advance it again here; only a successful one-byte read yields a value
            if (read(oneByteArray, 0, 1) == 1) {
                return oneByteArray[0] & 0xff;
            }
            return -1;
        }

        @Override
        public int read(byte b[], int off, int len) throws IOException {
            if (_activeInputBuffer == null || _activeInputBuffer.remaining() == 0) {

                _activeInputBuffer = null;

                BufferItem nextItem = null;

                try {
                    if (_consumerQueue.size() != 0) {
                        nextItem = _consumerQueue.take();
                    }
                } catch (InterruptedException e) {
                }

                if (nextItem != null) {
                    if (nextItem._buffer == null) {
                        return -1;
                    } else {
                        _activeInputBuffer = nextItem._buffer;
                    }
                }
            }

            // '&&', not '||': a null buffer at this point means no data is queued
            if (_activeInputBuffer != null && _activeInputBuffer.remaining() != 0) {

                final int sizeAvailable = _activeInputBuffer.remaining();
                final int sizeToRead = Math.min(sizeAvailable, len);

                _activeInputBuffer.get(b, off, sizeToRead);

                _streamPos += sizeToRead;

                synchronized (this) {
                    _bytesAvailable -= sizeToRead;
                }

                return sizeToRead;
            } else {
                return 0;
            }
        }
    };

    _checkedInput = new CheckedInputStream(_rawInput, _crc);
    if (!hasArcFileHeader) {
        _readState = ReadState.ReadingEntryHeader;
    }
}