org.apache.hadoop.hdfs.server.namenode.bookkeeper.BookKeeperJournalInputStream.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.namenode.bookkeeper.BookKeeperJournalInputStream.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode.bookkeeper;

import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.client.LedgerEntry;
import org.apache.bookkeeper.client.LedgerHandle;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;

import static org.apache.hadoop.hdfs.server.namenode.bookkeeper.BookKeeperJournalManager.bkException;
import static org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk.ZkUtil.interruptedException;

/**
 * An {@link InputStream} over a BookKeeper ledger which maps to a specific
 * edit log segment.
 */
public class BookKeeperJournalInputStream extends InputStream {

    // Class-wide logger
    private static final Log LOG = LogFactory.getLog(BookKeeperJournalInputStream.class);

    // BookKeeper ledger is mutable as the ledger may need to be re-opened
    // by the caller in order to find the true end for tailing
    // (it is swapped through resetLedger()).
    private LedgerHandle ledger;

    // This is not txId, this is the id of the first ledger entry.
    // position(0) rewinds the stream to this entry.
    private final long firstLedgerEntryId;

    // Maximum ledger entry id seen so far. In an in-progress edit log
    // stream this changes over time. Seeded from
    // ledger.getLastAddConfirmed() in the constructor.
    private long maxLedgerEntryIdSeen;

    // Stream over the ledger entry currently being read. Null until an entry
    // has been fetched; position() also resets it to null when the entry has
    // to be fetched again.
    private InputStream entryStream;

    // Keep track of the current state (see the InputStreamState inner
    // class for more detailed information) and the "last known good" state
    // (updated by calling savePosition()). savedStreamState stays null until
    // savePosition() has been called at least once.
    private InputStreamState currentStreamState;
    private InputStreamState savedStreamState;

    /**
     * Mutable bookkeeping for a position inside the ledger: how far into the
     * ledger and into the current entry we have read, which entry is fetched
     * next, and the reader position associated with this state.
     */
    static class InputStreamState {
        // Number of bytes read from this ledger so far
        private long offsetInLedger;
        // Number of bytes the external reader reports having consumed
        private long readerPosition;
        // Id of the ledger entry that will be fetched next
        private long nextLedgerEntryId;
        // Number of bytes read from the current ledger entry
        private int offsetInEntry;

        /** Creates a state with both byte offsets at zero. */
        InputStreamState() {
            this.offsetInLedger = 0L;
            this.offsetInEntry = 0;
        }

        /**
         * Produces an independent snapshot of the given state. Used to save
         * the current state.
         *
         * @param state the state to duplicate
         * @return a new object carrying the same four values
         */
        static InputStreamState copyOf(InputStreamState state) {
            final InputStreamState snapshot = new InputStreamState();
            snapshot.nextLedgerEntryId = state.nextLedgerEntryId;
            snapshot.offsetInEntry = state.offsetInEntry;
            snapshot.offsetInLedger = state.offsetInLedger;
            snapshot.readerPosition = state.readerPosition;
            return snapshot;
        }

        long getOffsetInLedger() {
            return this.offsetInLedger;
        }

        void setOffsetInLedger(long offsetInLedger) {
            this.offsetInLedger = offsetInLedger;
        }

        /** Moves the ledger offset forward by {@code numBytes}. */
        void advanceOffsetInLedger(long numBytes) {
            this.offsetInLedger = this.offsetInLedger + numBytes;
        }

        long getReaderPosition() {
            return this.readerPosition;
        }

        void setReaderPosition(long readerPosition) {
            this.readerPosition = readerPosition;
        }

        long getNextLedgerEntryId() {
            return this.nextLedgerEntryId;
        }

        /** Steps the next entry id forward by one. */
        void incrementNextLedgerEntryId() {
            this.nextLedgerEntryId += 1;
        }

        void setNextLedgerEntryId(long nextLedgerEntryId) {
            this.nextLedgerEntryId = nextLedgerEntryId;
        }

        int getOffsetInEntry() {
            return this.offsetInEntry;
        }

        void setOffsetInEntry(int offsetInEntry) {
            this.offsetInEntry = offsetInEntry;
        }

        /**
         * Moves the in-entry offset forward by {@code numBytes}. The sum is
         * computed as a long and narrowed back to int.
         */
        void advanceOffsetInEntry(long numBytes) {
            this.offsetInEntry = (int) (this.offsetInEntry + numBytes);
        }
    }

    /**
     * Create an input stream object for a specified BookKeper ledger
     * @param ledger The initial ledger instance
     * @param firstLedgerEntryId First ledger entry id (this is different from
     *                           HDFS transaction id!) to read from the ledger.
     */
    public BookKeeperJournalInputStream(LedgerHandle ledger, long firstLedgerEntryId) {
        this.ledger = ledger;
        this.firstLedgerEntryId = firstLedgerEntryId;
        maxLedgerEntryIdSeen = ledger.getLastAddConfirmed();
        currentStreamState = new InputStreamState();
        currentStreamState.setNextLedgerEntryId(firstLedgerEntryId);
    }

    /**
     * Reads a single byte by delegating to {@link #read(byte[], int, int)}.
     *
     * @return the byte as an unsigned value in the range 0-255, or -1 if the
     *         bulk read did not deliver exactly one byte
     * @throws IOException if the underlying read fails
     */
    @Override
    public int read() throws IOException {
        byte[] data = new byte[1];
        if (read(data, 0, 1) != 1) {
            return -1;
        }
        // Mask to an unsigned value. Returning the raw signed byte would make
        // bytes >= 0x80 negative, and 0xFF would be indistinguishable from
        // the -1 "no data" marker.
        return data[0] & 0xFF;
    }

    /**
     * Opens a stream over the next ledger entry. Called once the current
     * entry stream has been exhausted (or when no entry is open yet).
     *
     * @return a stream over the next entry, or null when there is no entry to
     *         read: either the next entry id lies beyond the highest entry id
     *         known even after refreshing it, or the read returned no entries
     * @throws IOException if BookKeeper reports an error or the thread is
     *                     interrupted while reading the entry
     */
    private InputStream nextEntryStream() throws IOException {
        final long entryId = currentStreamState.getNextLedgerEntryId();

        // Only refresh the known tail when we appear to be past it
        if (entryId > maxLedgerEntryIdSeen) {
            updateMaxLedgerEntryIdSeen();
        }
        if (entryId > maxLedgerEntryIdSeen) {
            // Still past the end of the ledger. Returning null leaves it to
            // the caller to either locate the new "tail" (in-progress ledger)
            // or move on to the next ledger (finalized ledger).
            if (LOG.isDebugEnabled()) {
                LOG.debug("Requesting to ledger entryId " + entryId + ", but "
                        + " maxLedgerEntryIdSeen is " + maxLedgerEntryIdSeen + ", ledger length is "
                        + ledger.getLength());
            }
            return null;
        }

        try {
            final Enumeration<LedgerEntry> fetched = ledger.readEntries(entryId, entryId);
            currentStreamState.incrementNextLedgerEntryId();
            if (!fetched.hasMoreElements()) {
                return null;
            }
            final LedgerEntry entry = fetched.nextElement();
            if (fetched.hasMoreElements()) {
                // Exactly one entry id was requested, so a second one is a bug
                throw new IllegalStateException("More than one entry retrieved!");
            }
            currentStreamState.setOffsetInEntry(0);
            return entry.getEntryInputStream();
        } catch (BKException bke) {
            throw new IOException("Unrecoverable BookKeeper error reading entry " + entryId, bke);
        } catch (InterruptedException ie) {
            // Restore the interrupt flag before surfacing the failure
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted reading BookKeeper entry " + entryId, ie);
        }
    }

    /**
     * Swaps in a new ledger handle and refreshes the highest known entry id
     * from it, so that the "tail" of the ledger can be determined correctly.
     *
     * @param ledger The new ledger object
     * @throws IOException if refreshing the highest known entry id fails
     */
    public void resetLedger(LedgerHandle ledger) throws IOException {
        this.ledger = ledger;
        this.updateMaxLedgerEntryIdSeen();
    }

    /**
     * Raises <code>maxLedgerEntryIdSeen</code> to the larger of the two last
     * confirmed entry ids the ledger handle can report: the one returned by
     * <code>getLastAddConfirmed()</code> and the one returned by
     * <code>readLastConfirmed()</code>. Taking the maximum covers a ledger
     * that becomes finalized mid-flight, where only one of the two values may
     * still be reliable. The field is never lowered.
     *
     * @throws IOException If there's an error talking to BookKeeper
     *                     or ZooKeeper
     */
    private void updateMaxLedgerEntryIdSeen() throws IOException {
        final long confirmedLocally = ledger.getLastAddConfirmed();
        final long confirmedByRead;
        try {
            confirmedByRead = ledger.readLastConfirmed();
        } catch (BKException bke) {
            bkException("Unable to read last confirmed ledger entry id from ledger " + ledger.getId(), bke);
            return;
        } catch (InterruptedException ie) {
            interruptedException(
                    "Interrupted reading last confirmed ledger entry id from ledger " + ledger.getId(), ie);
            return;
        }

        final long candidate = Math.max(confirmedLocally, confirmedByRead);
        if (candidate <= maxLedgerEntryIdSeen) {
            // Nothing newer than what we already know about
            return;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Resetting maxLedgerEntryIdSeen from " + maxLedgerEntryIdSeen + " to "
                    + candidate);
        }
        maxLedgerEntryIdSeen = candidate;
    }

    /**
     * Records the given reader position on the current state and keeps a copy
     * of that state as the saved state (meant for use with
     * {@link #position(long)}).
     *
     * @param position The external reader position associated with the
     *                 current ledger state.
     */
    public void savePosition(long position) {
        final InputStreamState live = currentStreamState;
        live.setReaderPosition(position);
        // Copy the state so later reads do not disturb the saved one
        savedStreamState = InputStreamState.copyOf(live);
    }

    /**
     * "Go back" to the specified reader position. Position 0 rewinds to the
     * first ledger entry. A position equal to the one recorded by
     * {@link #savePosition(long)} is restored from the saved state. Any other
     * position is reached by rewinding to 0 and reading forward
     * ("brute force").
     *
     * @param position The reader position we want to go back to
     * @throws IllegalArgumentException If an illegal position is specified:
     *         a negative or larger-than-<code>Integer.MAX_VALUE</code>
     *         position on the brute-force path, or a position beyond the
     *         saved offset in the ledger
     * @throws IllegalStateException If the required number of bytes could not
     *         be skipped
     * @throws IOException If there is an error communicating with BookKeeper
     */
    public void position(long position) throws IOException {
        if (position == 0) {
            currentStreamState.setNextLedgerEntryId(firstLedgerEntryId);
            currentStreamState.setOffsetInEntry(0);
            entryStream = null;
        } else if (savedStreamState == null || position != savedStreamState.getReaderPosition()) {
            // Seek to an arbitrary position through "brute force".
            // Validate before rewinding so that a bad argument leaves the
            // stream state untouched.
            if (position < 0) {
                throw new IllegalArgumentException("Asked to position to " + position
                        + ", but the position must not be negative");
            }
            if (position > Integer.MAX_VALUE) {
                throw new IllegalArgumentException("Asked to position to " + position
                        + ", but can only \"brute-force\" skip up to " + Integer.MAX_VALUE);
            }
            position(0);
            skip(position, (int) position);
        } else {
            // savedStream != null && position == savedStream.getReaderPosition()
            int bytesToSkip = 0;
            if (savedStreamState.getOffsetInLedger() > position) {
                // Since reading from the input stream is buffered, we usually will
                // read further into the ledger than the reader has actually
                // read into. In this case we will need to find out exactly *what*
                // position within the ledger entry matches with the reader's last
                // known good position.
                long entryStartPosition = savedStreamState.getOffsetInLedger()
                        - savedStreamState.getOffsetInEntry();
                bytesToSkip = (int) (position - entryStartPosition);
            } else if (savedStreamState.getOffsetInLedger() < position) {
                throw new IllegalArgumentException("Saved offset in ledger (" + savedStreamState.getOffsetInLedger()
                        + ") < position(" + position + ")");
            }
            // Step back to the entry that was open when the state was saved,
            // unless nothing had been fetched yet.
            long nextLedgerEntryId = savedStreamState.getNextLedgerEntryId() == firstLedgerEntryId
                    ? firstLedgerEntryId
                    : (savedStreamState.getNextLedgerEntryId() - 1);
            currentStreamState.setNextLedgerEntryId(nextLedgerEntryId);
            if (bytesToSkip > 0) {
                // Re-fetch that entry and discard bytes up to the position
                entryStream = null;
                skip(position, bytesToSkip);
            } else {
                // NOTE(review): the entry id was already stepped back above;
                // this steps it back a second time before re-opening an
                // entry. Confirm that this is intended.
                if (currentStreamState.getNextLedgerEntryId() > 0) {
                    currentStreamState.setNextLedgerEntryId(currentStreamState.getNextLedgerEntryId() - 1);
                }
                entryStream = nextEntryStream();
            }
        }
        // Whatever path was taken, the ledger offset now equals the position
        currentStreamState.setOffsetInLedger(position);
    }

    /**
     * Reads and discards <code>bytesToSkip</code> bytes so that the stream
     * position matches the position last consumed by the reader.
     *
     * The bytes are discarded through a bounded scratch buffer rather than a
     * single array of <code>bytesToSkip</code> bytes, because callers may ask
     * for up to <code>Integer.MAX_VALUE</code> bytes.
     *
     * @param position The reader position being sought (used for messages)
     * @param bytesToSkip Number of bytes to read and discard
     * @throws IllegalStateException If fewer than <code>bytesToSkip</code>
     *         bytes could be read
     * @throws IOException If reading from the ledger fails
     */
    private void skip(long position, int bytesToSkip) throws IOException {
        LOG.info("Attempting to skip " + bytesToSkip + " bytes to get to position " + position);
        final int maxChunkSize = 64 * 1024;
        final byte[] scratch = new byte[Math.min(bytesToSkip, maxChunkSize)];
        int skipped = 0;
        int requested;
        int received;
        do {
            requested = Math.min(bytesToSkip - skipped, scratch.length);
            received = read(scratch, 0, requested);
            skipped += received;
            // A short read means the ledger has no more data to offer right
            // now, so stop instead of polling for more.
        } while (received == requested && skipped < bytesToSkip);
        if (skipped != bytesToSkip) {
            throw new IllegalStateException("Could not skip to position " + position + ", tried to read "
                    + bytesToSkip + " but only read " + skipped + " bytes!");
        }
    }

    /**
     * Reads up to <code>len</code> bytes into <code>buf</code> and advances
     * the tracked offset in the ledger by the number of bytes obtained.
     *
     * @param buf destination buffer
     * @param off index in <code>buf</code> at which to start storing bytes
     * @param len maximum number of bytes to read
     * @return the number of bytes read; 0 (rather than -1) when the ledger
     *         currently has nothing more to offer
     * @throws IOException if reading from the ledger fails
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        final int count = readInternal(buf, off, len);
        currentStreamState.advanceOffsetInLedger(count);
        return count;
    }

    /**
     * Copies up to <code>len</code> bytes into <code>buf</code>, moving on to
     * the following ledger entries whenever the current one is exhausted.
     *
     * @param buf destination buffer
     * @param off index in <code>buf</code> at which to start storing bytes
     * @param len maximum number of bytes to read
     * @return the number of bytes copied; fewer than <code>len</code>
     *         (possibly 0) when the end of the ledger is reached first
     * @throws IOException if fetching or reading a ledger entry fails
     */
    private int readInternal(byte[] buf, int off, int len) throws IOException {
        // -1 means no entry has been confirmed yet: for an in-progress
        // ledger, ask the handle again for the true "tail".
        if (maxLedgerEntryIdSeen == -1) {
            maxLedgerEntryIdSeen = ledger.getLastAddConfirmed();
        }
        if (maxLedgerEntryIdSeen == -1) {
            // Nothing has been added to the ledger
            return 0;
        }

        // No entry open: fetch the next one
        if (entryStream == null) {
            entryStream = nextEntryStream();
        }
        if (entryStream == null) {
            // We are at the end of the ledger
            return 0;
        }

        // The requested range may span several ledger entries, so keep
        // filling the buffer and switch to the next entry each time the
        // current one runs dry.
        int copied = 0;
        while (copied < len) {
            final int chunk = entryStream.read(buf, off + copied, len - copied);
            if (chunk != -1) {
                currentStreamState.advanceOffsetInEntry(chunk);
                copied += chunk;
                continue;
            }
            entryStream = nextEntryStream();
            if (entryStream == null) {
                break;
            }
        }
        return copied;
    }

    /**
     * @return the length reported by the current ledger handle
     */
    public long getLedgerLength() {
        return this.ledger.getLength();
    }

    /**
     * @return the string form of the current ledger handle, as produced by
     *         its <code>toString()</code>
     */
    public String getLedgerName() {
        return this.ledger.toString();
    }

    /**
     * Closes the underlying ledger handle.
     *
     * @throws IOException if BookKeeper reports an error while closing, or if
     *                     the thread is interrupted (the interrupt flag is
     *                     restored before the exception is thrown)
     */
    @Override
    public void close() throws IOException {
        try {
            ledger.close();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted during close()", e);
        } catch (BKException e) {
            throw new IOException("BookKeeper error during close()", e);
        }
    }
}