org.apache.hadoop.hdfs.server.namenode.Ingest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.namenode.Ingest.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.IOException;
import java.io.EOFException;
import java.lang.Thread;
import java.util.Arrays;
import java.util.EnumMap;
import java.text.SimpleDateFormat;

import org.apache.commons.logging.Log;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.server.namenode.AvatarNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.FSEditLog;
import org.apache.hadoop.hdfs.server.namenode.Standby.StandbyIngestState;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.hdfs.util.Holder;
import org.apache.hadoop.util.InjectionHandler;

/**
 * This class reads transaction logs from the primary's shared device
 * and feeds it to the standby NameNode.
 */

public class Ingest implements Runnable {

    public static final Log LOG = AvatarNode.LOG;
    static final SimpleDateFormat DATE_FORM = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    private Standby standby;
    private Configuration confg; // configuration of local namenode
    volatile private boolean running = true;
    volatile private boolean catchingUp = true;
    volatile private long catchUpLag;
    volatile private boolean lastScan = false; // is this the last scan?
    private int logVersion;
    volatile private boolean success = false; // not successfully ingested yet

    EditLogInputStream inputEditStream = null;
    long currentPosition; // current offset in the transaction log
    final FSNamesystem fsNamesys;

    // startTxId is known when the ingest is instantiated
    long startTxId = -1;
    // endTxId is -1 until the ingest completes
    long endTxId = -1;

    Ingest(Standby standby, FSNamesystem ns, Configuration conf, long startTxId) throws IOException {
        this.fsNamesys = ns;
        this.standby = standby;
        this.confg = conf;
        this.startTxId = startTxId;
        catchUpLag = conf.getLong("avatar.catchup.lag", 2 * 1024 * 1024L);
        inputEditStream = standby.setupIngestStreamWithRetries(startTxId);
    }

    public void run() {
        if (standby.currentIngestState == StandbyIngestState.CHECKPOINTING) {
            CheckpointSignature sig = standby.getLastRollSignature();
            if (sig == null) {
                // This should never happen, since if checkpoint is in progress
                // and the Ingest thread is getting recreated there should be a sig
                throw new RuntimeException("Ingest: Weird thing has happened. The "
                        + "checkpoint was in progress, but there is no signature of the " + "fsedits available");
            }
            /**
             * If the checkpoint is in progress it means that the edits file was 
             * already read and we cannot read it again from the start. 
             * Just successfuly return.
             */
            success = true;
            return;
        }
        while (running) {
            try {
                // keep ingesting transactions from remote edits log.
                loadFSEdits();
            } catch (Exception e) {
                LOG.warn("Ingest: Exception while processing transactions (" + running + ") ", e);
                throw new RuntimeException("Ingest: failure", e);
            } finally {
                LOG.warn("Ingest: Processing transactions has a hiccup. " + running);
            }
        }
        success = true; // successful ingest.
    }

    /**
     * Immediately Stop the ingestion of transaction logs
     */
    void stop() {
        running = false;
    }

    public boolean isCatchingUp() {
        return catchingUp;
    }

    /**
     * Checks if the ingest is catching up.
     * If the ingest is consuming finalized segment, it's assumed to be behind.
     * Otherwise, catching up is based on the position of the input stream.
     * @throws IOException
     */
    private void setCatchingUp() throws IOException {
        try {
            if (inputEditStream != null && inputEditStream.isInProgress()) {
                catchingUp = (inputEditStream.length() - inputEditStream.getPosition() > catchUpLag);
            } else {
                catchingUp = true;
            }
        } catch (Exception e) {
            catchingUp = true;
        }
    }

    /**
     * Indicate that this is the last pass over the transaction log
     */
    synchronized void quiesce() {
        lastScan = true;
    }

    /**
     * Indicate whether ingest was successful or not.
     * Returns true on success, else false.
     */
    boolean getIngestStatus() {
        return success;
    }

    boolean isRunning() {
        return running;
    }

    long getStartTxId() {
        return startTxId;
    }

    /**
     * Returns the distance in bytes between the current position inside of the
     * edits log and the length of the edits log
     */
    public long getLagBytes() {
        try {
            if (inputEditStream != null && inputEditStream.isInProgress()) {
                // for file journals it may happen that we read a segment finalized
                // by primary, but not refreshed by the standby, so length() returns 0
                // hence we take max(-1,lag)
                return Math.max(-1, inputEditStream.length() - this.inputEditStream.getPosition());
            }
            return -1;
        } catch (IOException ex) {
            LOG.error("Error getting the lag", ex);
            return -1;
        }
    }

    /**
     * Load an edit log, and continue applying the changes to the in-memory 
     * structure. This is where we ingest transactions into the standby.
     */
    private int loadFSEdits() throws IOException {
        FSDirectory fsDir = fsNamesys.dir;
        int numEdits = 0;
        long startTime = FSNamesystem.now();
        LOG.info("Ingest: Consuming transactions: " + this.toString());

        try {
            logVersion = inputEditStream.getVersion();
            if (!LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, logVersion))
                throw new RuntimeException("Log version is too old");

            currentPosition = inputEditStream.getPosition();
            numEdits = ingestFSEdits(); // continue to ingest 
        } finally {
            LOG.info("Ingest: Closing ingest for segment: " + this.toString());
            // At this time we are done reading the transaction log
            // We need to sync to have on disk status the same as in memory
            // if we saw end segment, we already synced
            if (endTxId == -1 && fsDir.fsImage.getEditLog().isOpen()) {
                fsDir.fsImage.getEditLog().logSync();
            }
            inputEditStream.close();
            standby.clearIngestState();
        }
        LOG.info("Ingest: Edits segment: " + this.toString() + " edits # " + numEdits + " loaded in "
                + (FSNamesystem.now() - startTime) / 1000 + " seconds.");

        if (logVersion != FSConstants.LAYOUT_VERSION) // other version
            numEdits++; // save this image asap
        return numEdits;
    }

    /**
     * Read a single transaction from the input edit log
     * @param inputEditLog the log to read from
     * @return a single edit log entry, null if EOF
     * @throws IOException on error
     */
    private FSEditLogOp ingestFSEdit(EditLogInputStream inputEditLog) throws IOException {
        FSEditLogOp op = null;
        try {
            op = inputEditLog.readOp();
            InjectionHandler.processEventIO(InjectionEvent.INGEST_READ_OP);
        } catch (EOFException e) {
            return null; // No more transactions.
        } catch (IOException e) {
            // rethrow, it's handled in ingestFSEdits()
            throw e;
        } catch (Exception e) {
            // some other problem, maybe unchecked exception
            throw new IOException(e);
        }
        return op;
    }

    /**
     * Continue to ingest transaction logs until the currentState is 
     * no longer INGEST. If lastScan is set to true, then we process 
     * till the end of the file and return.
     */
    int ingestFSEdits() throws IOException {
        FSDirectory fsDir = fsNamesys.dir;
        int numEdits = 0;

        long recentOpcodeOffsets[] = new long[2];
        Arrays.fill(recentOpcodeOffsets, -1);
        EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts = new EnumMap<FSEditLogOpCodes, Holder<Integer>>(
                FSEditLogOpCodes.class);

        boolean error = false;
        boolean reopen = false;
        boolean quitAfterScan = false;

        long sharedLogTxId = FSEditLogLoader.TXID_IGNORE;
        long localLogTxId = FSEditLogLoader.TXID_IGNORE;

        FSEditLogOp op = null;
        FSEditLog localEditLog = fsDir.fsImage.getEditLog();

        while (running && !quitAfterScan) {
            // if the application requested that we make a final pass over 
            // the transaction log, then we remember it here. We close and
            // reopen the file to ensure that we can see all the data in the
            // file, one reason being that NFS has open-to-close cache
            // coherancy and the edit log could be stored in NFS.
            //
            if (reopen || lastScan) {
                inputEditStream.close();
                inputEditStream = standby.setupIngestStreamWithRetries(startTxId);
                if (lastScan) {
                    // QUIESCE requested by Standby thread
                    LOG.info("Ingest: Starting last scan of transaction log: " + this.toString());
                    quitAfterScan = true;
                }

                // discard older buffers and start a fresh one.
                inputEditStream.refresh(currentPosition, localEditLog.getLastWrittenTxId());
                setCatchingUp();
                reopen = false;
            }

            //
            // Process all existing transactions till end of file
            //
            while (running) {

                if (lastScan && !quitAfterScan) {
                    // Standby thread informed the ingest to quiesce
                    // we should refresh the input stream as soon as possible
                    // then quitAfterScan will be true
                    break;
                }

                // record the current file offset.
                currentPosition = inputEditStream.getPosition();
                InjectionHandler.processEvent(InjectionEvent.INGEST_BEFORE_LOAD_EDIT);

                fsNamesys.writeLock();
                try {
                    error = false;
                    op = ingestFSEdit(inputEditStream);

                    /*
                     * In the case of segments recovered on primary namenode startup, we
                     * have segments that are finalized (by name), but not containing the
                     * ending transaction. Without this check, we will keep looping until
                     * the next checkpoint to discover this situation.
                     */
                    if (!inputEditStream.isInProgress()
                            && standby.getLastCorrectTxId() == inputEditStream.getLastTxId()) {
                        // this is a correct segment with no end segment transaction
                        LOG.info(
                                "Ingest: Reached finalized log segment end with no end marker. " + this.toString());
                        tearDown(localEditLog, false, true);
                        break;
                    }

                    if (op == null) {
                        FSNamesystem.LOG.debug("Ingest: Invalid opcode, reached end of log "
                                + "Number of transactions found " + numEdits);
                        break; // No more transactions.
                    }

                    sharedLogTxId = op.txid;
                    // Verify transaction ids match.
                    localLogTxId = localEditLog.getLastWrittenTxId() + 1;
                    // Fatal error only when the log contains transactions from the future
                    // we allow to process a transaction with smaller txid than local
                    // we will simply skip it later after reading from the ingest edits
                    if (localLogTxId < sharedLogTxId
                            || InjectionHandler.falseCondition(InjectionEvent.INGEST_TXID_CHECK)) {
                        String message = "The transaction id in the edit log : " + sharedLogTxId
                                + " does not match the transaction id inferred" + " from FSIMAGE : " + localLogTxId;
                        LOG.fatal(message);
                        throw new RuntimeException(message);
                    }

                    // skip previously loaded transactions
                    if (!canApplyTransaction(sharedLogTxId, localLogTxId, op))
                        continue;

                    // for recovery, we do not want to re-load transactions,
                    // but we want to populate local log with them
                    if (shouldLoad(sharedLogTxId)) {
                        FSEditLogLoader.loadEditRecord(logVersion, inputEditStream, recentOpcodeOffsets, opCounts,
                                fsNamesys, fsDir, numEdits, op);
                    }

                    LOG.info("Ingest: " + this.toString() + ", size: " + inputEditStream.length()
                            + ", processing transaction at offset: " + currentPosition + ", txid: " + op.txid
                            + ", opcode: " + op.opCode);

                    if (op.opCode == FSEditLogOpCodes.OP_START_LOG_SEGMENT) {
                        LOG.info("Ingest: Opening log segment: " + this.toString());
                        localEditLog.open();
                    } else if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
                        InjectionHandler.processEventIO(InjectionEvent.INGEST_CLEAR_STANDBY_STATE);
                        LOG.info("Ingest: Closing log segment: " + this.toString());
                        tearDown(localEditLog, true, true);
                        numEdits++;
                        LOG.info("Ingest: Reached log segment end. " + this.toString());
                        break;
                    } else {
                        localEditLog.logEdit(op);
                        if (inputEditStream.getReadChecksum() != FSEditLog.getChecksumForWrite().getValue()) {
                            throw new IOException("Ingest: mismatched r/w checksums for transaction #" + numEdits);
                        }
                    }

                    numEdits++;
                    standby.setLastCorrectTxId(op.txid);
                } catch (ChecksumException cex) {
                    LOG.info("Checksum error reading the transaction #" + numEdits + " reopening the file");
                    reopen = true;
                    break;
                } catch (IOException e) {
                    LOG.info("Encountered error reading transaction", e);
                    error = true; // if we haven't reached eof, then error.
                    break;
                } finally {
                    if (localEditLog.isOpen()) {
                        localEditLog.logSyncIfNeeded();
                    }
                    fsNamesys.writeUnlock();
                }
            } // end inner while(running) -- all breaks come here

            // if we failed to read the entire transaction from disk, 
            // then roll back to the offset where there was a last good 
            // read, sleep for sometime for new transaction to
            // appear in the file and then continue;
            if (error || running) {
                // discard older buffers and start a fresh one.
                inputEditStream.refresh(currentPosition, localEditLog.getLastWrittenTxId());
                setCatchingUp();

                if (error) {
                    LOG.info("Ingest: Incomplete transaction record at offset " + inputEditStream.getPosition()
                            + " but the file is of size " + inputEditStream.length() + ". Continuing....");
                }

                if (running && !lastScan) {
                    try {
                        Thread.sleep(100); // sleep for a second
                    } catch (InterruptedException e) {
                        // break out of waiting if we receive an interrupt.
                    }
                }
            }
        } //end outer while(running)

        ///////////////////// FINAL ACTIONS /////////////////////

        // This was the last scan of the file but we could not read a full
        // transaction from disk. If we proceed this will corrupt the image
        if (error) {
            String errorMessage = FSEditLogLoader.getErrorMessage(recentOpcodeOffsets, currentPosition);
            LOG.error(errorMessage);
            throw new IOException(
                    "Failed to read the edits log. " + "Incomplete transaction at " + currentPosition);
        }

        // If the last Scan was completed, then stop the Ingest thread.
        if (lastScan && quitAfterScan) {
            LOG.info("Ingest: lastScan completed. " + this.toString());
            running = false;
            if (localEditLog.isOpen()) {
                // quiesced non-finalized segment
                LOG.info("Ingest: Reached non-finalized log segment end. " + this.toString());
                tearDown(localEditLog, false, localLogTxId != startTxId);
            }
        }
        FSEditLogLoader.dumpOpCounts(opCounts);
        return numEdits; // total transactions consumed
    }

    private void tearDown(FSEditLog localEditLog, boolean writeEndTxn, boolean updateLastCorrectTxn)
            throws IOException {
        localEditLog.endCurrentLogSegment(writeEndTxn);
        endTxId = localEditLog.getLastWrittenTxId();
        running = false;
        lastScan = true;
        if (updateLastCorrectTxn) {
            standby.setLastCorrectTxId(endTxId);
        }
        standby.clearIngestState(endTxId + 1);
    }

    public String toString() {
        String endStr = (endTxId == -1) ? "in progress" : ("" + endTxId);
        return "Log segment (" + startTxId + ", " + endStr + ")";
    }

    private boolean canApplyTransaction(long sharedLogTxid, long localLogTxid, FSEditLogOp op) {
        boolean canApply = false;
        // process all transaction if log has no tx id's
        // it is up to the avatarnode to ensure we 
        // do not re-consume the same edits
        if (sharedLogTxid == FSEditLogLoader.TXID_IGNORE) {
            canApply = true;
        } else {
            // otherwise we can apply the transaction if the txid match
            canApply = (sharedLogTxid == localLogTxid);
        }
        if (!canApply) {
            LOG.info("Ingest: skip loading txId: " + sharedLogTxid + ", local txid: " + localLogTxid + " txn: "
                    + op);
        }
        return canApply;
    }

    /**
     * Used for ingest recovery, where we erase the local edit log, and
     * transactions need to be populated to the local log, but they should be 
     * not loaded into the namespace.
     */
    private boolean shouldLoad(long txid) {
        boolean shouldLoad = txid > standby.getLastCorrectTxId();
        if (!shouldLoad) {
            LOG.info("Ingest: skip loading txId: " + txid
                    + " to namesystem, but writing to edit log, last correct txid: "
                    + standby.getLastCorrectTxId());
        }
        return shouldLoad;
    }
}