org.apache.hadoop.hdfs.qjournal.server.JournalNodeJournalSyncer.java Source code

Introduction

Here is the source code for org.apache.hadoop.hdfs.qjournal.server.JournalNodeJournalSyncer.java.
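
The class implements Runnable: a JournalNode runs it on a background thread and queues a sync task each time a new log segment is started; the thread then tries to download missing finalized segments from the other journal nodes. Below is a minimal, hypothetical usage sketch (the example class, host names, and port are illustrative only; it must live in the org.apache.hadoop.hdfs.qjournal.server package because the constructor is package-private):

package org.apache.hadoop.hdfs.qjournal.server;

import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;

public class JournalSyncerExample {
    public static void main(String[] args) {
        // hypothetical addresses of the three journal nodes in the set
        List<InetSocketAddress> allNodes = Arrays.asList(
                new InetSocketAddress("jn1.example.com", 8485),
                new InetSocketAddress("jn2.example.com", 8485),
                new InetSocketAddress("jn3.example.com", 8485));
        InetSocketAddress thisNode = allNodes.get(0);

        // the syncer polls its task queue once a second until stop() is called
        JournalNodeJournalSyncer syncer =
                new JournalNodeJournalSyncer(allNodes, thisNode, new Configuration());
        Thread syncThread = new Thread(syncer, "JournalNodeJournalSyncer");
        syncThread.setDaemon(true);
        syncThread.start();

        // whenever startLogSegment begins a new segment for a journal:
        //     syncer.addSyncTask(journal, firstTxIdOfNewSegment);

        // on shutdown (the polling thread exits within about a second)
        syncer.stop();
    }
}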

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.qjournal.server;

import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.qjournal.protocol.JournalConfigKeys;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.util.InjectionHandler;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.TypeReference;

/**
 * This class manages journal segment recovery. If a journal falls out of
 * sync and is re-admitted to the quorum by a call to startLogSegment, we
 * want to check whether there are older segments that are still in-progress
 * and can be recovered. This is done by talking to the other nodes in the
 * JournalNode set and trying to download the missing segments.
 */
public class JournalNodeJournalSyncer implements Runnable {

    private static final Log LOG = LogFactory.getLog(JournalNodeJournalSyncer.class);
    private static final String logMsg = "Journal Recovery: ";

    // all journal nodes in this set
    private final List<InetSocketAddress> journalNodes;
    // this journal node
    private final InetSocketAddress journalNode;

    // tasks populated when startLogSegment is called
    private final BlockingQueue<SyncTask> taskQueue; // queued calls

    private volatile boolean running = true;

    public static final ObjectMapper mapper = new ObjectMapper();

    private int httpConnectReadTimeoutMs = 0;

    JournalNodeJournalSyncer(List<InetSocketAddress> journalNodes, InetSocketAddress journalNode,
            Configuration conf) {
        this.journalNodes = journalNodes;
        this.journalNode = journalNode;
        this.taskQueue = new ArrayBlockingQueue<SyncTask>(10);

        // timeout for getting manifest and reading log segments
        this.httpConnectReadTimeoutMs = conf.getInt(JournalConfigKeys.DFS_QJOURNAL_HTTP_TIMEOUT_KEY,
                JournalConfigKeys.DFS_QJOURNAL_HTTP_TIMEOUT_DEFAULT);
    }

    @Override
    public void run() {
        while (running) {
            try {
                SyncTask task = taskQueue.poll(1000, TimeUnit.MILLISECONDS);

                // do work with the task
                recoverSegments(task);
            } catch (Exception e) {
                LOG.info(logMsg + "caugth exception", e);
            }
        }
    }

    boolean prepareRecovery(SyncTask task) throws IOException {
        // nothing to do
        if (task == null) {
            // task is polled with timeout which means it can be null
            return false;
        }

        // get the journal info
        Journal journal = task.journal;

        if (journal.getMinTxid() > task.createTxId) {
            // the segment we are trying to process has already been purged
            LOG.info(logMsg + "skipping sync for journal: " + journal.getJournalId() + " with txid: "
                    + task.createTxId + " since it is too old.");
            // no work to do
            return false;
        } else if (journal.getMinTxid() == task.createTxId) {
            // this is the current segment at startup
            LOG.info(logMsg + "skipping sync for journal: " + journal.getJournalId() + " with txid: "
                    + task.createTxId + " since it is the current segment.");
            // no work to do
            return false;
        }

        LOG.info(logMsg + "checking journal segments for journal: " + journal.getJournalId());

        // synchronize on journal to avoid any changes in the storage
        synchronized (journal) {

            // current minimum txid above which we need to recover
            final long minTxid = journal.getMinTxid();
            // last txid before the segment we are currently writing to
            final long maxTxid = journal.getCurrentSegmentTxId() - 1;
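            // e.g., if the journal retains txids from 101 and the segment
            // being started begins at 301, we try to recover the inclusive
            // range [101, 300]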

            // sanity check
            if (maxTxid <= minTxid) {
                LOG.info(logMsg + "skipping sync for journal: " + journal.getJournalId() + " with txid: "
                        + task.createTxId + " since minTxId >= maxTxId.");
                // no work to do
                return false;
            }

            // populate the task with this information
            task.setRange(minTxid, maxTxid);

            // get all underlying files
            List<EditLogFile> elfs = journal.getAllLogFiles();

            for (EditLogFile elf : elfs) {
                if (elf.getFirstTxId() > maxTxid) {
                    // this is a newer or ongoing segment
                    LOG.info(logMsg + "skipping newer/ongoing segment: " + elf);
                    continue;
                }
                if (elf.getFirstTxId() < minTxid) {
                    // this segment is no longer relevant
                    // it could have been purged already
                    LOG.info(logMsg + "skipping old segment: " + elf);
                    continue;
                }
                if (!elf.isInProgress()) {
                    // this is a finalized segment - no need to recover
                    LOG.info(logMsg + "found finalized segment: " + elf);
                    task.addValidSegment(elf);
                } else {
                    // this is an in-progress segment - we will attempt recovery
                    LOG.info(logMsg + "found inprogress segment: " + elf);
                    task.addInprogressSegment(elf);
                }
            }
        } // end synchronized

        // is there any work to do?
        return task.hasMissingValidSegments();
    }

    /**
     * Recovers all missing segments for the given task by fetching the edit
     * log manifest from each of the other journal nodes and downloading any
     * finalized segments we are missing.
     * 
     * @param task
     *          describes the journal and the segments to recover
     * @throws IOException
     */
    void recoverSegments(SyncTask task) throws IOException {
        // obtain the list of segments that are valid
        if (!prepareRecovery(task)) {
            return;
        }

        // iterate through all nodes
        for (InetSocketAddress jn : journalNodes) {
            if (isLocalIpAddress(jn.getAddress()) && jn.getPort() == journalNode.getPort()) {
                // we do not need to talk to ourselves
                continue;
            }

            try {
                // get manifest for log that we care about
                List<EditLogFile> remoteLogFiles = getManifest(jn, task.journal, task.recoveryStartTxid);

                // go through all remote segments
                for (EditLogFile relf : remoteLogFiles) {
                    recoverSegment(jn, relf, task);
                }

                // if we are done, there is no need to iterate more
                if (!task.hasMissingValidSegments()) {
                    LOG.info(logMsg + "recovery finished.");
                    break;
                }
            } catch (Exception e) {
                LOG.error(logMsg + "error", e);
                continue;
            }
        }
    }

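    /**
     * Attempts to recover a single remote segment. The segment is downloaded
     * only if it is finalized, falls entirely within the recovery range, and
     * we do not already have a valid copy of it.
     * 
     * @param jn
     *          the journal node to download the segment from
     * @param relf
     *          descriptor of the remote segment
     * @param task
     *          contains journal description and the recovery range
     * @throws IOException
     */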
    void recoverSegment(InetSocketAddress jn, EditLogFile relf, SyncTask task) throws IOException {
        try {
            // we are looking for finalized segments that we do not have
            // we only care about segments within the range

            final long remoteStartTxid = relf.getFirstTxId();
            final long remoteEndTxid = relf.getLastTxId();

            if (remoteStartTxid >= task.recoveryStartTxid && remoteStartTxid <= task.recoveryEndTxid
                    && remoteEndTxid <= task.recoveryEndTxid && !relf.isInProgress()
                    && !task.containsValidSegment(relf)) {

                String name = "[" + remoteStartTxid + " : " + remoteEndTxid + "]";

                LOG.info(logMsg + "attempting recovery for segment " + name + " for journal id: "
                        + task.journal.getJournalId());

                // path to download
                String path = GetJournalEditServlet.buildPath(task.journal.getJournalId(), remoteStartTxid,
                        task.journal.getJournalStorage(), 0);

                // url to download
                URL url = new URL("http", jn.getAddress().getHostAddress(), jn.getPort(), path);

                // download temporary file
                File syncedTmpFile = task.journal.getJournalStorage().getSyncLogTemporaryFile(relf.getFirstTxId(),
                        relf.getLastTxId(), now());

                // .tmp file will not interfere with storage
                syncedTmpFile = task.journal.syncLog(now(), relf.getFirstTxId(), url, name, syncedTmpFile);

                // final destination of the file
                File syncedDestFile = task.journal.getJournalStorage().getSyncLogDestFile(relf.getFirstTxId(),
                        relf.getLastTxId());

                if (syncedDestFile == null) {
                    throwIOException(logMsg + " Error when recovering log " + relf);
                }

                // synchronized on journal so no files can be changed
                synchronized (task.journal) {
                    // move away our local copy of the segment
                    EditLogFile localCorruptedFile = null;
                    try {
                        localCorruptedFile = task.getInprogressSegment(remoteStartTxid);
                        if (localCorruptedFile != null) {
                            localCorruptedFile.moveAsideCorruptFile();
                        }
                    } catch (Exception e) {
                        LOG.warn(logMsg + "exception when marking segment: " + localCorruptedFile + " as corrupt.",
                                e);
                    }
                    // move tmp file to finalized log segment
                    FileUtil.replaceFile(syncedTmpFile, syncedDestFile);
                }

                // add the segment to the list so we do not recover it again
                task.addValidSegment(relf);
                LOG.info(logMsg + "successfully recovered segment " + name + " for journal id: "
                        + task.journal.getJournalId());
                InjectionHandler.processEvent(InjectionEvent.QJM_JOURNALNODE_RECOVERY_COMPLETED);
            }
        } catch (Exception e) {
            LOG.warn(logMsg + "exception when recovering segment: " + relf.toColonSeparatedString()
                    + " when trying with journal node: " + jn, e);
        }
    }

    /**
     * Fetch the manifest from a single journal node over HTTP.
     */
    private List<EditLogFile> getManifest(InetSocketAddress jn, Journal journal, long minTxId) throws IOException {
        URL manifestUrl = new URL("http", jn.getAddress().getHostAddress(), jn.getPort(),
                GetJournalManifestServlet.buildPath(journal.getJournalId(), minTxId, journal.getJournalStorage()));
        String m = DFSUtil.getHTMLContentWithTimeout(manifestUrl, httpConnectReadTimeoutMs,
                httpConnectReadTimeoutMs);
        return convertJsonToListManifest(m);
    }

    /**
     * Convert the JSON manifest string into a list of edit log files.
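     * The manifest is expected to be a JSON array of strings, one entry per
     * segment; each entry is passed to the EditLogFile(String) constructor
     * (presumably the colon-separated form produced by
     * toColonSeparatedString()). A null or empty input yields an empty list.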
     */
    public static List<EditLogFile> convertJsonToListManifest(String json) throws IOException {
        if (json == null || json.isEmpty()) {
            return new ArrayList<EditLogFile>();
        }
        // get the list of strings from the http response
        TypeReference<List<String>> type = new TypeReference<List<String>>() {
        };
        List<String> logFilesDesc = mapper.readValue(json, type);

        // we need to convert the list of strings into edit log files
        List<EditLogFile> logFiles = new ArrayList<EditLogFile>();
        for (String lf : logFilesDesc) {
            logFiles.add(new EditLogFile(lf));
        }
        return logFiles;
    }

    /**
     * Add a sync task for the given journal. Done when starting a new segment.
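     * For example, a caller starting a new segment whose first txid is txid
     * would invoke addSyncTask(journal, txid).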
     */
    public void addSyncTask(Journal journal, long createTxId) {
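        // note: BlockingQueue.add() throws IllegalStateException if the
        // bounded queue (capacity 10) is full, rather than blocking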
        taskQueue.add(new SyncTask(journal, createTxId));
    }

    /**
     * Stop this service.
     */
    public void stop() {
        running = false;
    }

    /**
     * When startLogSegment is called we create a task for the given journal.
     * createTxId is the first txid of the current segment, so we are
     * interested in recovering older segments.
     */
    private static class SyncTask {
        final Journal journal;
        // txid of the segment being started when the task was created
        final long createTxId;

        // all valid segments for the range we are interested in
        final List<EditLogFile> validSegments = new ArrayList<EditLogFile>();
        // all in-progress segments within the range we are interested in
        final List<EditLogFile> inprogressSegments = new ArrayList<EditLogFile>();

        // we are interested in this range inclusively
        long recoveryStartTxid = HdfsConstants.INVALID_TXID;
        long recoveryEndTxid = HdfsConstants.INVALID_TXID;

        SyncTask(Journal journal, long createTxId) {
            this.journal = journal;
            this.createTxId = createTxId;
        }

        void checkSegment(boolean inProgress, EditLogFile elf) {
            if (inProgress ^ elf.isInProgress()) {
                throw new IllegalArgumentException("Edit log file: " + elf.toColonSeparatedString() + " is "
                        + (inProgress ? "not " : "") + "in progress");
            }
        }

        /**
         * Add a valid segment that we have locally.
         */
        void addValidSegment(EditLogFile elf) {
            checkSegment(false, elf);
            validSegments.add(elf);
        }

        /**
         * Check if we have a finalized segment, so we do not have to recover it.
         */
        boolean containsValidSegment(EditLogFile elf) {
            if (elf.isInProgress()) {
                // we have only finalized segments
                return false;
            }
            for (EditLogFile e : validSegments) {
                if (elf.getFirstTxId() == e.getFirstTxId() && elf.getLastTxId() == e.getLastTxId()) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Add an in-progress segment that we have locally. Such a segment is
         * considered corrupted and will be moved aside if a finalized copy is
         * recovered.
         */
        void addInprogressSegment(EditLogFile elf) {
            checkSegment(true, elf);
            inprogressSegments.add(elf);
        }

        /**
         * Get a local inprogress segment.
         */
        EditLogFile getInprogressSegment(long startTxId) {
            for (EditLogFile e : inprogressSegments) {
                if (e.getFirstTxId() == startTxId) {
                    return e;
                }
            }
            return null;
        }

        void setRange(long recoveryStartTxid, long recoveryEndTxid) throws IOException {
            // transaction ids must be valid
            if (recoveryStartTxid >= recoveryEndTxid || recoveryStartTxid <= HdfsConstants.INVALID_TXID
                    || recoveryEndTxid <= HdfsConstants.INVALID_TXID) {
                throwIOException(
                        logMsg + "Illegal start/end transactions: " + recoveryStartTxid + " : " + recoveryEndTxid);
            }
            this.recoveryStartTxid = recoveryStartTxid;
            this.recoveryEndTxid = recoveryEndTxid;
        }

        /**
         * Check if the recovery is needed. If we have all segments within this
         * range, then there is no need to do anything.
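         * For example, with range [100, 300], finalized segments [100, 199]
         * and [200, 300] are contiguous and cover the range, so nothing is
         * missing; segments [100, 199] and [250, 300] would leave a gap.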
         */
        boolean hasMissingValidSegments() throws IOException {
            // setRange() has not been called
            if (recoveryStartTxid == HdfsConstants.INVALID_TXID || recoveryEndTxid == HdfsConstants.INVALID_TXID) {
                throwIOException(logMsg + " task range is not set!");
            }
            // if there are no valid segments, something is missing
            if (validSegments.isEmpty()) {
                return true;
            }
            // check first and last transaction of the range
            if (recoveryStartTxid != validSegments.get(0).getFirstTxId()
                    || recoveryEndTxid != validSegments.get(validSegments.size() - 1).getLastTxId()) {
                return true;
            }
            // check if the valid segments are contiguous
            for (int i = 0; i < validSegments.size() - 1; i++) {
                if (validSegments.get(i).getLastTxId() != validSegments.get(i + 1).getFirstTxId() - 1) {
                    return true;
                }
            }
            // valid segments cover the entire range of transactions
            return false;
        }
    }

    /**
     * Checks if the address is local.
     */
    private boolean isLocalIpAddress(InetAddress addr) {
        if (addr.isAnyLocalAddress() || addr.isLoopbackAddress())
            return true;
        try {
            return NetworkInterface.getByInetAddress(addr) != null;
        } catch (SocketException e) {
            return false;
        }
    }

    static long now() {
        return System.currentTimeMillis();
    }

    static void throwIOException(String msg) throws IOException {
        LOG.error(msg);
        throw new IOException(msg);
    }
}