fm.last.hadoop.tools.ReplicationPolicyFixer.java Source code

Java tutorial

Introduction

Here is the source code for fm.last.hadoop.tools.ReplicationPolicyFixer.java

Source

/**
 * Copyright 2011 Last.fm
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package fm.last.hadoop.tools;

import static com.google.common.collect.Sets.newHashSet;
import static java.lang.System.out;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;

/**
 * Command that walks the whole file system and reports every file owning at least one block whose replicas
 * violate the block placement policy.
 * <p>
 * Only the detection step is implemented. The follow-up steps (raise replication, wait, reset replication) are
 * still commented out in {@link #run(String[])}, which is why the short description reads "DOESN'T WORK YET".
 */
public class ReplicationPolicyFixer extends HadoopCommand {
    /** Name node client; created at the start of {@link #run(String[])}. */
    protected ClientProtocol nameNode;
    /** Topology built from the live data nodes reported by the name node; created in {@link #run(String[])}. */
    protected NetworkTopology cluster;
    /** File system obtained from the command's configuration. */
    protected FileSystem fs;

    /**
     * Length of the path most recently written to the progress line. Used to blank out the leftovers of a longer
     * previous path when the line is overwritten via a carriage return.
     */
    private int lastPathNameLength = 0;

    /**
     * Creates the command and opens the file system described by the configuration.
     *
     * @param conf configuration handed to the superclass
     * @throws IOException if the file system cannot be obtained
     */
    public ReplicationPolicyFixer(Configuration conf) throws IOException {
        super(conf);
        this.fs = FileSystem.get(getConf());
    }

    /**
     * Reflectively invokes the static {@code verifyBlockPlacement(LocatedBlock, short, NetworkTopology)} method of
     * {@code org.apache.hadoop.hdfs.server.namenode.ReplicationTargetChooser}, forcing it accessible first.
     *
     * @param lBlk the block to check
     * @param replication replication factor passed through to the checked method
     * @param cluster topology passed through to the checked method
     * @return the integer returned by the invoked method; callers in this class treat it as the number of
     *         additional racks the block should be replicated on
     * @throws RuntimeException if the reflective lookup or call fails, or the invoked method itself throws (a
     *         {@code RuntimeException} thrown by the target is rethrown as is)
     */
    public static int verifyBlockPlacement(LocatedBlock lBlk, short replication, NetworkTopology cluster) {
        try {
            Class<?> replicationTargetChooserClass = Class
                    .forName("org.apache.hadoop.hdfs.server.namenode.ReplicationTargetChooser");
            Method verifyBlockPlacementMethod = replicationTargetChooserClass.getDeclaredMethod(
                    "verifyBlockPlacement", LocatedBlock.class, Short.TYPE, NetworkTopology.class);
            verifyBlockPlacementMethod.setAccessible(true);

            return (Integer) verifyBlockPlacementMethod.invoke(null, lBlk, Short.valueOf(replication), cluster);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        } catch (SecurityException e) {
            throw new RuntimeException(e);
        } catch (NoSuchMethodException e) {
            throw new RuntimeException(e);
        } catch (IllegalArgumentException e) {
            throw new RuntimeException(e);
        } catch (IllegalAccessException e) {
            throw new RuntimeException(e);
        } catch (InvocationTargetException e) {
            // Surface the target's own unchecked exception unchanged; the caller decides how to report it,
            // so no stack trace is printed here.
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            }
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the cluster topology from the live data nodes, scans the file system from the root and prints how
     * many files contain a block that violates the placement policy.
     *
     * @param argv command line arguments (not used)
     * @return always 0
     * @throws IOException if talking to the name node or the file system fails
     */
    @Override
    public int run(String[] argv) throws IOException {
        nameNode = DFSClient.createNamenode(getConf());
        cluster = new NetworkTopology();
        for (Node node : nameNode.getDatanodeReport(DatanodeReportType.LIVE)) {
            cluster.add(node);
        }

        // Find mis-replicated files
        Set<Path> files = newHashSet();
        FileStatus fileStatus = fs.getFileStatus(new Path("/"));

        findMissReplicatedFiles(fileStatus, files);

        // The progress line ends in '\r', so the summary is written on top of it; pad the summary so that no
        // tail of a longer path remains visible.
        String summary = "Got " + files.size() + " files. Done.";
        out.println(summary + StringUtils.repeat(" ", Math.max(0, lastPathNameLength - summary.length())));

        // TODO: remaining steps of the fix, not enabled yet. targetReplication and defaultReplication are not
        // declared in this class.
        // out.print("Increasing replication on files. ");
        // out.flush();
        // setReplication(files, targetReplication);
        // out.println("Got " + files.size() + " files. Done.");
        //
        // out.println("Waiting for files to replicate");
        // out.flush();
        // waitForReplication(files, targetReplication);
        // out.println("Done.");
        //
        // out.print("Resetting replication on files. ");
        // out.flush();
        // setReplication(files, defaultReplication);
        // out.println("Done.");

        return 0;
    }

    /**
     * Recursively visits {@code file}; for every regular file, checks each block against the placement policy and
     * records the file's path when a violation is found.
     * <p>
     * Files that have been deleted meanwhile, are under construction, or contain a corrupt block are skipped.
     *
     * @param file the file or directory to examine
     * @param missReplicatedFiles receives the paths of files with at least one violating block
     * @throws IOException if listing a directory or fetching block locations fails
     */
    private void findMissReplicatedFiles(FileStatus file, Set<Path> missReplicatedFiles) throws IOException {
        Path path = file.getPath();

        if (file.isDir()) {
            FileStatus[] files = fs.listStatus(path);
            if (files == null) {
                return;
            }
            for (FileStatus subFile : files) {
                findMissReplicatedFiles(subFile, missReplicatedFiles);
            }
            return;
        }

        // Single-line progress display: overwrite the previous path, blanking any leftover characters.
        String pathName = path.toUri().getPath();
        int pathNameLength = pathName.length();
        String padding = StringUtils.repeat(" ", Math.max(0, lastPathNameLength - pathNameLength));
        lastPathNameLength = pathNameLength;
        out.print(pathName + padding + "\r");
        out.flush();

        LocatedBlocks blocks = nameNode.getBlockLocations(pathName, 0, file.getLen());
        if (blocks == null) { // the file is deleted
            return;
        }
        if (blocks.isUnderConstruction()) {
            out.println("\nNot checking open file : " + path.toString());
            return;
        }

        short targetFileReplication = file.getReplication();
        for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
            if (lBlk.isCorrupt()) {
                out.println("\n" + lBlk.toString() + " is corrupt so skipping file : " + path.toString());
                return;
            }

            Block block = lBlk.getBlock();
            DatanodeInfo[] locs = lBlk.getLocations();
            // verify block placement policy
            int missingRacks = verifyBlockPlacement(lBlk, targetFileReplication, cluster);
            if (missingRacks > 0 && locs.length > 0) {
                out.println("\nReplica placement policy is violated for " + block.toString() + " of file "
                        + path.toString() + ". Block should be additionally replicated on " + missingRacks
                        + " more rack(s).");
                missReplicatedFiles.add(path);
            }
        }
    }

    /**
     * Sets the replication factor of every given file.
     *
     * @param files the files to update; any {@code Iterable} is accepted so that both lists and the set
     *        collected by {@link #run(String[])} can be passed
     * @param rep the new replication factor
     * @throws IOException if the file system rejects a request
     */
    void setReplication(Iterable<Path> files, short rep) throws IOException {
        for (Path p : files) {
            fs.setReplication(p, rep);
        }
    }

    /**
     * Wait for all files in waitList to have replication number equal to rep, polling every ten seconds.
     *
     * @param waitList The files are waited for.
     * @param rep The new replication number.
     * @throws IOException if querying the file system fails, or (as {@link java.io.InterruptedIOException}) if
     *         the waiting thread is interrupted
     */
    void waitForReplication(Iterable<Path> waitList, short rep) throws IOException {
        for (Path f : waitList) {
            boolean printedMessage = false;
            boolean printWarning = false;
            FileStatus status = fs.getFileStatus(f);
            long len = status.getLen();

            for (boolean done = false; !done;) {
                BlockLocation[] locations = fs.getFileBlockLocations(status, 0, len);

                // Advance to the first block whose number of hosts differs from the requested replication.
                int i = 0;
                while (i < locations.length && locations[i].getHosts().length == rep) {
                    i++;
                }
                done = i == locations.length;

                if (!done) {
                    // The block at index i stopped the scan; warn once if it has more hosts than requested.
                    if (!printWarning && locations[i].getHosts().length > rep) {
                        System.out.println("\nWARNING: the waiting time may be long for "
                                + "DECREASING the number of replication.");
                        printWarning = true;
                    }
                    if (!printedMessage) {
                        System.out.print("Waiting for " + f + " ...");
                        System.out.flush();
                        printedMessage = true;
                    }
                    System.out.print(".");
                    System.out.flush();
                    try {
                        Thread.sleep(10000);
                    } catch (InterruptedException e) {
                        // Keep the interrupt visible to callers and stop polling instead of ignoring it.
                        Thread.currentThread().interrupt();
                        java.io.InterruptedIOException interrupted = new java.io.InterruptedIOException(
                                "Interrupted while waiting for replication of " + f);
                        interrupted.initCause(e);
                        throw interrupted;
                    }
                }
            }

            if (printedMessage) {
                System.out.println(" done");
            }
        }
    }

    /** @return the command name, {@code "fixReplication"} */
    @Override
    public String getName() {
        return "fixReplication";
    }

    /** @return the same text as {@link #getShortDescription()} */
    @Override
    public String getDescription() {
        return getShortDescription();
    }

    /** @return a one-line description of the command */
    @Override
    public String getShortDescription() {
        return "Fix block placement policy [DOESN'T WORK YET]";
    }

}