io.hops.erasure_coding.BaseEncodingManager.java Source code


Introduction

Here is the source code for io.hops.erasure_coding.BaseEncodingManager.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.hops.erasure_coding;

import io.hops.metadata.hdfs.entity.EncodingStatus;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.xml.sax.SAXException;

import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

public abstract class BaseEncodingManager extends EncodingManager {

    public static enum LOGTYPES {
        ONLINE_RECONSTRUCTION, OFFLINE_RECONSTRUCTION, ENCODING
    }

    public static final Log LOG = LogFactory.getLog(BaseEncodingManager.class);
    public static final Log ENCODER_METRICS_LOG = LogFactory.getLog("RaidMetrics");
    public static final String JOBUSER = "erasure_coding";

    public static final DateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH-mm");

    /**
     * hadoop configuration
     */
    protected Configuration conf;

    protected boolean initialized = false; // Are we initialized?

    // statistics about RAW hdfs blocks. This counts all replicas of a block.
    public static class Statistics {
        long numProcessedBlocks; // total blocks encountered in namespace
        long processedSize; // disk space occupied by all blocks
        long remainingSize; // total disk space post RAID

        long numMetaBlocks; // total blocks in metafile
        long metaSize; // total disk space for meta files

        public void clear() {
            numProcessedBlocks = 0;
            processedSize = 0;
            remainingSize = 0;
            numMetaBlocks = 0;
            metaSize = 0;
        }

        public String toString() {
            long save = processedSize - (remainingSize + metaSize);
            long savep = 0;
            if (processedSize > 0) {
                savep = (save * 100) / processedSize;
            }
            String msg = " numProcessedBlocks = " + numProcessedBlocks + " processedSize = " + processedSize
                    + " postRaidSize = " + remainingSize + " numMetaBlocks = " + numMetaBlocks + " metaSize = "
                    + metaSize + " %save in raw disk space = " + savep;
            return msg;
        }
    }

    BaseEncodingManager(Configuration conf) throws IOException {
        super(conf);
        try {
            initialize(conf);
        } catch (IOException e) {
            LOG.error(StringUtils.stringifyException(e));
            throw e;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    private void cleanUpDirectory(String dir, Configuration conf) throws IOException {
        Path pdir = new Path(dir);
        FileSystem fs = pdir.getFileSystem(conf);
        if (fs.exists(pdir)) {
            fs.delete(pdir, true); // recursive delete; the single-argument delete(Path) is deprecated
        }
    }

    protected void cleanUpTempDirectory(Configuration conf) throws IOException {
        for (Codec codec : Codec.getCodecs()) {
            cleanUpDirectory(codec.tmpParityDirectory, conf);
        }
    }

    private void initialize(Configuration conf) throws IOException, SAXException, InterruptedException,
            ClassNotFoundException, ParserConfigurationException {
        this.conf = conf;
        // clean up the temporary parity directories
        cleanUpTempDirectory(conf);
        initialized = true;
    }

    static long numBlocks(FileStatus stat) {
        return (long) Math.ceil(stat.getLen() * 1.0 / stat.getBlockSize());
    }

    static long numStripes(long numBlocks, int stripeSize) {
        return (long) Math.ceil(numBlocks * 1.0 / stripeSize);
    }
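
    // Worked example (hypothetical numbers): a file of 10 blocks with a
    // stripeSize of 4 yields numStripes = ceil(10 / 4) = 3; the final stripe
    // is only partially filled but still needs its own parity.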

    /**
     * RAID a list of files / directories
     */
    void doRaid(Configuration conf, PolicyInfo info) throws IOException {
        int targetRepl = Integer.parseInt(info.getProperty("targetReplication"));
        int metaRepl = Integer.parseInt(info.getProperty("metaReplication"));
        Codec codec = Codec.getCodec(info.getCodecId());
        Path destPref = new Path(codec.parityDirectory);

        Statistics statistics = new Statistics();
        doRaid(conf, info.getSrcPath(), destPref, codec, statistics, RaidUtils.NULL_PROGRESSABLE, targetRepl,
                metaRepl);
        LOG.info("RAID statistics " + statistics.toString());
    }

    /**
     * RAID an individual file/directory
     */
    static public boolean doRaid(Configuration conf, PolicyInfo info, Path src, Statistics statistics,
            Progressable reporter) throws IOException {
        int targetRepl = Integer.parseInt(info.getProperty("targetReplication"));
        int metaRepl = Integer.parseInt(info.getProperty("metaReplication"));

        Codec codec = Codec.getCodec(info.getCodecId());
        Path parityPath = new Path(info.getProperty("parityPath"));

        return doRaid(conf, src, parityPath, codec, statistics, reporter, targetRepl, metaRepl);
    }

    public static boolean doRaid(Configuration conf, Path path, Path parityPath, Codec codec, Statistics statistics,
            Progressable reporter, int targetRepl, int metaRepl) throws IOException {
        long startTime = System.currentTimeMillis();
        boolean success = false;
        try {
            // record the outcome so the finally block logs SUCCESS or FAILURE correctly
            success = doFileRaid(conf, path, parityPath, codec, statistics, reporter, targetRepl, metaRepl);
            return success;
        } finally {
            long delay = System.currentTimeMillis() - startTime;
            long savingBytes = statistics.processedSize - statistics.remainingSize - statistics.metaSize;
            FileSystem srcFs = path.getFileSystem(conf);
            if (success) {
                logRaidEncodingMetrics("SUCCESS", codec, delay, statistics.processedSize,
                        statistics.numProcessedBlocks, statistics.numMetaBlocks, statistics.metaSize, savingBytes,
                        path, LOGTYPES.ENCODING, srcFs);
            } else {
                logRaidEncodingMetrics("FAILURE", codec, delay, statistics.processedSize,
                        statistics.numProcessedBlocks, statistics.numMetaBlocks, statistics.metaSize, savingBytes,
                        path, LOGTYPES.ENCODING, srcFs);
            }
        }
    }

    public static long getNumBlocks(FileStatus stat) {
        long numBlocks = stat.getLen() / stat.getBlockSize();
        if (stat.getLen() % stat.getBlockSize() == 0) {
            return numBlocks;
        } else {
            return numBlocks + 1;
        }
    }

    /**
     * RAID an individual file
     */
    public static boolean doFileRaid(Configuration conf, Path sourceFile, Path parityPath, Codec codec,
            Statistics statistics, Progressable reporter, int targetRepl, int metaRepl) throws IOException {
        FileSystem srcFs = sourceFile.getFileSystem(conf);
        FileStatus sourceStatus = srcFs.getFileStatus(sourceFile);

        // extract block locations from File system
        BlockLocation[] locations = srcFs.getFileBlockLocations(sourceFile, 0, sourceStatus.getLen());
        // if the file has two or fewer blocks, there is nothing to do
        if (locations.length <= 2) {
            return false;
        }

        // add up the raw disk space occupied by this file
        long diskSpace = 0;
        for (BlockLocation l : locations) {
            diskSpace += (l.getLength() * sourceStatus.getReplication());
        }
        statistics.numProcessedBlocks += locations.length;
        statistics.processedSize += diskSpace;

        // generate parity file
        generateParityFile(conf, sourceStatus, targetRepl, reporter, srcFs, parityPath, codec, locations.length,
                sourceStatus.getReplication(), metaRepl, sourceStatus.getBlockSize());
        if (!srcFs.setReplication(sourceFile, (short) targetRepl)) {
            LOG.warn("Failed to reduce replication of " + sourceFile + " to " + targetRepl);
            statistics.remainingSize += diskSpace;
            return false;
        }

        diskSpace = 0;
        for (BlockLocation l : locations) {
            diskSpace += (l.getLength() * targetRepl);
        }
        statistics.remainingSize += diskSpace;

        // the metafile will have this many blocks
        int numMeta = locations.length / codec.stripeLength;
        if (locations.length % codec.stripeLength != 0) {
            numMeta++;
        }

        // we create numMeta parity blocks for every file, each with metaRepl replicas.
        // the last block of the metafile might not be completely filled, but we
        // ignore that for now.
        statistics.numMetaBlocks += (numMeta * metaRepl);
        statistics.metaSize += (numMeta * metaRepl * sourceStatus.getBlockSize());
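        // Worked example (hypothetical numbers): 10 source blocks with
        // stripeLength 4 and metaRepl 2 give numMeta = 3, so the parity file
        // adds 6 block replicas and 6 * blockSize bytes of meta space.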
        return true;
    }

    /**
     * Generate parity file
     */
    static private void generateParityFile(Configuration conf, FileStatus sourceFile, int targetRepl,
            Progressable reporter, FileSystem inFs, Path destPath, Codec codec, long blockNum, int srcRepl,
            int metaRepl, long blockSize) throws IOException {
        Path inpath = sourceFile.getPath();
        FileSystem outFs = inFs;

        // If the parity file is already up to date and the source replication
        // is already at the target, there is nothing to do.
        try {
            FileStatus stmp = outFs.getFileStatus(destPath);
            if (stmp.getModificationTime() == sourceFile.getModificationTime() && srcRepl == targetRepl) {
                LOG.info("Parity file for " + inpath + "(" + blockNum + ") is " + destPath
                        + " already upto-date and " + "file is at target replication . Nothing more to do.");
                return;
            }
        } catch (IOException e) {
            // ignore errors because the raid file might not exist yet.
        }

        Encoder encoder = new Encoder(conf, codec);
        FileStatus srcStat = inFs.getFileStatus(inpath);
        long srcSize = srcStat.getLen();
        long numBlocks = (srcSize % blockSize == 0) ? (srcSize / blockSize) : ((srcSize / blockSize) + 1);
        long numStripes = (numBlocks % codec.stripeLength == 0) ? (numBlocks / codec.stripeLength)
                : ((numBlocks / codec.stripeLength) + 1);
        StripeReader sReader = new FileStripeReader(conf, blockSize, codec, inFs, 0, inpath, srcSize);
        encoder.encodeFile(conf, inFs, inpath, outFs, destPath, (short) metaRepl, numStripes, blockSize, reporter,
                sReader);

        // set the modification time of the RAID file. This is done so that the modTime of the
        // RAID file reflects the contents of the source file that it has RAIDed. This should
        // also work for files that are being appended to. This is necessary because the clock
        // on the destination namenode may not be synchronised with the clock of the
        // source namenode.
        outFs.setTimes(destPath, sourceFile.getModificationTime(), -1);

        FileStatus outstat = outFs.getFileStatus(destPath);
        FileStatus inStat = inFs.getFileStatus(inpath);
        if (sourceFile.getModificationTime() != inStat.getModificationTime()) {
            String msg = "Source file changed mtime during raiding from " + sourceFile.getModificationTime()
                    + " to " + inStat.getModificationTime();
            throw new IOException(msg);
        }
        if (outstat.getModificationTime() != inStat.getModificationTime()) {
            String msg = "Parity file mtime " + outstat.getModificationTime() + " does not match source mtime "
                    + inStat.getModificationTime();
            throw new IOException(msg);
        }
        LOG.info("Source file " + inpath + " of size " + inStat.getLen() + " Parity file " + destPath + " of size "
                + outstat.getLen() + " src mtime " + sourceFile.getModificationTime() + " parity mtime "
                + outstat.getModificationTime());
    }

    public static Decoder.DecoderInputStream unRaidCorruptInputStream(Configuration conf, Path srcPath,
            long blockSize, long corruptOffset, long limit) throws IOException {
        DistributedFileSystem srcFs = (DistributedFileSystem) srcPath.getFileSystem(conf);
        EncodingStatus encodingStatus = srcFs.getEncodingStatus(srcPath.toUri().getPath());
        Decoder decoder = new Decoder(conf, Codec.getCodec(encodingStatus.getEncodingPolicy().getCodec()));
        String parityFolder = conf.get(DFSConfigKeys.PARITY_FOLDER, DFSConfigKeys.DEFAULT_PARITY_FOLDER);

        return decoder.generateAlternateStream(srcFs, srcPath, srcFs,
                new Path(parityFolder + "/" + encodingStatus.getParityFileName()), blockSize, corruptOffset, limit,
                null);
    }
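
    // Typical use (hypothetical caller): after a read fails on a corrupt block,
    // obtain a reconstructed stream and read the damaged range from it instead:
    //   Decoder.DecoderInputStream in =
    //       unRaidCorruptInputStream(conf, srcPath, blockSize, corruptOffset, limit);
    // The stream serves the decoded bytes recovered from the parity file.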

    /**
     * Returns current time.
     */
    static long now() {
        return System.currentTimeMillis();
    }

    /**
     * Make an absolute path relative by stripping the leading /
     */
    static Path makeRelative(Path path) {
        if (!path.isAbsolute()) {
            return path;
        }
        String p = path.toUri().getPath();
        String relative = p.substring(1);
        return new Path(relative);
    }

    /**
     * Get the job id from the configuration
     */
    public static String getJobID(Configuration conf) {
        return conf.get("mapred.job.id", "localRaid" + df.format(new Date()));
    }

    static public void logRaidEncodingMetrics(String result, Codec codec, long delay, long numReadBytes,
            long numReadBlocks, long metaBlocks, long metaBytes, long savingBytes, Path srcPath, LOGTYPES type,
            FileSystem fs) {
        try {
            JSONObject json = new JSONObject();
            json.put("result", result);
            json.put("code", codec.id);
            json.put("delay", delay);
            json.put("readbytes", numReadBytes);
            json.put("readblocks", numReadBlocks);
            json.put("metablocks", metaBlocks);
            json.put("metabytes", metaBytes);
            json.put("savingbytes", savingBytes);
            json.put("path", srcPath.toString());
            json.put("type", type.name());
            json.put("cluster", fs.getUri().getAuthority());
            ENCODER_METRICS_LOG.info(json.toString());
        } catch (JSONException e) {
            LOG.warn("Exception when logging the Raid metrics: " + e.getMessage(), e);
        }
    }
}
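
Example

The space accounting in Statistics and doFileRaid() reduces to simple block
arithmetic. The following standalone sketch (not part of the class above; all
policy numbers are hypothetical) reproduces that arithmetic for a 10-block
file encoded with stripe length 4, target replication 1, and meta replication 2:

public class RaidSavingsExample {
    public static void main(String[] args) {
        // hypothetical file: 10 blocks of 64 MB, replicated 3x before RAID
        long blockSize = 64L * 1024 * 1024;
        long numBlocks = 10;
        int srcRepl = 3;

        // hypothetical policy values, mirroring targetRepl/metaRepl/stripeLength above
        int targetRepl = 1;
        int metaRepl = 2;
        int stripeLength = 4;

        // raw disk space before and after reducing the source replication
        long processedSize = numBlocks * blockSize * srcRepl;
        long remainingSize = numBlocks * blockSize * targetRepl;

        // numMeta = ceil(numBlocks / stripeLength), as in doFileRaid()
        long numMeta = (numBlocks + stripeLength - 1) / stripeLength;
        long metaSize = numMeta * metaRepl * blockSize;

        // same savings formula as Statistics.toString()
        long saved = processedSize - (remainingSize + metaSize);
        long savedPercent = (saved * 100) / processedSize;

        System.out.println("processedSize = " + processedSize);
        System.out.println("remainingSize = " + remainingSize);
        System.out.println("metaSize = " + metaSize);
        // prints 46 for these numbers: 30 block replicas shrink to 16
        System.out.println("%save in raw disk space = " + savedPercent);
    }
}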