com.inmobi.messaging.consumer.util.HadoopUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.inmobi.messaging.consumer.util.HadoopUtil.java

Source

package com.inmobi.messaging.consumer.util;

/*
 * #%L
 * messaging-client-databus
 * %%
 * Copyright (C) 2012 - 2014 InMobi
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.inmobi.databus.files.HadoopStreamFile;
import com.inmobi.databus.readers.DatabusStreamWaitingReader;

/**
 * Helpers that lay out sequence files in per-minute directories under a
 * stream directory prefix and that build {@link HadoopStreamFile} instances
 * positioned before or after an existing file.
 *
 * <p>The class keeps static, mutable state ({@code lastCommitTime},
 * {@code increment}, {@code startCommitTime}) that is shared by every caller;
 * none of it is synchronized.
 */
public class HadoopUtil {
    static final Log LOG = LogFactory.getLog(HadoopUtil.class);

    /** Prefix of the default file names; the text after it is parsed as the file index. */
    private static final String FILE_NAME_PREFIX = "datafile";

    /** Index used for a file name that carries no parsable numeric suffix. */
    private static final int DEFAULT_FILE_INDEX = 1;

    /** Amount by which the message start index advances after every non-empty file. */
    private static final int START_INDEX_STEP = 100;

    /**
     * Offset applied to a file's timestamp by {@link #getOlderFile} (subtracted)
     * and {@link #getHigherFile} (added).
     * NOTE(review): unit is whatever HadoopStreamFile#getTimestamp uses - confirm.
     */
    private static final int TIMESTAMP_SHIFT = 36000;

    /** Commit time of the most recently published file; shared across all calls. */
    private static Date lastCommitTime;

    /** Extra minutes added to every computed commit time; see {@link #incrementCommitTime()}. */
    private static int increment = 1;

    /** Default file names: {@code datafile0} .. {@code datafile11}. */
    public static String[] files = new String[12];

    /** Base time for commit times when no explicit minute-directory timestamp is given. */
    static Date startCommitTime;

    static {
        // Start far enough in the past that files.length one-minute steps
        // (plus a 5 minute margin) are subtracted from the current time.
        Calendar now = Calendar.getInstance();
        now.add(Calendar.MINUTE, -(files.length + 5));
        startCommitTime = now.getTime();
        LOG.debug("startCommitTime:" + startCommitTime);
        for (int i = 0; i < files.length; i++) {
            files[i] = FILE_NAME_PREFIX + i;
        }
    }

    /** Adds one more minute to the offset applied to all subsequently computed commit times. */
    public static void incrementCommitTime() {
        increment++;
    }

    /**
     * Convenience overload of
     * {@link #setUpHadoopFiles(Path, Configuration, String[], String[], Path[], boolean, Date, int, int)}
     * that creates no empty files, uses {@code startCommitTime} as the base time
     * and starts both the {@code finalFiles} slot and the message index at 0.
     */
    public static void setUpHadoopFiles(Path streamDirPrefix, Configuration conf, String[] files,
            String[] suffixDirs, Path[] finalFiles) throws Exception {
        setUpHadoopFiles(streamDirPrefix, conf, files, suffixDirs, finalFiles, false, null, 0, 0);
    }

    /**
     * Creates each named file under {@code <parent of streamDirPrefix>/data},
     * copies it into the minute directory derived from its commit time (or into
     * every {@code suffixDirs} child of that directory) and deletes the temporary
     * source afterwards.
     *
     * @param streamDirPrefix     directory under which the minute directories are created
     * @param conf                configuration used to resolve the file system and create files
     * @param files               file names to create; {@code null} makes this a no-op
     * @param suffixDirs          sub-directories of the minute directory to copy into, or
     *                            {@code null} to copy into the minute directory itself
     * @param finalFiles          receives one target path per copy, starting at {@code index};
     *                            may be {@code null}
     * @param alternateEmptyFiles when {@code true}, every second file is created through
     *                            {@code MessageUtil.createEmptySequenceFile}
     * @param minuteDirTimeStamp  base time for commit times, or {@code null} for
     *                            {@code startCommitTime}
     * @param index               first slot of {@code finalFiles} to write
     * @param startIndex          start index passed to the first non-empty file; advanced by
     *                            100 for each further non-empty file
     * @throws Exception if file system access fails or the per-copy sleep is interrupted
     */
    public static void setUpHadoopFiles(Path streamDirPrefix, Configuration conf, String[] files,
            String[] suffixDirs, Path[] finalFiles, boolean alternateEmptyFiles, Date minuteDirTimeStamp, int index,
            int startIndex) throws Exception {
        FileSystem fs = streamDirPrefix.getFileSystem(conf);
        Path rootDir = streamDirPrefix.getParent();
        Path tmpDataDir = new Path(rootDir, "data");
        if (files == null) {
            return;
        }

        boolean nextIsEmpty = false;
        int messageStart = startIndex;
        int finalFileSlot = index;
        for (String file : files) {
            // The first file always carries data; with alternateEmptyFiles the
            // files then alternate between empty and non-empty.
            if (alternateEmptyFiles && nextIsEmpty) {
                MessageUtil.createEmptySequenceFile(file, fs, tmpDataDir, conf);
                nextIsEmpty = false;
            } else {
                MessageUtil.createMessageSequenceFile(file, fs, tmpDataDir, messageStart, conf);
                nextIsEmpty = true;
                messageStart += START_INDEX_STEP;
            }

            Path srcPath = new Path(tmpDataDir, file);
            Date commitTime = getCommitDateForFile(file, minuteDirTimeStamp);
            // lastCommitTime is static, so the range handed to publishMissingPaths
            // starts at the commit time left behind by the previous file or call.
            TestUtil.publishMissingPaths(fs, streamDirPrefix, lastCommitTime, commitTime);
            lastCommitTime = commitTime;

            Path targetDateDir = getTargetDateDir(streamDirPrefix, commitTime);
            for (Path targetDir : targetDirsFor(targetDateDir, suffixDirs)) {
                fs.mkdirs(targetDir);
                Path targetPath = new Path(targetDir, file);
                fs.copyFromLocalFile(srcPath, targetPath);
                LOG.info("Copied " + srcPath + " to " + targetPath);
                if (finalFiles != null) {
                    finalFiles[finalFileSlot] = targetPath;
                    finalFileSlot++;
                }
                // NOTE(review): presumably keeps the copies' modification times
                // apart - confirm before shortening or removing.
                Thread.sleep(1000);
            }
            fs.delete(srcPath, true);
        }
        TestUtil.publishLastPath(fs, streamDirPrefix, lastCommitTime);
    }

    /**
     * Builds a stream file named {@code "myfile"} in the same minute directory as
     * {@code databusFile}, with a timestamp 36000 lower than that file's timestamp.
     *
     * @throws IOException if the status of {@code databusFile} cannot be read
     */
    public static HadoopStreamFile getOlderFile(Path streamDirPrefix, FileSystem fs, Path databusFile)
            throws IOException {
        return shiftedFile(streamDirPrefix, fs, databusFile, 0, -TIMESTAMP_SHIFT);
    }

    /**
     * Builds a stream file named {@code "myfile"} in the minute directory one minute
     * after that of {@code databusFile}, with a timestamp 36000 higher than that
     * file's timestamp.
     *
     * @throws IOException if the status of {@code databusFile} cannot be read
     */
    public static HadoopStreamFile getHigherFile(Path streamDirPrefix, FileSystem fs, Path databusFile)
            throws IOException {
        return shiftedFile(streamDirPrefix, fs, databusFile, 1, TIMESTAMP_SHIFT);
    }

    /**
     * Convenience overload of
     * {@link #setupHadoopCluster(Configuration, String[], String[], Path[], Path, boolean, boolean)}
     * that never creates empty files.
     */
    public static void setupHadoopCluster(Configuration conf, String[] files, String[] suffixDirs,
            Path[] finalFiles, Path finalDir, boolean createFilesInNextHour) throws Exception {
        setupHadoopCluster(conf, files, suffixDirs, finalFiles, finalDir, false, createFilesInNextHour);
    }

    /**
     * Deletes the parent directory of {@code finalDir}, recreates its {@code data}
     * sub-directory and publishes {@code files} under {@code finalDir}.
     *
     * <p>With {@code createFilesInNextHour} the files are published twice: once
     * based one hour before {@code startCommitTime} and once based at
     * {@code startCommitTime}. The second pass continues in {@code finalFiles}
     * directly after the slots written by the first pass, so {@code finalFiles}
     * must hold two entries per file and target directory.
     *
     * @param conf                  configuration used to resolve the file system
     * @param files                 file names to create; {@code null} publishes nothing
     * @param suffixDirs            sub-directories of each minute directory, or {@code null}
     * @param finalFiles            receives the published paths; may be {@code null}
     * @param finalDir              stream directory prefix; its parent is wiped first
     * @param withEmptyFiles        whether every second file is created empty
     * @param createFilesInNextHour whether to publish the files in two different hours
     * @throws Exception if any file system operation fails
     */
    public static void setupHadoopCluster(Configuration conf, String[] files, String[] suffixDirs,
            Path[] finalFiles, Path finalDir, boolean withEmptyFiles, boolean createFilesInNextHour)
            throws Exception {
        FileSystem fs = finalDir.getFileSystem(conf);

        Path rootDir = finalDir.getParent();
        fs.delete(rootDir, true);
        Path tmpDataDir = new Path(rootDir, "data");
        fs.mkdirs(tmpDataDir);

        if (!createFilesInNextHour) {
            setUpHadoopFiles(finalDir, conf, files, suffixDirs, finalFiles, withEmptyFiles, null, 0, 0);
            return;
        }

        // start from 1 hour back as we need files in two diff hours.
        Calendar cal = Calendar.getInstance();
        cal.setTime(startCommitTime);
        cal.add(Calendar.HOUR_OF_DAY, -1);
        setUpHadoopFiles(finalDir, conf, files, suffixDirs, finalFiles, withEmptyFiles, cal.getTime(), 0, 0);

        // go to next hour
        cal.add(Calendar.HOUR_OF_DAY, 1);
        int fileCount = (files == null) ? 0 : files.length;
        // The first pass wrote one finalFiles slot per file AND per target
        // directory, so the second pass has to start after all of them;
        // starting at files.length would overwrite first-hour entries whenever
        // more than one suffix directory is used.
        int targetsPerFile = (suffixDirs == null) ? 1 : suffixDirs.length;
        int index = fileCount * targetsPerFile;
        // find number of non empty(i.e. data) files in 1 hour
        int numberOfNonEmptyFiles = withEmptyFiles ? (int) Math.ceil(fileCount / 2.0) : fileCount;
        int startIndex = numberOfNonEmptyFiles * START_INDEX_STEP;
        setUpHadoopFiles(finalDir, conf, files, suffixDirs, finalFiles, withEmptyFiles, cal.getTime(), index,
                startIndex);
    }

    /**
     * Shared implementation of {@link #getOlderFile} and {@link #getHigherFile}:
     * moves the minute directory of {@code databusFile} by {@code minuteOffset}
     * minutes and its timestamp by {@code timestampDelta}.
     */
    private static HadoopStreamFile shiftedFile(Path streamDirPrefix, FileSystem fs, Path databusFile,
            int minuteOffset, int timestampDelta) throws IOException {
        FileStatus stat = fs.getFileStatus(databusFile);
        HadoopStreamFile hs = HadoopStreamFile.create(stat);
        Calendar cal = Calendar.getInstance();
        Date date = DatabusStreamWaitingReader.getDateFromStreamDir(streamDirPrefix, hs.getParent());
        cal.setTime(date);
        cal.add(Calendar.MINUTE, minuteOffset);
        return new HadoopStreamFile(DatabusStreamWaitingReader.getMinuteDirPath(streamDirPrefix, cal.getTime()),
                "myfile", hs.getTimestamp() + timestampDelta);
    }

    /**
     * Lists the directories a file is copied into: every {@code suffixDirs} child
     * of {@code targetDateDir}, or {@code targetDateDir} itself when
     * {@code suffixDirs} is {@code null}.
     */
    private static List<Path> targetDirsFor(Path targetDateDir, String[] suffixDirs) {
        List<Path> targetDirs = new ArrayList<Path>();
        if (suffixDirs == null) {
            targetDirs.add(targetDateDir);
        } else {
            for (String suffixDir : suffixDirs) {
                targetDirs.add(new Path(targetDateDir, suffixDir));
            }
        }
        return targetDirs;
    }

    /** Resolves the minute directory for {@code commitTime} under {@code streamDirPrefix}. */
    private static Path getTargetDateDir(Path streamDirPrefix, Date commitTime) throws IOException {
        return DatabusStreamWaitingReader.getMinuteDirPath(streamDirPrefix, commitTime);
    }

    /**
     * Computes the commit time of a file: the base time plus the file's index in
     * minutes plus the current {@code increment} in minutes.
     *
     * @param fileName           file name whose numeric suffix supplies the index
     * @param minuteDirTimeStamp base time, or {@code null} to use {@code startCommitTime}
     * @return the computed commit time
     */
    static Date getCommitDateForFile(String fileName, Date minuteDirTimeStamp) throws IOException {
        Calendar cal = Calendar.getInstance();
        cal.setTime(minuteDirTimeStamp == null ? startCommitTime : minuteDirTimeStamp);
        // Parse the index once; it is needed for both the log line and the offset.
        int fileIndex = getIndex(fileName);
        LOG.debug("index for " + fileName + ":" + fileIndex);
        cal.add(Calendar.MINUTE, fileIndex);
        cal.add(Calendar.MINUTE, increment);
        Date commitTime = cal.getTime();
        LOG.debug("Commit time for file:" + fileName + " is " + commitTime);
        return commitTime;
    }

    /**
     * Parses the text after the first {@code "datafile".length()} characters of
     * {@code fileName} as an integer. The name is not checked to actually start
     * with the prefix.
     *
     * @return the parsed index, or 1 when the name is {@code null}, not longer
     *         than the prefix, or its suffix is not a valid integer
     */
    private static int getIndex(String fileName) {
        if (fileName == null || fileName.length() <= FILE_NAME_PREFIX.length()) {
            LOG.debug("No index suffix in file name " + fileName);
            return DEFAULT_FILE_INDEX;
        }
        String indexStr = fileName.substring(FILE_NAME_PREFIX.length());
        LOG.debug("indexStr:" + indexStr);
        try {
            return Integer.parseInt(indexStr);
        } catch (NumberFormatException e) {
            LOG.debug("Exception while getting file index for " + fileName, e);
            return DEFAULT_FILE_INDEX;
        }
    }
}