org.openflamingo.uploader.handler.LocalToHdfsHandler.java Source code

Introduction

Here is the source code for org.openflamingo.uploader.handler.LocalToHdfsHandler.java

Source

/**
 * Flamingo HDFS File Uploader - a tool to upload from datasource to datasource and schedule jobs
 *
 * Copyright (C) 2011-2012 Cloudine.
 *
 * This file is part of Flamingo HDFS File Uploader.
 *
 * Flamingo HDFS File Uploader is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Flamingo HDFS File Uploader is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.openflamingo.uploader.handler;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.openflamingo.uploader.JobContext;
import org.openflamingo.uploader.jaxb.*;
import org.openflamingo.uploader.policy.SelectorPattern;
import org.openflamingo.uploader.policy.SelectorPatternFactory;
import org.openflamingo.uploader.util.DateUtils;
import org.openflamingo.uploader.util.FileSystemScheme;
import org.openflamingo.uploader.util.JVMIDUtils;
import org.slf4j.Logger;

import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import static org.openflamingo.uploader.util.FileSystemUtils.*;

/**
 * Handler that ingests files from the local FileSystem (Ingress) and uploads them to HDFS (Outgress).
 *
 * @author Edward KIM
 * @since 0.1
 */
public class LocalToHdfsHandler implements Handler {

    /**
     * Job Logger
     */
    private Logger jobLogger;

    /**
     * Suffix appended to a file name while it is being transferred. A file carrying
     * this suffix is treated as being processed and is skipped when new files are
     * collected, so that the same file is not picked up twice.
     */
    public static final String PROCESSING_FILE_QUALIFIER = ".processing"; // FIXME

    /**
     * Hadoop Configuration key for the HDFS URL.
     */
    public final static String HDFS_URL = "fs.default.name";

    /**
     * Hadoop Configuration key for the Job Tracker.
     */
    public final static String JOB_TRACKER = "mapred.job.tracker";

    /**
     * HDFS File Uploader Job Context
     */
    private JobContext jobContext;

    /**
     * HDFS File Uploader Job
     */
    private Job job;

    /**
     * Local Ingress settings of the Job
     */
    private Local local;

    /**
     * Constructs a handler that moves files from the local filesystem to HDFS.
     *
     * @param jobContext Flamingo HDFS File Uploader Job Context
     * @param job        Job
     * @param local      Local Ingress settings
     * @param jobLogger  Job Logger for tracing
     */
    public LocalToHdfsHandler(JobContext jobContext, Job job, Local local, Logger jobLogger) {
        this.jobContext = jobContext;
        this.job = job;
        this.local = local;
        this.jobLogger = jobLogger;
    }

    @Override
    public void execute() throws Exception {
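        // Overall flow: new files are moved from the source directory into the working
        // directory, renamed with the processing suffix, copied to the HDFS staging
        // directory, renamed into the HDFS target directory, and finally moved to the
        // complete directory (or deleted) on success, or to the error directory on failure.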
        // Move new files from the source directory to the working directory.
        copyToWorkingDirectory();

        // Collect the files waiting in the working directory.
        List<FileStatus> files = getFilesFromWorkingDirectory();

        if (files.size() < 1) {
            jobLogger.info("There are no files to process in the working directory.");
            return;
        }

        // Upload each file in the working directory to HDFS.
        Iterator<FileStatus> iterator = files.iterator();
        while (iterator.hasNext()) {
            // Resolve the filesystem of the working file.
            FileStatus workingFile = iterator.next();
            FileSystem workingFS = getFileSystem(workingFile.getPath());

            // Rename the file with the processing suffix to mark it as in progress.
            String processingFileName = workingFile.getPath().getName() + PROCESSING_FILE_QUALIFIER;
            String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
            Path processingFile = new Path(workingDirectory, processingFileName);
            boolean renamed = workingFS.rename(workingFile.getPath(), processingFile);
            jobLogger.debug(
                    "Renamed working file '{}' to processing file '{}'.",
                    workingFile.getPath(), processingFile);

            if (renamed) {
                // Get the HDFS Outgress settings of the Job.
                Hdfs hdfs = job.getPolicy().getOutgress().getHdfs();

                // Obtain the FileSystem of the target Hadoop cluster.
                String cluster = jobContext.getValue(hdfs.getCluster());
                Configuration configuration = getConfiguration(jobContext.getModel(), cluster);
                FileSystem targetFS = FileSystem.get(configuration);
                jobLogger.info(
                        "Uploading to HDFS using Hadoop Cluster '{}'.",
                        cluster);

                // Resolve the target and staging directories on HDFS.
                String targetDirectory = jobContext.getValue(hdfs.getTargetPath());
                String stagingDirectory = jobContext.getValue(hdfs.getStagingPath());
                jobLogger.info(
                        "On HDFS, the target directory is '{}' and the staging directory is '{}'.",
                        targetDirectory, stagingDirectory);

                // Generate a hash used to build a unique staging file name.
                int hash = Math.abs((workingFile.getPath().toString() + processingFile.toString()).hashCode())
                        + Integer.parseInt(JVMIDUtils.generateUUID());
                if (hash < 0)
                    hash = -hash;
                jobLogger.debug(
                        "In staging directory '{}', processing file '{}' will be staged with hash '{}'.",
                        new Object[] { stagingDirectory, processingFile.getName(), hash });

                // Build the staging file path.
                // FIXME
                Path stagingFile = new Path(stagingDirectory,
                        DateUtils.parseDate(jobContext.getStartDate(), "yyyyMMddHHmmss") + "_"
                                + String.valueOf(hash));
                try {
                    targetFS.copyFromLocalFile(false, false, processingFile, stagingFile);
                } catch (Exception ex) {
                    jobLogger.warn(
                            "Failed to copy processing file '{}' to staging file '{}'. Moving the file to the error directory.",
                            processingFile, stagingFile);
                    // Pass the renamed processing file so the error handler can strip the suffix.
                    copyToErrorDirectory(workingFS.getFileStatus(processingFile));
                    continue;
                }
                jobLogger.info(
                        "Copied processing file '{}' to staging file '{}'.",
                        processingFile, stagingFile);

                // Move the staging file to the target directory.
                Path targetFile = new Path(targetDirectory, workingFile.getPath().getName());
                targetFS.rename(stagingFile, targetFile);
                jobLogger.info("? ? '{}' ?? '{}' ??.",
                        stagingFile, targetFile);

                // Move the processed file to the complete directory (or delete it).
                copyToCompleteDirectory(workingFS.getFileStatus(processingFile));
            }
        }
    }

    @Override
    public void validate() {
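        // Validation covers the local Ingress side (path schemes, same-filesystem
        // constraint, directory creation) and the HDFS Outgress side (staging and
        // target paths on the configured cluster).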
        /////////////////////////////////
        // Ingress :: Local FileSystem
        /////////////////////////////////

        String sourceDirectory = correctPath(jobContext.getValue(local.getSourceDirectory().getPath()));
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        String errorDirectory = correctPath(jobContext.getValue(local.getErrorDirectory()));
        String completeDirectory = correctPath(jobContext.getValue(local.getCompleteDirectory()));

        // Verify that each path uses the local filesystem scheme.
        checkScheme(sourceDirectory, FileSystemScheme.LOCAL);
        checkScheme(workingDirectory, FileSystemScheme.LOCAL);
        checkScheme(errorDirectory, FileSystemScheme.LOCAL);
        checkScheme(completeDirectory, FileSystemScheme.LOCAL);

        // Verify that the directories reside on the same filesystem.
        validateSameFileSystem(sourceDirectory, workingDirectory);
        validateSameFileSystem(sourceDirectory, errorDirectory);
        if (completeDirectory != null) {
            validateSameFileSystem(sourceDirectory, completeDirectory);
        }

        // Verify that the directories can be created.
        testCreateDir(new Path(sourceDirectory));
        testCreateDir(new Path(workingDirectory));
        testCreateDir(new Path(errorDirectory));
        if (local.getCompleteDirectory() != null) {
            testCreateDir(new Path(completeDirectory));
        }

        /////////////////////////////////
        // Outgress :: HDFS
        /////////////////////////////////

        String cluster = jobContext.getValue(job.getPolicy().getOutgress().getHdfs().getCluster());
        Configuration configuration = this.getConfiguration(jobContext.getModel(), cluster);
        String stagingPath = jobContext.getValue(configuration.get(HDFS_URL) + "/"
                + jobContext.getValue(job.getPolicy().getOutgress().getHdfs().getStagingPath()));
        String targetPath = jobContext.getValue(configuration.get(HDFS_URL) + "/"
                + jobContext.getValue(job.getPolicy().getOutgress().getHdfs().getTargetPath()));

        checkScheme(stagingPath, FileSystemScheme.HDFS);
        checkScheme(targetPath, FileSystemScheme.HDFS);

        validateSameFileSystem(targetPath, stagingPath);

        testCreateDir(new Path(stagingPath));
        testCreateDir(new Path(targetPath));
    }

    /**
     * Builds the Hadoop Configuration of the Hadoop Cluster with the given name.
     *
     * @param model       JAXB root object of the HDFS File Uploader configuration
     * @param clusterName Hadoop Cluster name
     * @return {@link org.apache.hadoop.conf.Configuration}
     */
    public static org.apache.hadoop.conf.Configuration getConfiguration(Flamingo model, String clusterName) {
        org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
        List<Cluster> clusters = model.getClusters().getCluster();
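        // Find the cluster whose name matches and copy its connection settings and
        // additional properties into the Configuration.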
        for (Cluster cluster : clusters) {
            if (clusterName.equals(cluster.getName())) {
                configuration.set(HDFS_URL, cluster.getFsDefaultName());
                configuration.set(JOB_TRACKER, cluster.getMapredJobTracker());

                List<Property> properties = cluster.getProperties().getProperty();
                for (Property property : properties) {
                    configuration.set(property.getName(), property.getValue());
                }
            }
        }
        return configuration;
    }

    /**
     * Moves the files in the source directory that match the selection condition
     * to the working directory.
     *
     * @return list of files moved to the working directory (empty if nothing matched)
     * @throws IOException if the filesystem cannot be accessed or a file cannot be moved
     */
    public List<FileStatus> copyToWorkingDirectory() throws IOException {
        // Build the selector pattern that decides which source files to pick up.
        SelectorPattern selectorPattern = SelectorPatternFactory.getSelectorPattern(
                this.local.getSourceDirectory().getConditionType(),
                jobContext.getValue(this.local.getSourceDirectory().getCondition()), jobContext);
        String sourceDirectory = correctPath(jobContext.getValue(local.getSourceDirectory().getPath()));
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));

        FileSystem sourceDirectoryFS = getFileSystem(sourceDirectory);
        List<FileStatus> files = new LinkedList<FileStatus>();
        for (FileStatus sourceFile : sourceDirectoryFS.listStatus(new Path(sourceDirectory))) {
            if (!sourceFile.isDir()) {
                if (sourceFile.getPath().getName().startsWith(".") || sourceFile.getPath().getName().startsWith("_")
                        || sourceFile.getPath().getName().endsWith(".work")) {
                    jobLogger.info(" ? '{}'?   .",
                            sourceFile.getPath());
                    continue;
                }
                if (selectorPattern.accept(sourceFile.getPath())) {
                    // Move the selected file to the working directory.
                    Path workPath = new Path(workingDirectory, sourceFile.getPath().getName());
                    sourceDirectoryFS.rename(sourceFile.getPath(), workPath);
                    jobLogger.info("? ? '{}'?   '{}' ??.",
                            sourceFile.getPath(), workPath);
                    files.add(sourceDirectoryFS.getFileStatus(workPath));
                }
            }
        }
        return files;
    }

    /**
     * Collects the files in the working directory, excluding files that are already being
     * processed. A file being processed ends with
     * {@link org.openflamingo.uploader.handler.LocalToHdfsHandler#PROCESSING_FILE_QUALIFIER}.
     *
     * @return list of files in the working directory that are not yet being processed
     * @throws IOException if the working directory cannot be accessed
     */
    public List<FileStatus> getFilesFromWorkingDirectory() throws IOException {
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        FileSystem workingDirectoryFS = getFileSystem(workingDirectory);
        List<FileStatus> files = new LinkedList<FileStatus>();
        for (FileStatus fs : workingDirectoryFS.listStatus(new Path(workingDirectory))) {
            if (!fs.isDir()) {
                if (fs.getPath().getName().endsWith(PROCESSING_FILE_QUALIFIER)) {
                    jobLogger.info("'{}' ??  ? ?? ",
                            fs.getPath());
                    continue;
                }
                files.add(fs);
            }
        }
        return files;
    }

    /**
     * Collects the files in the working directory that are currently being processed.
     * A file being processed ends with <tt>.processing</tt>.
     *
     * @return list of in-progress files in the working directory
     * @throws IOException if the working directory cannot be accessed
     */
    public List<FileStatus> getProcessingFilesFromWorkingDirectory() throws IOException {
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        FileSystem workingDirectoryFS = getFileSystem(workingDirectory);
        List<FileStatus> files = new LinkedList<FileStatus>();
        for (FileStatus fs : workingDirectoryFS.listStatus(new Path(workingDirectory))) {
            if (!fs.isDir()) {
                if (fs.getPath().getName().endsWith(PROCESSING_FILE_QUALIFIER)) {
                    files.add(fs);
                }
            }
        }
        return files;
    }

    /**
     * Moves a processed file to the complete directory, or deletes it if the job is
     * configured to remove files after copying.
     *
     * @param fileToMove file to move
     * @return <tt>true</tt> if the move (or delete) succeeded
     * @throws IOException if the file cannot be moved or deleted
     */
    public boolean copyToCompleteDirectory(FileStatus fileToMove) throws IOException {
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        String completeDirectory = correctPath(jobContext.getValue(local.getCompleteDirectory()));
        FileSystem workingDirectoryFS = getFileSystem(workingDirectory);

        boolean success = false;
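        // Either delete the processed file or move it (with the processing suffix removed)
        // to the complete directory, depending on the job configuration.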
        if (local.isRemoveAfterCopy()) {
            jobLogger.info("?  . ? ? '{}'? ."
                    + fileToMove.getPath());
            success = workingDirectoryFS.delete(fileToMove.getPath(), false);
            if (!success) {
                jobLogger.info("? ? '{}'? .", fileToMove.getPath());
            }
        } else {
            Path completedPath = new Path(completeDirectory,
                    fileToMove.getPath().getName().replaceAll(PROCESSING_FILE_QUALIFIER, ""));
            jobLogger.info(
                    "Remove-after-copy is disabled. Moving processing file '{}' to '{}'.",
                    fileToMove.getPath(), completedPath);
            success = workingDirectoryFS.rename(fileToMove.getPath(), completedPath);
            if (!success) {
                jobLogger.warn("? ??? ? .");
            }
        }
        return success;
    }

    /**
     * Moves a file that failed to upload to the error directory.
     *
     * @param fs file to move
     * @return <tt>true</tt> if the move succeeded
     * @throws IOException if the file cannot be moved
     */
    public boolean copyToErrorDirectory(FileStatus fs) throws IOException {
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        String errorDirectory = correctPath(jobContext.getValue(local.getErrorDirectory()));
        FileSystem workingDirectoryFS = getFileSystem(workingDirectory);
        if (fs.getPath().getName().endsWith(PROCESSING_FILE_QUALIFIER)) {
            Path errorPath = new Path(errorDirectory,
                    fs.getPath().getName().replaceAll(PROCESSING_FILE_QUALIFIER, ""));
            jobLogger.info(
                    "Moving failed file to the error directory: '{}' -> '{}'.",
                    fs.getPath(), errorPath);
            return workingDirectoryFS.rename(fs.getPath(), errorPath);
        }
        return false;
    }
}
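
Example

A minimal, hypothetical sketch of how this handler might be wired up and run. The jobContext, job, and local objects are assumed to be supplied by the uploader's scheduler and JAXB configuration, which this page does not cover; the runOnce method name is illustrative only.

    // Hypothetical driver method; jobContext, job, and local are assumed to come from
    // the uploader's configuration and scheduler.
    void runOnce(JobContext jobContext, Job job, Local local) throws Exception {
        org.slf4j.Logger jobLogger = org.slf4j.LoggerFactory.getLogger("uploader-job");
        Handler handler = new LocalToHdfsHandler(jobContext, job, local, jobLogger);
        handler.validate();  // check path schemes, same-filesystem constraints, and directories
        handler.execute();   // source -> working -> .processing -> staging -> target -> complete/error
    }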