Java tutorial
/**
 * Flamingo HDFS File Uploader - a tool to upload from datasource to datasource and schedule jobs
 *
 * Copyright (C) 2011-2012 Cloudine.
 *
 * This file is part of Flamingo HDFS File Uploader.
 *
 * Flamingo HDFS File Uploader is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Flamingo HDFS File Uploader is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.openflamingo.uploader.handler;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.openflamingo.uploader.JobContext;
import org.openflamingo.uploader.jaxb.*;
import org.openflamingo.uploader.policy.SelectorPattern;
import org.openflamingo.uploader.policy.SelectorPatternFactory;
import org.openflamingo.uploader.util.DateUtils;
import org.openflamingo.uploader.util.FileSystemScheme;
import org.openflamingo.uploader.util.JVMIDUtils;
import org.slf4j.Logger;

import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import static org.openflamingo.uploader.util.FileSystemUtils.*;

/**
 * Handler that takes files from a local file system ingress and uploads them to HDFS.
 *
 * <p>Files move through these stages: source directory, working directory, a
 * {@link #PROCESSING_FILE_QUALIFIER}-suffixed name inside the working directory, the HDFS
 * staging path, and finally the HDFS target path. The local file then ends up in the complete
 * directory (or is deleted), or in the error directory when the upload failed.
 *
 * @author Edward KIM
 * @since 0.1
 */
public class LocalToHdfsHandler implements Handler {

    /**
     * Suffix appended to a working file's name while it is being uploaded.
     */
    public static final String PROCESSING_FILE_QUALIFIER = ".processing"; // FIXME

    /**
     * Hadoop Configuration key for the HDFS URL.
     */
    public final static String HDFS_URL = "fs.default.name";

    /**
     * Hadoop Configuration key for the Job Tracker.
     */
    public final static String JOB_TRACKER = "mapred.job.tracker";

    /**
     * Job Logger
     */
    private final Logger jobLogger;

    /**
     * HDFS File Uploader Job Context
     */
    private final JobContext jobContext;

    /**
     * HDFS File Uploader Job
     */
    private final Job job;

    /**
     * Local ingress definition of the job.
     */
    private final Local local;

    /**
     * Creates a handler for one job.
     *
     * @param jobContext Flamingo HDFS File Uploader Job Context
     * @param job        Job
     * @param local      Local Ingress
     * @param jobLogger  Job Logger for trace
     */
    public LocalToHdfsHandler(JobContext jobContext, Job job, Local local, Logger jobLogger) {
        this.jobContext = jobContext;
        this.job = job;
        this.local = local;
        this.jobLogger = jobLogger;
    }

    /**
     * Moves the selected source files into the working directory and uploads every file found
     * there (that is not already marked as processing) to HDFS.
     *
     * @throws Exception if a file system operation fails outside of the guarded copy step
     */
    @Override
    public void execute() throws Exception {
        // Move the files accepted by the selector from the source to the working directory.
        copyToWorkingDirectory();

        // Pick up everything in the working directory that is not yet being processed.
        List<FileStatus> files = getFilesFromWorkingDirectory();
        if (files.isEmpty()) {
            jobLogger.info(" ?? ? ? .");
            return;
        }

        for (FileStatus workingFile : files) {
            uploadWorkingFile(workingFile);
        }
    }

    /**
     * Uploads a single working file: marks it as processing, copies it to the HDFS staging
     * path, renames it into the target path and finally moves the local file to the complete
     * directory. On a failed copy or a failed staging-to-target rename the local processing
     * file is moved to the error directory instead.
     *
     * @param workingFile file in the working directory to upload
     * @throws Exception if a file system operation outside the guarded copy step fails
     */
    private void uploadWorkingFile(FileStatus workingFile) throws Exception {
        Path workingPath = workingFile.getPath();
        FileSystem workingFS = getFileSystem(workingPath);

        // Mark the file as being processed by renaming it inside the working directory.
        String processingFileName = workingPath.getName() + PROCESSING_FILE_QUALIFIER;
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        Path processingFile = new Path(workingDirectory, processingFileName);
        boolean renamed = workingFS.rename(workingPath, processingFile);
        jobLogger.debug(" ? ? '{}'? '{}' ?? .", workingPath, processingFile);
        if (!renamed) {
            return;
        }

        // Resolve the HDFS outgress and the file system of its cluster.
        Hdfs hdfs = job.getPolicy().getOutgress().getHdfs();
        String cluster = jobContext.getValue(hdfs.getCluster());
        Configuration configuration = getConfiguration(jobContext.getModel(), cluster);
        FileSystem targetFS = FileSystem.get(configuration);
        jobLogger.info("HDFS? Hadoop Cluster '{}'? Hadoop Cluster? ? ? .", cluster);

        // Target and staging directories on HDFS.
        String targetDirectory = jobContext.getValue(hdfs.getTargetPath());
        String stagingDirectory = jobContext.getValue(hdfs.getStagingPath());
        jobLogger.info("HDFS? ? '{}'? ? '{}'.", targetDirectory, stagingDirectory);

        // Non-negative number used to build the staging file name.
        int hash = stagingHash(workingPath, processingFile);
        jobLogger.debug("? '{}'? ? '{}'? '{}'? ?.",
                new Object[] { stagingDirectory, processingFile.getName(), hash });

        // FIXME
        Path stagingFile = new Path(stagingDirectory,
                DateUtils.parseDate(jobContext.getStartDate(), "yyyyMMddHHmmss") + "_" + String.valueOf(hash));
        try {
            targetFS.copyFromLocalFile(false, false, processingFile, stagingFile);
        } catch (Exception ex) {
            // The exception is passed as the trailing argument so the cause is not lost.
            jobLogger.warn(" ? ? '{}'? ? ? '{}' ? ??.",
                    new Object[] { processingFile, stagingFile, ex });
            // The file now carries the processing suffix, so hand over the status of the
            // renamed file; copyToErrorDirectory ignores names without that suffix.
            copyToErrorDirectory(workingFS.getFileStatus(processingFile));
            return;
        }
        jobLogger.info(" ? ? '{}'? ? ? '{}' .", processingFile, stagingFile);

        // Publish the staged file under its original name in the target directory.
        Path targetFile = new Path(targetDirectory, workingPath.getName());
        if (!targetFS.rename(stagingFile, targetFile)) {
            // Do not report the file as complete (or delete it) when publishing failed.
            jobLogger.warn("Failed to move staging file '{}' to target '{}'; moving '{}' to the error directory.",
                    new Object[] { stagingFile, targetFile, processingFile });
            copyToErrorDirectory(workingFS.getFileStatus(processingFile));
            return;
        }
        jobLogger.info("? ? '{}' ?? '{}' ??.", stagingFile, targetFile);

        // Move (or delete) the local file now that the upload is done.
        copyToCompleteDirectory(workingFS.getFileStatus(processingFile));
    }

    /**
     * Builds the non-negative number used in the staging file name from the hash of both paths
     * plus the value returned by {@link JVMIDUtils#generateUUID()}.
     *
     * <p>The arithmetic is done in {@code long} and masked to 31 bits so the result cannot be
     * negative, even for {@code Integer.MIN_VALUE} hash codes or sums that overflow {@code int}.
     * NOTE(review): this assumes {@code generateUUID()} returns a decimal integer string, as the
     * original {@code Integer.parseInt} call did - confirm.
     *
     * @param workingPath    path of the file before it was marked as processing
     * @param processingFile path of the file after it was marked as processing
     * @return a value in the range {@code [0, Integer.MAX_VALUE]}
     */
    private static int stagingHash(Path workingPath, Path processingFile) {
        long pathHash = Math.abs((long) (workingPath.toString() + processingFile.toString()).hashCode());
        long raw = pathHash + Integer.parseInt(JVMIDUtils.generateUUID());
        return (int) (Math.abs(raw) & Integer.MAX_VALUE);
    }

    /**
     * Checks the local ingress directories and the HDFS outgress paths: scheme, shared file
     * system, and whether the directories can be created.
     */
    @Override
    public void validate() {
        /////////////////////////////////
        // Ingress :: Local FileSystem
        /////////////////////////////////

        String sourceDirectory = correctPath(jobContext.getValue(local.getSourceDirectory().getPath()));
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        String errorDirectory = correctPath(jobContext.getValue(local.getErrorDirectory()));
        String completeDirectory = correctPath(jobContext.getValue(local.getCompleteDirectory()));

        // Every local directory must use the local scheme.
        checkScheme(sourceDirectory, FileSystemScheme.LOCAL);
        checkScheme(workingDirectory, FileSystemScheme.LOCAL);
        checkScheme(errorDirectory, FileSystemScheme.LOCAL);
        checkScheme(completeDirectory, FileSystemScheme.LOCAL);

        // The directories are compared against the source directory's file system.
        validateSameFileSystem(sourceDirectory, workingDirectory);
        validateSameFileSystem(sourceDirectory, errorDirectory);
        // NOTE(review): the complete directory is already used unconditionally above, so the
        // two null checks below look inconsistent - confirm whether it is really optional.
        if (completeDirectory != null) {
            validateSameFileSystem(sourceDirectory, completeDirectory);
        }

        // Directory creation test.
        testCreateDir(new Path(sourceDirectory));
        testCreateDir(new Path(workingDirectory));
        testCreateDir(new Path(errorDirectory));
        if (local.getCompleteDirectory() != null) {
            testCreateDir(new Path(completeDirectory));
        }

        /////////////////////////////////
        // Outgress :: HDFS
        /////////////////////////////////

        Hdfs hdfs = job.getPolicy().getOutgress().getHdfs();
        String cluster = jobContext.getValue(hdfs.getCluster());
        Configuration configuration = getConfiguration(jobContext.getModel(), cluster);
        String stagingPath = jobContext.getValue(
                configuration.get(HDFS_URL) + "/" + jobContext.getValue(hdfs.getStagingPath()));
        String targetPath = jobContext.getValue(
                configuration.get(HDFS_URL) + "/" + jobContext.getValue(hdfs.getTargetPath()));

        checkScheme(stagingPath, FileSystemScheme.HDFS);
        checkScheme(targetPath, FileSystemScheme.HDFS);

        validateSameFileSystem(targetPath, stagingPath);

        testCreateDir(new Path(stagingPath));
        testCreateDir(new Path(targetPath));
    }

    /**
     * Builds a Hadoop Configuration from the cluster definition with the given name.
     *
     * <p>When no cluster of that name is defined, a default Configuration is returned unchanged.
     *
     * @param model       JAXB root object of the HDFS File Uploader
     * @param clusterName name of the Hadoop cluster
     * @return {@link org.apache.hadoop.conf.Configuration}
     */
    public static org.apache.hadoop.conf.Configuration getConfiguration(Flamingo model, String clusterName) {
        Configuration configuration = new Configuration();
        for (Cluster cluster : model.getClusters().getCluster()) {
            if (!clusterName.equals(cluster.getName())) {
                continue;
            }
            configuration.set(HDFS_URL, cluster.getFsDefaultName());
            configuration.set(JOB_TRACKER, cluster.getMapredJobTracker());

            // A cluster definition may carry no additional properties at all.
            if (cluster.getProperties() == null) {
                continue;
            }
            for (Property property : cluster.getProperties().getProperty()) {
                configuration.set(property.getName(), property.getValue());
            }
        }
        return configuration;
    }

    /**
     * Moves the files of the source directory that the selector pattern accepts into the
     * working directory. Directories and files whose name starts with {@code .} or {@code _}
     * or ends with {@code .work} are skipped.
     *
     * @return status of every file that was moved into the working directory
     * @throws IOException if a file system operation fails
     */
    public List<FileStatus> copyToWorkingDirectory() throws IOException {
        // Selector pattern that decides which source files are picked up.
        SelectorPattern selectorPattern = SelectorPatternFactory.getSelectorPattern(
                this.local.getSourceDirectory().getConditionType(),
                jobContext.getValue(this.local.getSourceDirectory().getCondition()),
                jobContext);

        String sourceDirectory = correctPath(jobContext.getValue(local.getSourceDirectory().getPath()));
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));

        FileSystem sourceDirectoryFS = getFileSystem(sourceDirectory);
        List<FileStatus> files = new LinkedList<FileStatus>();
        for (FileStatus sourceFile : listDirectory(sourceDirectoryFS, sourceDirectory)) {
            if (sourceFile.isDir()) {
                continue;
            }

            Path sourcePath = sourceFile.getPath();
            String name = sourcePath.getName();
            if (name.startsWith(".") || name.startsWith("_") || name.endsWith(".work")) {
                jobLogger.info(" ? '{}'? .", sourcePath);
                continue;
            }

            if (selectorPattern.accept(sourcePath)) {
                // Move the file into the working directory under the same name.
                Path workPath = new Path(workingDirectory, name);
                sourceDirectoryFS.rename(sourcePath, workPath);
                jobLogger.info("? ? '{}'? '{}' ??.", sourcePath, workPath);
                files.add(sourceDirectoryFS.getFileStatus(workPath));
            }
        }
        return files;
    }

    /**
     * Lists the files of the working directory that are not marked as processing, i.e. whose
     * name does not end with {@link #PROCESSING_FILE_QUALIFIER}. Directories are skipped.
     *
     * @return files of the working directory that are waiting to be processed
     * @throws IOException if the working directory cannot be listed
     */
    public List<FileStatus> getFilesFromWorkingDirectory() throws IOException {
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        FileSystem workingDirectoryFS = getFileSystem(workingDirectory);
        List<FileStatus> files = new LinkedList<FileStatus>();
        for (FileStatus status : listDirectory(workingDirectoryFS, workingDirectory)) {
            if (status.isDir()) {
                continue;
            }
            if (status.getPath().getName().endsWith(PROCESSING_FILE_QUALIFIER)) {
                jobLogger.info("'{}' ?? ? ?? ", status.getPath());
                continue;
            }
            files.add(status);
        }
        return files;
    }

    /**
     * Lists the files of the working directory that are marked as processing, i.e. whose name
     * ends with <tt>.processing</tt>. Directories are skipped.
     *
     * @return files of the working directory that carry the processing suffix
     * @throws IOException if the working directory cannot be listed
     */
    public List<FileStatus> getProcessingFilesFromWorkingDirectory() throws IOException {
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        FileSystem workingDirectoryFS = getFileSystem(workingDirectory);
        List<FileStatus> files = new LinkedList<FileStatus>();
        for (FileStatus status : listDirectory(workingDirectoryFS, workingDirectory)) {
            if (!status.isDir() && status.getPath().getName().endsWith(PROCESSING_FILE_QUALIFIER)) {
                files.add(status);
            }
        }
        return files;
    }

    /**
     * Finishes a successfully uploaded file: deletes it when the ingress is configured to
     * remove files after copy, otherwise moves it to the complete directory without the
     * processing suffix.
     *
     * @param fileToMove file to delete or move
     * @return <tt>true</tt> if the delete or the rename succeeded
     * @throws IOException if the file system operation fails
     */
    public boolean copyToCompleteDirectory(FileStatus fileToMove) throws IOException {
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        FileSystem workingDirectoryFS = getFileSystem(workingDirectory);

        boolean success;
        if (local.isRemoveAfterCopy()) {
            jobLogger.info("? . ? ? '{}'? .", fileToMove.getPath());
            success = workingDirectoryFS.delete(fileToMove.getPath(), false);
            if (!success) {
                jobLogger.info("? ? '{}'? .", fileToMove.getPath());
            }
        } else {
            // Resolved only here because the complete directory is not needed when deleting.
            String completeDirectory = correctPath(jobContext.getValue(local.getCompleteDirectory()));
            Path completedPath = new Path(completeDirectory,
                    stripProcessingQualifier(fileToMove.getPath().getName()));
            jobLogger.info("? . ? ? '{}'? '{}' ??.", fileToMove.getPath(), completedPath);
            success = workingDirectoryFS.rename(fileToMove.getPath(), completedPath);
            if (!success) {
                jobLogger.warn("? ??? ? .");
            }
        }
        return success;
    }

    /**
     * Moves a file that carries the processing suffix to the error directory, dropping the
     * suffix from its name. Files without the suffix are left untouched.
     *
     * @param fs file to move
     * @return <tt>true</tt> if the file was moved, <tt>false</tt> if the rename failed or the
     *         file name does not end with {@link #PROCESSING_FILE_QUALIFIER}
     * @throws IOException if the file system operation fails
     */
    public boolean copyToErrorDirectory(FileStatus fs) throws IOException {
        String workingDirectory = correctPath(jobContext.getValue(local.getWorkingDirectory()));
        String errorDirectory = correctPath(jobContext.getValue(local.getErrorDirectory()));
        FileSystem workingDirectoryFS = getFileSystem(workingDirectory);
        if (!fs.getPath().getName().endsWith(PROCESSING_FILE_QUALIFIER)) {
            return false;
        }

        Path errorPath = new Path(errorDirectory, stripProcessingQualifier(fs.getPath().getName()));
        jobLogger.info(" ? ?? . '{}' ?? '{}' ??.", fs.getPath(), errorPath);
        return workingDirectoryFS.rename(fs.getPath(), errorPath);
    }

    /**
     * Removes a trailing {@link #PROCESSING_FILE_QUALIFIER} from a file name. The qualifier is
     * matched literally and only at the end of the name, so other parts of the name are never
     * altered.
     *
     * @param fileName file name, with or without the processing suffix
     * @return the name without the suffix, or the name unchanged if it has no such suffix
     */
    private static String stripProcessingQualifier(String fileName) {
        if (fileName.endsWith(PROCESSING_FILE_QUALIFIER)) {
            return fileName.substring(0, fileName.length() - PROCESSING_FILE_QUALIFIER.length());
        }
        return fileName;
    }

    /**
     * Lists a directory, treating a {@code null} listing as empty.
     * NOTE(review): presumably some Hadoop releases return {@code null} instead of throwing for
     * a missing directory - confirm against the Hadoop version in use.
     *
     * @param fileSystem file system that owns the directory
     * @param directory  directory to list
     * @return the directory entries, never {@code null}
     * @throws IOException if the directory cannot be listed
     */
    private static FileStatus[] listDirectory(FileSystem fileSystem, String directory) throws IOException {
        FileStatus[] entries = fileSystem.listStatus(new Path(directory));
        return entries == null ? new FileStatus[0] : entries;
    }
}