com.hdfstoftp.main.HdfsToFtp.java Source code

Java tutorial

Introduction

Here is the source code for com.hdfstoftp.main.HdfsToFtp.java

Source

package com.hdfstoftp.main;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

/**
 * Licensed to the ctyun,this can be only used in ctyun company
 *
 */

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.time.DateUtils;
import org.apache.commons.net.ftp.FTPClient;
import org.apache.commons.net.ftp.FTPFile;
import org.apache.commons.pool.impl.contrib.FTPClientPool;
import org.apache.commons.pool.impl.contrib.FtpClientFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.mortbay.log.Log;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hdfstoftp.config.Config;
import com.hdfstoftp.service.UploadFileTask;

/**
 * Command-line tool that copies files from a Hadoop {@link FileSystem} to an
 * FTP server.
 * <p>
 * When the configured source is a directory, its direct child files are
 * uploaded in parallel, optionally filtered by modification time (options
 * {@code d}, {@code h}, {@code t}) and by a file-name regular expression
 * (option {@code r}); option {@code o} controls whether files already present
 * on the FTP server are uploaded again. When the source is a plain file, it is
 * read as a list of {@code <anything>&<path>} lines and every listed path is
 * uploaded sequentially.
 *
 * @author
 * @mail wanghouda@126.com
 */
public class HdfsToFtp {

    /** General progress logger; also records the path of every file uploaded from a list file. */
    private static final Logger logger = LoggerFactory.getLogger("file");
    /** Records the paths of files that could not be uploaded. */
    private static final Logger logger_failed = LoggerFactory.getLogger("failed");

    /**
     * Logs the message as an error and terminates the JVM with exit status 1.
     *
     * @param str
     *            message to log before exiting
     */
    static void printAndExit(String str) {
        logger.error(str);
        System.exit(1);
    }

    /**
     * Program entry point: builds a {@link Config} from the command-line
     * arguments and runs the transfer. Any failure is logged rather than
     * propagated.
     *
     * @param args
     *            command-line arguments, passed unchanged to {@link Config}
     * @throws IOException
     *             declared for compatibility; failures are logged instead
     * @throws ParseException
     *             declared for compatibility; failures are logged instead
     */
    public static void main(String[] args) throws IOException, ParseException {
        // The real command-line arguments are used as given; they must not be
        // replaced by hard-coded developer paths.
        try {
            logger.info("your input param is=" + Arrays.toString(args));
            Config config = new Config(args);
            logger.info("your config is =" + config.toString());
            copyFromHDFSToFTP(config);
        } catch (Exception e) {
            logger.error("transfer from hdfs to ftp failed", e);
        }
    }

    /**
     * Runs the transfer described by {@code config} and logs the elapsed time.
     * <p>
     * A directory source is uploaded in parallel; a file source is treated as
     * a list of paths to upload.
     *
     * @param config
     *            parsed tool configuration
     * @return always {@code true}; individual failures are reported through
     *         the loggers
     * @throws Exception
     *             if the source cannot be inspected or the directory transfer
     *             cannot be set up
     */
    private static boolean copyFromHDFSToFTP(Config config) throws Exception {
        Configuration conf = new Configuration();
        FileSystem srcFS = FileSystem.get(conf);
        long start = System.currentTimeMillis();

        Path src = new Path(config.getSouceDir());
        FileStatus fileStatus = srcFS.getFileStatus(src);
        if (fileStatus.isDirectory()) {
            uploadDirectory(config, srcFS, src, fileStatus);
        } else {
            uploadFromListFile(config, srcFS, fileStatus.getPath());
        }

        long end = System.currentTimeMillis();
        logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
        return true;
    }

    /**
     * Uploads the direct child files of {@code src} to the configured FTP
     * directory using a fixed-size thread pool and an FTP client pool of the
     * same size. Sub-directories are ignored.
     *
     * @param config
     *            parsed tool configuration
     * @param srcFS
     *            file system that holds the source directory
     * @param src
     *            source directory
     * @param srcStatus
     *            status of {@code src}
     * @throws Exception
     *             if listing, filtering or pool handling fails
     */
    private static void uploadDirectory(Config config, FileSystem srcFS, Path src, FileStatus srcStatus)
            throws Exception {
        boolean isRename = config.isRenameUploaded();
        int retryTimes = config.getRetryTimes();
        String dstPath = config.getDestDir();
        CommandLine commandLine = config.getCommandLine();

        String subDir = null;
        if (isRename) {
            // Create the sub-directory that is handed to the upload tasks.
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(srcStatus.getPath(), subDir));
        }

        int threadNum = config.getThreadNum();
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);
        FTPClientPool ftpPool = null;
        try {
            ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));

            // Create the destination directory on the FTP server.
            FTPClient ftpClient = ftpPool.borrowObject();
            try {
                ftpClient.makeDirectory(dstPath);
            } finally {
                ftpPool.returnObject(ftpClient);
            }

            FileStatus[] contents = srcFS.listStatus(src);

            // Filter time is accumulated so that both filters are accounted for.
            long filterMillis = 0;
            if (commandLine.hasOption("d") || commandLine.hasOption("h") || commandLine.hasOption("t")) {
                long beginFilter = System.currentTimeMillis();
                contents = getNewContents(parseTimeRange(commandLine), contents);
                filterMillis += System.currentTimeMillis() - beginFilter;
            }
            if (commandLine.hasOption("r")) {
                long beginFilter = System.currentTimeMillis();
                contents = getFilterContents(commandLine.getOptionValue("r").trim(), contents);
                filterMillis += System.currentTimeMillis() - beginFilter;
            }
            logger.info("total file count:" + contents.length);

            boolean overwrite = true;
            if (commandLine.hasOption("o")) {
                overwrite = "true".equals(commandLine.getOptionValue("o").trim());
            }
            Map<String, String> fileNameMap = null;
            long skipMillis = 0;
            if (!overwrite) {
                // Names already on the server are needed to decide what to skip.
                long beginSkip = System.currentTimeMillis();
                fileNameMap = getFileNameMap(dstPath, ftpPool);
                skipMillis = System.currentTimeMillis() - beginSkip;
            }

            int skiped = 0;
            List<Future<?>> futureList = new ArrayList<Future<?>>();
            List<Path> submittedPaths = new ArrayList<Path>();
            for (FileStatus status : contents) {
                if (status.isDirectory()) {
                    continue;
                }
                Path filePath = status.getPath();
                String fileName = filePath.getName();
                if (!overwrite && fileNameMap.containsKey(fileName)) {
                    skiped++;
                    logger.info("skiped filename:" + fileName);
                    continue;
                }
                Future<?> future = threadPool.submit(new UploadFileTask(srcFS, filePath,
                        new Path(dstPath, fileName), ftpPool, false, isRename, subDir, retryTimes));
                futureList.add(future);
                submittedPaths.add(filePath);
            }

            int transfered = 0;
            int failed = 0;
            for (int i = 0; i < futureList.size(); i++) {
                boolean succeeded;
                try {
                    // Anything other than Boolean.TRUE (including null) counts as a failure.
                    succeeded = Boolean.TRUE.equals(futureList.get(i).get());
                } catch (ExecutionException e) {
                    // One broken task must not abort the remaining results.
                    logger.error("upload task threw an exception for " + submittedPaths.get(i), e);
                    succeeded = false;
                }
                if (succeeded) {
                    transfered++;
                    if (transfered % 50 == 0 || transfered == futureList.size()) {
                        logger.info("have transfered:" + transfered + " files");
                    }
                } else {
                    failed++;
                    logger.error("failed transter:" + failed + " files");
                }
            }

            logger.info("filter time:" + filterMillis + " ms");
            if (!overwrite) {
                logger.info("skip time:" + skipMillis + " ms");
            }
            logger.info("total file count:" + contents.length);
            logger.info("total transtered: " + transfered + ",total failed:" + failed + ",total skiped:" + skiped);
        } finally {
            // On the normal path every task has finished, so this only stops
            // idle workers; on an error path it also cancels pending uploads.
            threadPool.shutdownNow();
            if (ftpPool != null) {
                ftpPool.close();
            }
        }
    }

    /**
     * Uploads every file named in {@code listFile}. Each line is split on
     * {@code '&'}; lines that do not yield exactly two fields are ignored and
     * the second field is taken as the path of the file to upload. The list is
     * read with the platform default charset.
     * <p>
     * Successful paths are written to the "file" logger and unsuccessful ones
     * to the "failed" logger. Errors are logged and do not propagate, except
     * for a failure while destroying the FTP client.
     *
     * @param config
     *            parsed tool configuration
     * @param srcFS
     *            file system holding both the list file and the listed files
     * @param listFile
     *            path of the list file
     * @throws Exception
     *             if destroying the FTP client fails
     */
    private static void uploadFromListFile(Config config, FileSystem srcFS, Path listFile) throws Exception {
        String dstPath = config.getDestDir();
        FtpClientFactory facotry = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        BufferedReader reader = null;
        try {
            // The list file was located through srcFS, so it is read through srcFS as well.
            reader = new BufferedReader(new InputStreamReader(srcFS.open(listFile)));
            ftpClient = facotry.makeObject();

            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("&");
                if (fields.length != 2 || fields[1].isEmpty()) {
                    continue;
                }
                Path filePath = new Path(fields[1]);
                // Each file gets its own remote name below the destination directory.
                String remote = remotePath(dstPath, filePath.getName());
                InputStream in = null;
                try {
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(remote, in);
                    logger.debug("ftp reply code for {}: {}", remote, ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                } catch (IOException e) {
                    // Record the failure and carry on with the next entry.
                    logger.error("failed to transfer " + filePath, e);
                    logger_failed.info(filePath.toString());
                } finally {
                    closeQuietly(in);
                }
            }
        } catch (Exception e) {
            // Best effort: a broken list file or FTP connection is logged, not rethrown.
            logger.error("failed to process list file " + listFile, e);
        } finally {
            closeQuietly(reader);
            if (ftpClient != null) {
                facotry.destroyObject(ftpClient);
            }
        }
    }

    /** Joins an FTP directory and a file name with exactly one separating slash. */
    private static String remotePath(String dir, String name) {
        return dir.endsWith("/") ? dir + name : dir + "/" + name;
    }

    /** Closes the resource if it is non-null, logging (not throwing) any I/O error. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.warn("failed to close resource", e);
        }
    }

    /**
     * Derives a modification-time window from the command line.
     * <ul>
     * <li>option {@code d}: parsed with {@code Config.FORMAT_DATE}; the window
     * ends one day after the parsed instant</li>
     * <li>option {@code h}: parsed with {@code Config.FORMAT_HOUR}; the window
     * ends one hour after the parsed instant</li>
     * <li>option {@code t}: parsed with {@code Config.FORMAT_SS}; the window
     * has no end</li>
     * </ul>
     * Only the first option present, in the order above, is used.
     *
     * @param commandLine
     *            parsed command line
     * @return two-element array {@code {begin, end}} in epoch milliseconds; an
     *         element is {@code null} when that bound is not set
     * @throws ParseException
     *             declared for compatibility with existing callers
     * @throws java.text.ParseException
     *             if the option value does not match the expected format
     */
    public static Long[] parseTimeRange(CommandLine commandLine) throws ParseException, java.text.ParseException {
        Long beginTime = null;
        Long endTime = null;

        if (commandLine.hasOption("d")) {
            Date beginDate = new SimpleDateFormat(Config.FORMAT_DATE).parse(commandLine.getOptionValue("d").trim());
            beginTime = beginDate.getTime();
            endTime = DateUtils.addDays(beginDate, 1).getTime();
        } else if (commandLine.hasOption("h")) {
            Date beginDate = new SimpleDateFormat(Config.FORMAT_HOUR).parse(commandLine.getOptionValue("h").trim());
            beginTime = beginDate.getTime();
            endTime = DateUtils.addHours(beginDate, 1).getTime();
        } else if (commandLine.hasOption("t")) {
            Date beginDate = new SimpleDateFormat(Config.FORMAT_SS).parse(commandLine.getOptionValue("t").trim());
            beginTime = beginDate.getTime();
        }

        return new Long[] { beginTime, endTime };
    }

    /**
     * Lists {@code path} on the FTP server and returns the names of the
     * regular files found there.
     *
     * @param path
     *            remote directory to list
     * @param ftpPool
     *            pool to borrow the FTP client from; the client is always
     *            returned to it
     * @return map whose keys are the file names; every value is the empty
     *         string
     * @throws Exception
     *             if borrowing a client or listing the directory fails
     * @throws IllegalStateException
     *             propagated from the pool
     * @throws NoSuchElementException
     *             propagated from the pool
     */
    public static Map<String, String> getFileNameMap(String path, FTPClientPool ftpPool)
            throws NoSuchElementException, IllegalStateException, Exception {
        Map<String, String> fileNameMap = new HashMap<String, String>();
        FTPClient client = ftpPool.borrowObject();
        FTPFile[] files;
        try {
            files = client.listFiles(path);
        } finally {
            ftpPool.returnObject(client);
        }
        if (files == null) {
            return fileNameMap;
        }
        for (FTPFile file : files) {
            // Listing entries that could not be parsed may show up as null.
            if (file != null && file.isFile()) {
                fileNameMap.put(file.getName(), "");
            }
        }
        return fileNameMap;
    }

    /**
     * Keeps the non-directory entries whose file name matches {@code reg} in
     * full.
     *
     * @param reg
     *            regular expression the whole file name must match
     * @param contents
     *            entries to filter
     * @return matching entries, in their original order
     */
    public static FileStatus[] getFilterContents(String reg, FileStatus[] contents) {
        Pattern pattern = Pattern.compile(reg);
        List<FileStatus> statusList = new ArrayList<FileStatus>();
        for (FileStatus status : contents) {
            if (status.isDirectory()) {
                continue;
            }
            Matcher matcher = pattern.matcher(status.getPath().getName());
            if (matcher.matches()) {
                statusList.add(status);
            }
        }
        return statusList.toArray(new FileStatus[statusList.size()]);
    }

    /**
     * Keeps the entries whose modification time lies strictly inside the given
     * window.
     *
     * @param timeRange
     *            {@code {begin, end}} in epoch milliseconds; both bounds are
     *            exclusive and a {@code null} bound is treated as unbounded
     * @param fileStatus
     *            entries to filter
     * @return entries inside the window, in their original order
     */
    public static FileStatus[] getNewContents(Long[] timeRange, FileStatus[] fileStatus) {
        Long begin = timeRange[0];
        Long end = timeRange[1];
        List<FileStatus> statusList = new ArrayList<FileStatus>();
        for (FileStatus status : fileStatus) {
            long modificationTime = status.getModificationTime();
            boolean afterBegin = begin == null || begin < modificationTime;
            boolean beforeEnd = end == null || modificationTime < end;
            if (afterBegin && beforeEnd) {
                statusList.add(status);
            }
        }
        return statusList.toArray(new FileStatus[statusList.size()]);
    }

}