com.inmobi.conduit.utils.CollapseFilesInDir.java Source code

Java tutorial

Introduction

Here is the source code for com.inmobi.conduit.utils.CollapseFilesInDir.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.inmobi.conduit.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.OutputStreamWriter;
import java.util.HashSet;
import java.util.Set;

public class CollapseFilesInDir {
    private static Logger LOG = Logger.getLogger(CollapseFilesInDir.class);

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        configuration.set("fs.default.name", args[0]);
        String dir = args[1];
        FileSystem fs = FileSystem.get(configuration);
        FileStatus[] fileList;
        try {
            fileList = fs.listStatus(new Path(dir));
        } catch (FileNotFoundException fe) {
            fileList = null;
        }
        if (fileList != null) {
            if (fileList.length > 1) {
                Set<Path> sourceFiles = new HashSet<Path>();
                Set<String> consumePaths = new HashSet<String>();
                //inputPath has have multiple files due to backlog
                //read all and create a tmp file
                for (int i = 0; i < fileList.length; i++) {
                    Path consumeFilePath = fileList[i].getPath().makeQualified(fs);
                    sourceFiles.add(consumeFilePath);
                    FSDataInputStream fsDataInputStream = fs.open(consumeFilePath);
                    try {
                        while (fsDataInputStream.available() > 0) {
                            String fileName = fsDataInputStream.readLine();
                            if (fileName != null) {
                                consumePaths.add(fileName.trim());
                                System.out.println("Adding [" + fileName + "] to pull");
                            }
                        }
                    } finally {
                        fsDataInputStream.close();
                    }
                }
                Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString());
                FSDataOutputStream out = fs.create(finalPath);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
                try {
                    for (String consumePath : consumePaths) {
                        System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList");
                        writer.write(consumePath);
                        writer.write("\n");
                    }
                } finally {
                    writer.close();
                }
                LOG.warn("Final File - [" + finalPath + "]");
                for (Path deletePath : sourceFiles) {
                    System.out.println("Deleting - [" + deletePath + "]");
                    fs.delete(deletePath);
                }
            }
        }
    }
}