Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

Delete a file.

Usage

From source file:com.inmobi.messaging.consumer.util.TestUtil.java

License:Apache License

public static void cleanupCluster(ClusterUtil cluster) throws IOException {
    FileSystem fs = FileSystem.get(cluster.getHadoopConf());
    LOG.debug("Cleaning up the dir: " + cluster.getRootDir());
    fs.delete(new Path(cluster.getRootDir()), true);
}

From source file:com.intel.hadoop.hbase.dot.KEY.java

License:Apache License

private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
        String mrTableName) throws IOException, ClassNotFoundException, InterruptedException {

    this.conf.set(KEY.INPUT_TABLE, mrTableName);
    Job job = new Job(this.conf);
    job.setJobName("Generate Data for [" + mrTableName + "]");
    job.setJarByClass(GenerateTestTable.class);

    job.setInputFormatClass(inputFormatClass);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp", "tempout");
    fs.delete(path, true);

    FileOutputFormat.setOutputPath(job, path);

    job.setMapperClass(mapperClass);//w  ww .  java 2 s.  co  m
    job.setNumReduceTasks(0);

    TableMapReduceUtil.addDependencyJars(job);
    // Add a Class from the hbase.jar so it gets registered too.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);

    TableMapReduceUtil.initCredentials(job);

    job.waitForCompletion(true);

}

From source file:com.jointhegrid.hive_test.HiveTestBase.java

License:Apache License

public void setUp() throws Exception {
    super.setUp();

    String jarFile = org.apache.hadoop.hive.ql.exec.MapRedTask.class.getProtectionDomain().getCodeSource()
            .getLocation().getFile();//from  ww w  .ja v a  2s . c o m
    System.setProperty(HiveConf.ConfVars.HIVEJAR.toString(), jarFile);

    Path rootDir = getDir(ROOT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(rootDir, true);
    Path metastorePath = new Path("/tmp/metastore_db");
    fs.delete(metastorePath, true);
    Path warehouse = new Path("/tmp/warehouse");
    fs.delete(warehouse, true);
    fs.mkdirs(warehouse);
}

From source file:com.jyz.study.hadoop.mapreduce.datajoin.DataJoinJob.java

License:Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];// w  ww. j  a v a2s .  c o  m
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (log.isDebugEnabled()) {
        log.debug("run({})", Utils.toString(args));
    }/*from  ww  w  . j a  va2  s.c  o m*/

    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);

    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName("Freebase2RDFDriver");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(Freebase2RDFMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(Freebase2RDFReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Utils.setReducers(job, configuration, log);

    job.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled())
        Utils.log(job, log);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **    Merge the source files into one.                                          **
    /**    Should change the directories of each file before executing the program   **
    ***********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.//from   w w w  . j  a v a  2 s .c o m
     * Befort Start, Check the Length of Sequence We Want to Predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    //Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          Should think another way to implement the combiner class
        //          Current Implementation is not helpful to Job.
        //          job.setCombinerClass(Combiner.class);

        //Set 1 for number of reduce task for keeping 100 most neighbors in sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:com.kylinolap.common.persistence.HBaseResourceStore.java

License:Apache License

private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, HTableInterface table)
        throws IOException {
    Path redirectPath = bigCellHDFSPath(resPath);
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    if (fileSystem.exists(redirectPath)) {
        fileSystem.delete(redirectPath, true);
    }/*from   ww  w. j  a va  2  s . c  om*/

    FSDataOutputStream out = fileSystem.create(redirectPath);

    try {
        out.write(largeColumn);
    } finally {
        IOUtils.closeQuietly(out);
    }

    return redirectPath;
}

From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java

License:Apache License

protected void deletePath(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        fs.delete(path, true);
    }//  w w  w  .  j  a  v a 2s. c  o  m
}

From source file:com.kylinolap.job.hadoop.cube.StorageCleanupJob.java

License:Apache License

private void cleanUnusedHdfsFiles(Configuration conf) throws IOException {
    JobEngineConfig engineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    FileSystem fs = FileSystem.get(conf);
    List<String> allHdfsPathsNeedToBeDeleted = new ArrayList<String>();
    // GlobFilter filter = new
    // GlobFilter(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()
    // + "/kylin-.*");
    FileStatus[] fStatus = fs.listStatus(new Path(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()));
    for (FileStatus status : fStatus) {
        String path = status.getPath().getName();
        // System.out.println(path);
        if (path.startsWith(JobInstance.JOB_WORKING_DIR_PREFIX)) {
            String kylinJobPath = engineConfig.getHdfsWorkingDirectory() + "/" + path;
            allHdfsPathsNeedToBeDeleted.add(kylinJobPath);
        }/* w  w  w.ja va2  s . c  o m*/
    }

    List<JobInstance> allJobs = JobDAO.getInstance(KylinConfig.getInstanceFromEnv()).listAllJobs();
    for (JobInstance jobInstance : allJobs) {
        // only remove FINISHED and DISCARDED job intermediate files
        if (isJobInUse(jobInstance) == true) {
            String path = JobInstance.getJobWorkingDir(jobInstance, engineConfig);
            allHdfsPathsNeedToBeDeleted.remove(path);
            log.info("Remove " + path + " from deletion list, as the path belongs to job "
                    + jobInstance.getUuid() + " with status " + jobInstance.getStatus());
        }
    }

    // remove every segment working dir from deletion list
    for (CubeInstance cube : cubeMgr.listAllCubes()) {
        for (CubeSegment seg : cube.getSegments()) {
            String jobUuid = seg.getLastBuildJobID();
            if (jobUuid != null && jobUuid.equals("") == false) {
                String path = JobInstance.getJobWorkingDir(jobUuid, engineConfig.getHdfsWorkingDirectory());
                allHdfsPathsNeedToBeDeleted.remove(path);
                log.info("Remove " + path + " from deletion list, as the path belongs to segment " + seg
                        + " of cube " + cube.getName());
            }
        }
    }

    if (delete == true) {
        // remove files
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            log.info("Deleting hdfs path " + hdfsPath);
            Path p = new Path(hdfsPath);
            if (fs.exists(p) == true) {
                fs.delete(p, true);
                log.info("Deleted hdfs path " + hdfsPath);
            } else {
                log.info("Hdfs path " + hdfsPath + "does not exist");
            }
        }
    } else {
        System.out.println("--------------- HDFS Path To Be Deleted ---------------");
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            System.out.println(hdfsPath);
        }
        System.out.println("-------------------------------------------------------");
    }

}

From source file:com.liferay.hadoop.action.HadoopJob.java

License:Open Source License

public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {

    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();

    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {/*  w w w  . j  av  a2s.  c om*/
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();

                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();

            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception ioe) {
        writer.println("Job error: ");

        ioe.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}