Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

Delete a file.

Usage

From source file:com.inmobi.messaging.consumer.util.TestUtil.java

License:Apache License

public static void cleanupCluster(ClusterUtil cluster) throws IOException {
    FileSystem fs = FileSystem.get(cluster.getHadoopConf());
    LOG.debug("Cleaning up the dir: " + cluster.getRootDir());
    fs.delete(new Path(cluster.getRootDir()), true);
}

From source file:com.intel.hadoop.hbase.dot.KEY.java

License:Apache License

private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
        String mrTableName) throws IOException, ClassNotFoundException, InterruptedException {

    this.conf.set(KEY.INPUT_TABLE, mrTableName);
    Job job = new Job(this.conf);
    job.setJobName("Generate Data for [" + mrTableName + "]");
    job.setJarByClass(GenerateTestTable.class);

    job.setInputFormatClass(inputFormatClass);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp", "tempout");
    fs.delete(path, true);

    FileOutputFormat.setOutputPath(job, path);

    job.setMapperClass(mapperClass);//w  ww .  java 2 s.  co  m
    job.setNumReduceTasks(0);

    TableMapReduceUtil.addDependencyJars(job);
    // Add a Class from the hbase.jar so it gets registered too.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);

    TableMapReduceUtil.initCredentials(job);

    job.waitForCompletion(true);

}

From source file:com.jointhegrid.hive_test.HiveTestBase.java

License:Apache License

public void setUp() throws Exception {
    super.setUp();

    String jarFile = org.apache.hadoop.hive.ql.exec.MapRedTask.class.getProtectionDomain().getCodeSource()
            .getLocation().getFile();//from  ww w  .ja v a  2s . c o m
    System.setProperty(HiveConf.ConfVars.HIVEJAR.toString(), jarFile);

    Path rootDir = getDir(ROOT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(rootDir, true);
    Path metastorePath = new Path("/tmp/metastore_db");
    fs.delete(metastorePath, true);
    Path warehouse = new Path("/tmp/warehouse");
    fs.delete(warehouse, true);
    fs.mkdirs(warehouse);
}

From source file:com.jyz.study.hadoop.mapreduce.datajoin.DataJoinJob.java

License:Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];// w  ww. j  a v a2s .  c o  m
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (log.isDebugEnabled()) {
        log.debug("run({})", Utils.toString(args));
    }/*from  ww  w  . j a  va2  s.c  o m*/

    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);

    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName("Freebase2RDFDriver");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(Freebase2RDFMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(Freebase2RDFReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Utils.setReducers(job, configuration, log);

    job.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled())
        Utils.log(job, log);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **    Merge the source files into one.                                          **
    /**    Should change the directories of each file before executing the program   **
    ***********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.//from   w w w  . j  a v a  2 s .c o m
     * Befort Start, Check the Length of Sequence We Want to Predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    //Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          Should think another way to implement the combiner class
        //          Current Implementation is not helpful to Job.
        //          job.setCombinerClass(Combiner.class);

        //Set 1 for number of reduce task for keeping 100 most neighbors in sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:com.kylinolap.common.persistence.HBaseResourceStore.java

License:Apache License

private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, HTableInterface table)
        throws IOException {
    Path redirectPath = bigCellHDFSPath(resPath);
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    if (fileSystem.exists(redirectPath)) {
        fileSystem.delete(redirectPath, true);
    }/*from   ww  w. j  a va  2  s . c  om*/

    FSDataOutputStream out = fileSystem.create(redirectPath);

    try {
        out.write(largeColumn);
    } finally {
        IOUtils.closeQuietly(out);
    }

    return redirectPath;
}

From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java

License:Apache License

protected void deletePath(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        fs.delete(path, true);
    }//  w w  w  .  j  a  v a 2s. c  o  m
}

From source file:com.kylinolap.job.hadoop.cube.StorageCleanupJob.java

License:Apache License

private void cleanUnusedHdfsFiles(Configuration conf) throws IOException {
    JobEngineConfig engineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    FileSystem fs = FileSystem.get(conf);
    List<String> allHdfsPathsNeedToBeDeleted = new ArrayList<String>();
    // GlobFilter filter = new
    // GlobFilter(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()
    // + "/kylin-.*");
    FileStatus[] fStatus = fs.listStatus(new Path(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()));
    for (FileStatus status : fStatus) {
        String path = status.getPath().getName();
        // System.out.println(path);
        if (path.startsWith(JobInstance.JOB_WORKING_DIR_PREFIX)) {
            String kylinJobPath = engineConfig.getHdfsWorkingDirectory() + "/" + path;
            allHdfsPathsNeedToBeDeleted.add(kylinJobPath);
        }/* w  w  w.ja va2  s . c  o m*/
    }

    List<JobInstance> allJobs = JobDAO.getInstance(KylinConfig.getInstanceFromEnv()).listAllJobs();
    for (JobInstance jobInstance : allJobs) {
        // only remove FINISHED and DISCARDED job intermediate files
        if (isJobInUse(jobInstance) == true) {
            String path = JobInstance.getJobWorkingDir(jobInstance, engineConfig);
            allHdfsPathsNeedToBeDeleted.remove(path);
            log.info("Remove " + path + " from deletion list, as the path belongs to job "
                    + jobInstance.getUuid() + " with status " + jobInstance.getStatus());
        }
    }

    // remove every segment working dir from deletion list
    for (CubeInstance cube : cubeMgr.listAllCubes()) {
        for (CubeSegment seg : cube.getSegments()) {
            String jobUuid = seg.getLastBuildJobID();
            if (jobUuid != null && jobUuid.equals("") == false) {
                String path = JobInstance.getJobWorkingDir(jobUuid, engineConfig.getHdfsWorkingDirectory());
                allHdfsPathsNeedToBeDeleted.remove(path);
                log.info("Remove " + path + " from deletion list, as the path belongs to segment " + seg
                        + " of cube " + cube.getName());
            }
        }
    }

    if (delete == true) {
        // remove files
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            log.info("Deleting hdfs path " + hdfsPath);
            Path p = new Path(hdfsPath);
            if (fs.exists(p) == true) {
                fs.delete(p, true);
                log.info("Deleted hdfs path " + hdfsPath);
            } else {
                log.info("Hdfs path " + hdfsPath + "does not exist");
            }
        }
    } else {
        System.out.println("--------------- HDFS Path To Be Deleted ---------------");
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            System.out.println(hdfsPath);
        }
        System.out.println("-------------------------------------------------------");
    }

}

From source file:com.liferay.hadoop.action.HadoopJob.java

License:Open Source License

public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {

    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();

    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {/*  w w w  . j  av  a2s.  c om*/
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();

                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();

            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception ioe) {
        writer.println("Job error: ");

        ioe.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}