List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
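The examples below share one recurring pattern: check whether a path exists and, if it does, remove it recursively before writing new output. As a minimal sketch of that pattern (not taken from any of the projects below; the class name and the path "/tmp/example-output" are hypothetical placeholders):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path outputPath = new Path("/tmp/example-output"); // hypothetical path
        if (fs.exists(outputPath)) {
            // second argument true => delete the directory and all of its contents
            fs.delete(outputPath, true);
        }
    }
}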
From source file:com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(BackfillPhaseJob.class);
    job.setJobName(name);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    LOGGER.info("*******************************************************************************");
    String controllerHost = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_HOST);
    String controllerPort = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_PORT);
    LOGGER.info("Controller Host : {} Controller Port : {}", controllerHost, controllerPort);
    String segmentStartTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_START_TIME);
    String segmentEndTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_END_TIME);
    long startTime = Long.valueOf(segmentStartTime);
    long endTime = Long.valueOf(segmentEndTime);
    if (Long.valueOf(segmentStartTime) > Long.valueOf(segmentEndTime)) {
        throw new IllegalStateException("Start time cannot be greater than end time");
    }
    String tableName = getAndSetConfiguration(configuration, BACKFILL_PHASE_TABLE_NAME);
    LOGGER.info("Start time : {} End time : {} Table name : {}", segmentStartTime, segmentEndTime, tableName);

    String outputPath = getAndSetConfiguration(configuration, BACKFILL_PHASE_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputPath);
    Path backfillDir = new Path(outputPath);
    if (fs.exists(backfillDir)) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(backfillDir, true);
    }
    Path downloadDir = new Path(backfillDir, DOWNLOAD);
    LOGGER.info("Creating download dir : {}", downloadDir);
    fs.mkdirs(downloadDir);
    Path inputDir = new Path(backfillDir, INPUT);
    LOGGER.info("Creating input dir : {}", inputDir);
    fs.mkdirs(inputDir);
    Path outputDir = new Path(backfillDir, OUTPUT);
    LOGGER.info("Creating output dir : {}", outputDir);

    BackfillControllerAPIs backfillControllerAPIs = new BackfillControllerAPIs(controllerHost,
        Integer.valueOf(controllerPort), tableName);

    LOGGER.info("Downloading segments in range {} to {}", startTime, endTime);
    List<String> allSegments = backfillControllerAPIs.getAllSegments(tableName);
    List<String> segmentsToDownload = backfillControllerAPIs.findSegmentsInRange(tableName, allSegments,
        startTime, endTime);
    for (String segmentName : segmentsToDownload) {
        backfillControllerAPIs.downloadSegment(segmentName, downloadDir);
    }

    LOGGER.info("Reading downloaded segment input files");
    List<FileStatus> inputDataFiles = new ArrayList<>();
    inputDataFiles.addAll(Lists.newArrayList(fs.listStatus(downloadDir)));
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        LOGGER.info("Creating input files at {} for segment input files", inputDir);
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((inputDir + "/"
                + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(BackfillPhaseMapJob.BackfillMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);

    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Cleanup the working directory");
    LOGGER.info("Deleting the dir: {}", downloadDir);
    fs.delete(downloadDir, true);
    LOGGER.info("Deleting the dir: {}", inputDir);
    fs.delete(inputDir, true);
    LOGGER.info("Deleting the dir: {}", outputDir);
    fs.delete(outputDir, true);

    return job;
}
From source file:com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(DerivedColumnTransformationPhaseJob.class);

    Configuration configuration = job.getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(",")) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Topk path
    String topkPath = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH);
    LOGGER.info("Topk path : " + topkPath);

    // Output path
    Path outputPath = new Path(
        getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(),
        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());

    // New schema
    Schema outputSchema = newSchema(thirdeyeConfig);
    job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(),
        outputSchema.toString());

    // Map config
    job.setMapperClass(DerivedColumnTransformationPhaseMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(NullWritable.class);

    AvroJob.setOutputKeySchema(job, outputSchema);
    LazyOutputFormat.setOutputFormatClass(job, AvroKeyOutputFormat.class);
    AvroMultipleOutputs.addNamedOutput(job, "avro", AvroKeyOutputFormat.class, outputSchema);

    job.setNumReduceTasks(0);

    job.waitForCompletion(true);

    return job;
}
From source file:com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(SegmentCreationPhaseJob.class);
    job.setJobName(name);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
    LOGGER.info("Input path : {}", inputSegmentDir);
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputSegmentDir);
    LOGGER.info("Schema : {}", avroSchema);
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());

    String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputDir);
    Path stagingDir = new Path(outputDir, TEMP);
    LOGGER.info("Staging dir : {}", stagingDir);
    String segmentWallClockStart = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_START_TIME);
    LOGGER.info("Segment wallclock start time : {}", segmentWallClockStart);
    String segmentWallClockEnd = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_END_TIME);
    LOGGER.info("Segment wallclock end time : {}", segmentWallClockEnd);
    String schedule = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEDULE);
    LOGGER.info("Segment schedule : {}", schedule);
    String isBackfill = props.getProperty(SEGMENT_CREATION_BACKFILL.toString(), DEFAULT_BACKFILL);
    configuration.set(SEGMENT_CREATION_BACKFILL.toString(), isBackfill);
    LOGGER.info("Is Backfill : {}", configuration.get(SEGMENT_CREATION_BACKFILL.toString()));

    // Create temporary directory
    if (fs.exists(stagingDir)) {
        LOGGER.warn("Found the temp folder, deleting it");
        fs.delete(stagingDir, true);
    }
    fs.mkdirs(stagingDir);
    fs.mkdirs(new Path(stagingDir + "/input/"));

    // Create output directory
    if (fs.exists(new Path(outputDir))) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(new Path(outputDir), true);
    }
    fs.mkdirs(new Path(outputDir));

    // Read input files
    List<FileStatus> inputDataFiles = new ArrayList<>();
    for (String input : inputSegmentDir.split(",")) {
        Path inputPathPattern = new Path(input);
        inputDataFiles.addAll(Arrays.asList(fs.listStatus(inputPathPattern)));
    }
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((stagingDir + "/input/"
                + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
    FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.getConfiguration().set(SEGMENT_CREATION_THIRDEYE_CONFIG.toString(),
        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);

    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
    FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
    for (FileStatus segment : segmentArr) {
        fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
    }

    // Delete temporary directory.
    LOGGER.info("Cleanup the working directory.");
    LOGGER.info("Deleting the dir: {}", stagingDir);
    fs.delete(stagingDir, true);

    return job;
}
From source file:com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(TopKPhaseJob.class);

    Configuration configuration = job.getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(),
        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(TopKPhaseMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Combiner
    job.setCombinerClass(TopKPhaseCombiner.class);

    // Reduce config
    job.setReducerClass(TopKPhaseReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);

    return job;
}
From source file:com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob.java
License:Apache License
@Override
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);
    final Path stagedPath = new Path(String.format("%s/%s/staged", stagingPrefix, System.currentTimeMillis()));

    FileOutputFormat.setOutputPath(this, stagedPath);

    final Thread hook = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                killJob();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });

    Runtime.getRuntime().addShutdownHook(hook);

    final boolean retVal = super.waitForCompletion(verbose);

    Runtime.getRuntime().removeShutdownHook(hook);

    if (retVal) {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());

        fs.mkdirs(actualOutputPath);

        log.info(String.format("Deleting data at old path[%s]", actualOutputPath));
        fs.delete(actualOutputPath, true);

        log.info(String.format("Moving from staged path[%s] to final resting place[%s]", stagedPath,
            actualOutputPath));
        return fs.rename(stagedPath, actualOutputPath);
    }

    log.warn("retVal was false for some reason...");
    return retVal;
}
From source file:com.littlehotspot.hadoop.mr.box.BoxLog.java
License:Open Source License
@Override
public int run(String[] arg) throws Exception {
    try {
        if (arg.length > 2) {
            BOX_LOG_FORMAT_REGEX = Pattern.compile(arg[2]);
        }

        Job job = Job.getInstance(this.getConf(), BoxLog.class.getSimpleName());
        job.setJarByClass(BoxLog.class);

        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(BoxMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(BoxReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java
License:Open Source License
@Override
public int run(String[] arg) throws Exception {
    try {
        Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName());
        job.setJarByClass(MobileLog.class);

        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(MobileMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(MobileReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        CommonVariables.initMapReduce(this.getConf(), args); // initialize common MapReduce variables

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // compile the mapper input-format regex if one was supplied
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(GeneralReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.hdfs2hbase.api.user.UserScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        CommonVariables.initMapReduce(this.getConf(), args); // initialize common MapReduce variables
        CommonVariables.hBaseHelper = new HBaseHelper(this.getConf());

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // compile the mapper input-format regex if one was supplied
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        job.setMapperClass(UserMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(UserReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean state = job.waitForCompletion(true);
        if (!state) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.m6d.filecrush.crush.integration.CrushMapReduceTest.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    job.set("mapreduce.output.fileoutputformat.compress.type", CompressionType.BLOCK.name());
    job.set("mapreduce.output.fileoutputformat.compress.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));
    homeDir = homeDirPath.toUri().getPath();
    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}