List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
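The examples below share one recurring pattern: check whether a path exists and, if it does, remove it recursively before writing new output. As a minimal sketch of that pattern (not taken from any of the projects below; the class name and the path "/tmp/example-output" are hypothetical placeholders):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path outputPath = new Path("/tmp/example-output"); // hypothetical path
        if (fs.exists(outputPath)) {
            // second argument true => delete the directory and all of its contents
            fs.delete(outputPath, true);
        }
    }
}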
From source file:com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(BackfillPhaseJob.class);
    job.setJobName(name);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    LOGGER.info("*******************************************************************************");
    String controllerHost = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_HOST);
    String controllerPort = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_PORT);
    LOGGER.info("Controller Host : {} Controller Port : {}", controllerHost, controllerPort);
    String segmentStartTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_START_TIME);
    String segmentEndTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_END_TIME);
    long startTime = Long.valueOf(segmentStartTime);
    long endTime = Long.valueOf(segmentEndTime);
    if (Long.valueOf(segmentStartTime) > Long.valueOf(segmentEndTime)) {
        throw new IllegalStateException("Start time cannot be greater than end time");
    }
    String tableName = getAndSetConfiguration(configuration, BACKFILL_PHASE_TABLE_NAME);
    LOGGER.info("Start time : {} End time : {} Table name : {}", segmentStartTime, segmentEndTime, tableName);

    String outputPath = getAndSetConfiguration(configuration, BACKFILL_PHASE_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputPath);
    Path backfillDir = new Path(outputPath);
    if (fs.exists(backfillDir)) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(backfillDir, true);
    }
    Path downloadDir = new Path(backfillDir, DOWNLOAD);
    LOGGER.info("Creating download dir : {}", downloadDir);
    fs.mkdirs(downloadDir);
    Path inputDir = new Path(backfillDir, INPUT);
    LOGGER.info("Creating input dir : {}", inputDir);
    fs.mkdirs(inputDir);
    Path outputDir = new Path(backfillDir, OUTPUT);
    LOGGER.info("Creating output dir : {}", outputDir);

    BackfillControllerAPIs backfillControllerAPIs = new BackfillControllerAPIs(controllerHost,
        Integer.valueOf(controllerPort), tableName);

    LOGGER.info("Downloading segments in range {} to {}", startTime, endTime);
    List<String> allSegments = backfillControllerAPIs.getAllSegments(tableName);
    List<String> segmentsToDownload = backfillControllerAPIs.findSegmentsInRange(tableName, allSegments,
        startTime, endTime);
    for (String segmentName : segmentsToDownload) {
        backfillControllerAPIs.downloadSegment(segmentName, downloadDir);
    }

    LOGGER.info("Reading downloaded segment input files");
    List<FileStatus> inputDataFiles = new ArrayList<>();
    inputDataFiles.addAll(Lists.newArrayList(fs.listStatus(downloadDir)));
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        LOGGER.info("Creating input files at {} for segment input files", inputDir);
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((inputDir + "/"
                + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(BackfillPhaseMapJob.BackfillMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);

    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Cleanup the working directory");
    LOGGER.info("Deleting the dir: {}", downloadDir);
    fs.delete(downloadDir, true);
    LOGGER.info("Deleting the dir: {}", inputDir);
    fs.delete(inputDir, true);
    LOGGER.info("Deleting the dir: {}", outputDir);
    fs.delete(outputDir, true);

    return job;
}
From source file:com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(DerivedColumnTransformationPhaseJob.class);

    Configuration configuration = job.getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(",")) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Topk path
    String topkPath = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH);
    LOGGER.info("Topk path : " + topkPath);

    // Output path
    Path outputPath = new Path(
        getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(),
        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());

    // New schema
    Schema outputSchema = newSchema(thirdeyeConfig);
    job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(),
        outputSchema.toString());

    // Map config
    job.setMapperClass(DerivedColumnTransformationPhaseMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(NullWritable.class);

    AvroJob.setOutputKeySchema(job, outputSchema);
    LazyOutputFormat.setOutputFormatClass(job, AvroKeyOutputFormat.class);
    AvroMultipleOutputs.addNamedOutput(job, "avro", AvroKeyOutputFormat.class, outputSchema);

    job.setNumReduceTasks(0);

    job.waitForCompletion(true);

    return job;
}
From source file:com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(SegmentCreationPhaseJob.class);
    job.setJobName(name);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
    LOGGER.info("Input path : {}", inputSegmentDir);
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputSegmentDir);
    LOGGER.info("Schema : {}", avroSchema);
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());

    String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputDir);
    Path stagingDir = new Path(outputDir, TEMP);
    LOGGER.info("Staging dir : {}", stagingDir);
    String segmentWallClockStart = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_START_TIME);
    LOGGER.info("Segment wallclock start time : {}", segmentWallClockStart);
    String segmentWallClockEnd = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_END_TIME);
    LOGGER.info("Segment wallclock end time : {}", segmentWallClockEnd);
    String schedule = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEDULE);
    LOGGER.info("Segment schedule : {}", schedule);
    String isBackfill = props.getProperty(SEGMENT_CREATION_BACKFILL.toString(), DEFAULT_BACKFILL);
    configuration.set(SEGMENT_CREATION_BACKFILL.toString(), isBackfill);
    LOGGER.info("Is Backfill : {}", configuration.get(SEGMENT_CREATION_BACKFILL.toString()));

    // Create temporary directory
    if (fs.exists(stagingDir)) {
        LOGGER.warn("Found the temp folder, deleting it");
        fs.delete(stagingDir, true);
    }
    fs.mkdirs(stagingDir);
    fs.mkdirs(new Path(stagingDir + "/input/"));

    // Create output directory
    if (fs.exists(new Path(outputDir))) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(new Path(outputDir), true);
    }
    fs.mkdirs(new Path(outputDir));

    // Read input files
    List<FileStatus> inputDataFiles = new ArrayList<>();
    for (String input : inputSegmentDir.split(",")) {
        Path inputPathPattern = new Path(input);
        inputDataFiles.addAll(Arrays.asList(fs.listStatus(inputPathPattern)));
    }
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((stagingDir + "/input/"
                + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
            System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
    FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.getConfiguration().set(SEGMENT_CREATION_THIRDEYE_CONFIG.toString(),
        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);

    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
    FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
    for (FileStatus segment : segmentArr) {
        fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
    }

    // Delete temporary directory.
    LOGGER.info("Cleanup the working directory.");
    LOGGER.info("Deleting the dir: {}", stagingDir);
    fs.delete(stagingDir, true);

    return job;
}
From source file:com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(TopKPhaseJob.class);

    Configuration configuration = job.getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(),
        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(TopKPhaseMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Combiner
    job.setCombinerClass(TopKPhaseCombiner.class);

    // Reduce config
    job.setReducerClass(TopKPhaseReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);

    return job;
}
From source file:com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob.java
License:Apache License
@Override
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);
    final Path stagedPath = new Path(String.format("%s/%s/staged", stagingPrefix, System.currentTimeMillis()));

    FileOutputFormat.setOutputPath(this, stagedPath);

    final Thread hook = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                killJob();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });

    Runtime.getRuntime().addShutdownHook(hook);

    final boolean retVal = super.waitForCompletion(verbose);

    Runtime.getRuntime().removeShutdownHook(hook);

    if (retVal) {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());

        fs.mkdirs(actualOutputPath);

        log.info(String.format("Deleting data at old path[%s]", actualOutputPath));
        fs.delete(actualOutputPath, true);

        log.info(String.format("Moving from staged path[%s] to final resting place[%s]", stagedPath,
            actualOutputPath));
        return fs.rename(stagedPath, actualOutputPath);
    }

    log.warn("retVal was false for some reason...");
    return retVal;
}
From source file:com.littlehotspot.hadoop.mr.box.BoxLog.java
License:Open Source License
@Override
public int run(String[] arg) throws Exception {
    try {
        if (arg.length > 2) {
            BOX_LOG_FORMAT_REGEX = Pattern.compile(arg[2]);
        }

        Job job = Job.getInstance(this.getConf(), BoxLog.class.getSimpleName());
        job.setJarByClass(BoxLog.class);

        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(BoxMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(BoxReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java
License:Open Source License
@Override
public int run(String[] arg) throws Exception {
    try {
        Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName());
        job.setJarByClass(MobileLog.class);

        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(MobileMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(MobileReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        CommonVariables.initMapReduce(this.getConf(), args); // initialize common MapReduce variables

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // compile the mapper input-format regex if one was supplied
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(GeneralReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.hdfs2hbase.api.user.UserScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        CommonVariables.initMapReduce(this.getConf(), args); // initialize common MapReduce variables
        CommonVariables.hBaseHelper = new HBaseHelper(this.getConf());

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // compile the mapper input-format regex if one was supplied
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        job.setMapperClass(UserMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(UserReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean state = job.waitForCompletion(true);
        if (!state) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.m6d.filecrush.crush.integration.CrushMapReduceTest.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    job.set("mapreduce.output.fileoutputformat.compress.type", CompressionType.BLOCK.name());
    job.set("mapreduce.output.fileoutputformat.compress.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));
    homeDir = homeDirPath.toUri().getPath();
    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}