List of usage examples for org.apache.hadoop.fs FileSystem delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
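Minimal usage sketch before the collected examples (the path "/tmp/example-output" and the class name DeleteExample are hypothetical; per the Hadoop FileSystem javadoc, recursive = true removes a directory together with its contents, and the call returns false if the path does not exist):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/example-output"); // hypothetical path
        FileSystem fs = FileSystem.get(dir.toUri(), conf);

        // recursive = true: delete the directory and everything under it.
        // The boolean return is false when the path did not exist.
        boolean deleted = fs.delete(dir, true);
        System.out.println("Deleted " + dir + ": " + deleted);
    }
}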
From source file:com.inmobi.messaging.consumer.util.TestUtil.java
License:Apache License
public static void cleanupCluster(ClusterUtil cluster) throws IOException {
    FileSystem fs = FileSystem.get(cluster.getHadoopConf());
    LOG.debug("Cleaning up the dir: " + cluster.getRootDir());
    fs.delete(new Path(cluster.getRootDir()), true);
}
From source file:com.intel.hadoop.hbase.dot.KEY.java
License:Apache License
private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
        String mrTableName) throws IOException, ClassNotFoundException, InterruptedException {
    this.conf.set(KEY.INPUT_TABLE, mrTableName);

    Job job = new Job(this.conf);
    job.setJobName("Generate Data for [" + mrTableName + "]");
    job.setJarByClass(GenerateTestTable.class);
    job.setInputFormatClass(inputFormatClass);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp", "tempout");
    fs.delete(path, true);

    FileOutputFormat.setOutputPath(job, path);

    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);

    TableMapReduceUtil.addDependencyJars(job);
    // Add a Class from the hbase.jar so it gets registered too.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);
    TableMapReduceUtil.initCredentials(job);

    job.waitForCompletion(true);
}
From source file:com.jointhegrid.hive_test.HiveTestBase.java
License:Apache License
public void setUp() throws Exception {
    super.setUp();
    String jarFile = org.apache.hadoop.hive.ql.exec.MapRedTask.class.getProtectionDomain().getCodeSource()
            .getLocation().getFile();
    System.setProperty(HiveConf.ConfVars.HIVEJAR.toString(), jarFile);

    Path rootDir = getDir(ROOT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(rootDir, true);

    Path metastorePath = new Path("/tmp/metastore_db");
    fs.delete(metastorePath, true);

    Path warehouse = new Path("/tmp/warehouse");
    fs.delete(warehouse, true);
    fs.mkdirs(warehouse);
}
From source file:com.jyz.study.hadoop.mapreduce.datajoin.DataJoinJob.java
License:Apache License
public static JobConf createDataJoinJob(String args[]) throws IOException {
    String inputDir = args[0];
    String outputDir = args[1];

    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }

    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);

    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }

    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }

    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);

    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormat(inputFormat);
    job.setMapperClass(mapper);

    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}
From source file:com.kasabi.labs.freebase.mr.Freebase2RDFDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (log.isDebugEnabled()) {
        log.debug("run({})", Utils.toString(args));
    }

    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();

    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName("Freebase2RDFDriver");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(Freebase2RDFMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(Freebase2RDFReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Utils.setReducers(job, configuration, log);

    job.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled())
        Utils.log(job, log);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.kse.bigdata.main.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    /**********************************************************************************
     ** Merge the source files into one.                                             **
     ** Should change the directories of each file before executing the program      **
     **********************************************************************************/
    // String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    // String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    // File resultFile = new File(resultFileDirectory);
    // if(!resultFile.exists())
    //     new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before Start, Check the Length of Sequence We Want to Predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    // Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    // Enable Profiling
    // conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {
        /*
         * Mandatory command
         */
        // Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        // Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        // Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        // Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        // Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        // Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        // Whether job uses mean or median
        // [Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";

    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    // Test Sequence
    // String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    // Sequence testSeq = new Sequence(testSeqString);
    // LinkedList<Sequence> testSequences = new LinkedList<>();
    // testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /* ********************  Hadoop Launch  *********************** */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Should think another way to implement the combiner class
        // Current Implementation is not helpful to Job.
        // job.setCombinerClass(Combiner.class);

        // Set 1 for number of reduce task for keeping 100 most neighbors in sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        // CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");
            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.kylinolap.common.persistence.HBaseResourceStore.java
License:Apache License
private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, HTableInterface table) throws IOException {
    Path redirectPath = bigCellHDFSPath(resPath);
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    if (fileSystem.exists(redirectPath)) {
        fileSystem.delete(redirectPath, true);
    }

    FSDataOutputStream out = fileSystem.create(redirectPath);

    try {
        out.write(largeColumn);
    } finally {
        IOUtils.closeQuietly(out);
    }

    return redirectPath;
}
From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java
License:Apache License
protected void deletePath(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
From source file:com.kylinolap.job.hadoop.cube.StorageCleanupJob.java
License:Apache License
private void cleanUnusedHdfsFiles(Configuration conf) throws IOException {
    JobEngineConfig engineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    FileSystem fs = FileSystem.get(conf);
    List<String> allHdfsPathsNeedToBeDeleted = new ArrayList<String>();
    // GlobFilter filter = new
    // GlobFilter(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()
    // + "/kylin-.*");
    FileStatus[] fStatus = fs.listStatus(new Path(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()));
    for (FileStatus status : fStatus) {
        String path = status.getPath().getName();
        // System.out.println(path);
        if (path.startsWith(JobInstance.JOB_WORKING_DIR_PREFIX)) {
            String kylinJobPath = engineConfig.getHdfsWorkingDirectory() + "/" + path;
            allHdfsPathsNeedToBeDeleted.add(kylinJobPath);
        }
    }

    List<JobInstance> allJobs = JobDAO.getInstance(KylinConfig.getInstanceFromEnv()).listAllJobs();
    for (JobInstance jobInstance : allJobs) {
        // only remove FINISHED and DISCARDED job intermediate files
        if (isJobInUse(jobInstance) == true) {
            String path = JobInstance.getJobWorkingDir(jobInstance, engineConfig);
            allHdfsPathsNeedToBeDeleted.remove(path);
            log.info("Remove " + path + " from deletion list, as the path belongs to job "
                    + jobInstance.getUuid() + " with status " + jobInstance.getStatus());
        }
    }

    // remove every segment working dir from deletion list
    for (CubeInstance cube : cubeMgr.listAllCubes()) {
        for (CubeSegment seg : cube.getSegments()) {
            String jobUuid = seg.getLastBuildJobID();
            if (jobUuid != null && jobUuid.equals("") == false) {
                String path = JobInstance.getJobWorkingDir(jobUuid, engineConfig.getHdfsWorkingDirectory());
                allHdfsPathsNeedToBeDeleted.remove(path);
                log.info("Remove " + path + " from deletion list, as the path belongs to segment " + seg
                        + " of cube " + cube.getName());
            }
        }
    }

    if (delete == true) {
        // remove files
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            log.info("Deleting hdfs path " + hdfsPath);
            Path p = new Path(hdfsPath);
            if (fs.exists(p) == true) {
                fs.delete(p, true);
                log.info("Deleted hdfs path " + hdfsPath);
            } else {
                log.info("Hdfs path " + hdfsPath + " does not exist");
            }
        }
    } else {
        System.out.println("--------------- HDFS Path To Be Deleted ---------------");
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            System.out.println(hdfsPath);
        }
        System.out.println("-------------------------------------------------------");
    }
}
From source file:com.liferay.hadoop.action.HadoopJob.java
License:Open Source License
public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {
    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();
    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();
                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();
            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {
            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception ioe) {
        writer.println("Job error: ");

        ioe.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}