List of usage examples for org.apache.hadoop.fs FileSystem close
@Override public void close() throws IOException
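
Because FileSystem implements java.io.Closeable, the simplest way to guarantee that close() runs is try-with-resources. Keep in mind that FileSystem.get() normally returns a JVM-wide cached instance, so closing it also invalidates that shared handle for other callers in the same process; the examples below show how different projects handle this. A minimal sketch, assuming a default Configuration and a hypothetical path /tmp/example.txt:

// Minimal sketch: close a FileSystem via try-with-resources.
// The path, and the decision to close the (possibly cached) instance, are illustrative.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class CloseExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        try (FileSystem fs = FileSystem.get(conf)) {
            // Use the filesystem while it is open.
            System.out.println("exists: " + fs.exists(new Path("/tmp/example.txt")));
        } // fs.close() runs here, even if exists() throws
    }
}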
From source file: net.sf.nutchcontentexporter.NutchContentExporter.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.out.println("two params required: segmentdir (-local | -dfs <namenode:port>) outputdir");
        return 1;
    }

    try {
        FileSystem fs = FileSystem.get(conf);

        String segment = otherArgs[0];

        File outDir = new File(otherArgs[1]);
        if (!outDir.exists()) {
            if (outDir.mkdirs()) {
                System.out.println("Creating output dir " + outDir.getAbsolutePath());
            }
        }

        Path file = new Path(segment, Content.DIR_NAME + "/part-00000/data");

        // new 2.0 API
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));

        Text key = new Text();
        Content content = new Content();

        while (reader.next(key, content)) {
            String filename = key.toString().replaceFirst("http://", "").replaceAll("/", "___").trim();

            // limit the output file name to 255 characters
            if (filename.length() > MAX_FILE_NAME_LENGTH) {
                filename = filename.substring(0, MAX_FILE_NAME_LENGTH);
            }

            File f = new File(outDir.getCanonicalPath() + "/" + filename);
            FileOutputStream fos = new FileOutputStream(f);
            fos.write(content.getContent());
            fos.close();

            System.out.println(f.getAbsolutePath());
        }

        reader.close();
        fs.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return 0;
}
From source file: net.sf.nutchcontentexporter.NutchToWARCConverter.java
License: Apache License

/**
 * Input: Nutch segment folder (e.g. "20150303005802")
 * Output: gz/bz2 WARC file (e.g. "20150303005802.warc.gz/bz2")
 * The third parameter is an output file prefix (e.g. "prefix20150303005802.warc.gz")
 * <p/>
 * By default, the output is compressed with gz.
 *
 * @param args args
 * @return int
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    try {
        FileSystem fs = FileSystem.get(conf);

        String segmentDir = otherArgs[0];

        File outDir = new File(otherArgs[1]);
        if (!outDir.exists()) {
            if (outDir.mkdirs()) {
                System.out.println("Creating output dir " + outDir.getAbsolutePath());
            }
        }

        String outputFilePrefix = "";
        if (otherArgs.length >= 3) {
            outputFilePrefix = otherArgs[2];
        }

        // do we want bz2 output?
        boolean compressBz2 = false;
        if (otherArgs.length >= 4) {
            compressBz2 = "bz2".equals(otherArgs[3]);
        }

        Path file = new Path(segmentDir, Content.DIR_NAME + "/part-00000/data");

        String extension = ".warc." + (compressBz2 ? "bz2" : "gz");

        String segmentName = new File(segmentDir).getName();

        nutchSegmentToWARCFile(file, outDir, outputFilePrefix + segmentName, extension, conf, compressBz2);

        fs.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return 0;
}
From source file: nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.FastqFileRecordWriter.java
License: Open Source License

/**
 * Override method that writes the Reducer output to a file.
 *
 * @param key   NullWritable key, which will not be used.
 * @param value TextArrayWritable containing the count data.
 * @throws IOException          If writing to the file system fails.
 * @throws InterruptedException If there is a connection problem.
 */
@Override
public void write(NullWritable key, TextArrayWritable value) throws IOException, InterruptedException {

    // Set the filesystem and delete path if it exists.
    FileSystem hdfs = FileSystem.get(this.mConf);
    if (hdfs.exists(this.mOutputPath)) {
        hdfs.delete(this.mOutputPath, false);
    }

    // Write the TextArrayWritable contents to a fastqc file.
    FSDataOutputStream out = hdfs.create(this.mOutputPath);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out));
    for (Text line : value.get()) {
        bw.write(line.toString());
        bw.newLine();
    }
    bw.close();
    hdfs.close();
}
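
Closing the FileSystem returned by FileSystem.get() inside a RecordWriter also closes the JVM-wide cached instance that other tasks in the same process may still be using. A common alternative is to request a private instance with FileSystem.newInstance() and close only that. The sketch below illustrates the idea; the class, method, and parameter names are illustrative, not part of the project above.

// Sketch: write lines through a private, non-cached FileSystem instance.
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class PrivateFsWriteSketch {

    public static void writeLines(Configuration conf, Path outputPath, Iterable<String> lines)
            throws IOException {
        // newInstance() bypasses the JVM-wide FileSystem cache, so closing it here
        // cannot invalidate a handle that other tasks in the same process still use.
        try (FileSystem fs = FileSystem.newInstance(conf);
             FSDataOutputStream out = fs.create(outputPath, true);
             BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out))) {
            for (String line : lines) {
                bw.write(line);
                bw.newLine();
            }
        }
    }
}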
From source file: nl.tudelft.graphalytics.giraph.GiraphPlatform.java
License: Apache License

@Override
public void uploadGraph(Graph graph) throws Exception {
    LOG.info("Uploading graph \"{}\" to HDFS", graph.getName());

    String uploadPath = Paths.get(hdfsDirectory, getName(), "input", graph.getName()).toString();

    // Upload the graph to HDFS
    FileSystem fs = FileSystem.get(new Configuration());
    LOG.debug("- Uploading vertex list");
    fs.copyFromLocalFile(new Path(graph.getVertexFilePath()), new Path(uploadPath + ".v"));
    LOG.debug("- Uploading edge list");
    fs.copyFromLocalFile(new Path(graph.getEdgeFilePath()), new Path(uploadPath + ".e"));
    fs.close();

    // Track available datasets in a map
    pathsOfGraphs.put(graph.getName(), uploadPath);
}
From source file: nl.tudelft.graphalytics.giraph.GiraphPlatform.java
License: Apache License

@Override
public PlatformBenchmarkResult executeAlgorithmOnGraph(Benchmark benchmark) throws PlatformExecutionException {
    Algorithm algorithm = benchmark.getAlgorithm();
    Graph graph = benchmark.getGraph();
    Object parameters = benchmark.getAlgorithmParameters();

    LOG.info("Executing algorithm \"{}\" on graph \"{}\".", algorithm.getName(), graph.getName());

    int result;
    try {
        // Prepare the appropriate job for the given algorithm type
        GiraphJob job;
        switch (algorithm) {
        case BFS:
            job = new BreadthFirstSearchJob(parameters, graph.getGraphFormat());
            break;
        case CDLP:
            job = new CommunityDetectionLPJob(parameters, graph.getGraphFormat());
            break;
        case WCC:
            job = new WeaklyConnectedComponentsJob(graph.getGraphFormat());
            break;
        case FFM:
            job = new ForestFireModelJob(parameters, graph.getGraphFormat());
            break;
        case LCC:
            job = new LocalClusteringCoefficientJob(graph.getGraphFormat());
            break;
        case PR:
            job = new PageRankJob(parameters, graph.getGraphFormat());
            break;
        case SSSP:
            job = new SingleSourceShortestPathJob(parameters, graph);
            break;
        default:
            throw new IllegalArgumentException("Unsupported algorithm: " + algorithm);
        }

        // Create the job configuration using the Giraph properties file
        String hdfsOutputPath = Paths.get(hdfsDirectory, getName(), "output", algorithm + "-" + graph.getName())
                .toString();
        Configuration jobConf = new Configuration();

        GiraphJob.INPUT_PATH.set(jobConf, pathsOfGraphs.get(graph.getName()));
        GiraphJob.OUTPUT_PATH.set(jobConf, hdfsOutputPath);
        GiraphJob.ZOOKEEPER_ADDRESS.set(jobConf, ConfigurationUtil.getString(giraphConfig, ZOOKEEPERADDRESS));

        transferIfSet(giraphConfig, JOB_WORKERCOUNT, jobConf, GiraphJob.WORKER_COUNT);
        transferIfSet(giraphConfig, JOB_HEAPSIZE, jobConf, GiraphJob.HEAP_SIZE_MB);
        transferIfSet(giraphConfig, JOB_MEMORYSIZE, jobConf, GiraphJob.WORKER_MEMORY_MB);

        transferGiraphOptions(giraphConfig, jobConf);

        // Execute the Giraph job
        result = ToolRunner.run(jobConf, job, new String[0]);

        // TODO: Clean up intermediate and output data, depending on some configuration.
        if (benchmark.isOutputRequired()) {
            FileSystem fs = FileSystem.get(new Configuration());
            fs.copyToLocalFile(new Path(hdfsOutputPath), new Path(benchmark.getOutputPath()));
            fs.close();
        }
    } catch (Exception e) {
        throw new PlatformExecutionException("Giraph job failed with exception: ", e);
    }

    if (result != 0) {
        throw new PlatformExecutionException("Giraph job completed with exit code = " + result);
    }

    return new PlatformBenchmarkResult(NestedConfiguration.empty());
}
From source file: nl.tudelft.graphalytics.graphlab.GraphLabPlatform.java
License: Apache License

@Override
public void uploadGraph(Graph graph, String graphFilePath) throws Exception {
    LOG.entry(graph, graphFilePath);

    if (USE_HADOOP) {
        String uploadPath = Paths.get(hdfsDirectory, getName(), "input", graph.getName()).toString();

        // Upload the graph to HDFS
        FileSystem fs = FileSystem.get(new Configuration());
        fs.copyFromLocalFile(new Path(graphFilePath), new Path(uploadPath));
        fs.close();

        // Track available datasets in a map
        pathsOfGraphs.put(graph.getName(), fs.getHomeDirectory().toUri() + "/" + uploadPath);
    } else {
        // Use local files, so just put the local file path in the map
        pathsOfGraphs.put(graph.getName(), graphFilePath);
    }

    LOG.exit();
}
From source file: nl.utwente.mirex.QueryTermCount.java
License: Open Source License

/**
 * Runs the MapReduce job that gets global statistics
 *
 * @param args 0: path to parsed document collection (use AnchorExtract); 1: TREC query file;
 *             2: MIREX query file with global statistics
 * @usage.
 * <code> % hadoop jar mirex-0.2.jar nl.utwente.mirex.QueryTermCount WARC warc wt2010-topics.stats wt2010-topics.queries-only </code>
 */
public static void main(String[] args) throws Exception {

    if (args.length != 3 && args.length != 4) {
        System.out.printf("Usage: %s [inputFormat] inputFiles topicFile outputFile\n",
                QueryTermCount.class.getSimpleName());
        System.out.println("  inputFormat: either WARC or KEYVAL; default WARC");
        System.out.println("  inputFiles: path to data");
        System.out.println("  outputFile: topic file with statistics");
        System.out.println("  topicFile: topic file in format queryId: term1 term2...");
        System.exit(1);
    }

    Path tempOut = new Path(tempName);
    int argc = 0;
    String inputFormat = "WARC";
    if (args.length > 3) {
        inputFormat = args[argc++];
    }
    Path inputFile = new Path(args[argc++]);
    Path topicFile = new Path(args[argc++]);
    Path outputFile = new Path(args[argc++]);
    java.util.Map<String, Long> queryCounts = new HashMap<String, Long>();

    // Stop if out file exists
    FileSystem hdfs = FileSystem.get(new Configuration());
    if (hdfs.exists(outputFile)) {
        System.err.println("Output file " + outputFile + " already exists.");
        System.exit(1);
    }
    hdfs.delete(tempOut, true);

    // Run the job
    Job job = configureJob("QueryTermCount", inputFormat, inputFile, tempOut, topicFile);
    job.waitForCompletion(true);

    // Get created global statistics from all files which start with "part" from tempOut
    try {
        String tempLine;
        FileStatus[] status = hdfs.listStatus(tempOut);
        for (int i = 0; i < status.length; i++) {
            String fileName = status[i].getPath().getName();
            if (!fileName.startsWith("part"))
                continue;
            FSDataInputStream dis = hdfs.open(status[i].getPath());
            BufferedReader in = new BufferedReader(new InputStreamReader(dis));
            while ((tempLine = in.readLine()) != null) {
                String[] fields = tempLine.split("\t");
                queryCounts.put(fields[0], new Long(fields[1]));
            }
            dis.close();
        }
    } catch (IOException ioe) {
        System.err.println(StringUtils.stringifyException(ioe));
        System.exit(1);
    }

    // Write new topic file with global statistics
    try {
        String tempLine;
        FSDataOutputStream dos = hdfs.create(outputFile);
        dos.writeBytes("#MIREX-COMMENT: query term weight, document frequency, collection frequency (for each term)\n");
        dos.writeBytes("#MIREX-COLLECTION:" + inputFile + "\n");
        dos.writeBytes("#" + CollectionLength + ":" + queryCounts.get(CollectionLength) + "\n");
        dos.writeBytes("#" + NumberOfDocs + ":" + queryCounts.get(NumberOfDocs) + "\n");

        FSDataInputStream dis = hdfs.open(topicFile);
        BufferedReader in = new BufferedReader(new InputStreamReader(dis));
        while ((tempLine = in.readLine()) != null) {
            String[] fields = tempLine.toLowerCase().split(":");
            dos.writeBytes(fields[0] + ":");
            String[] terms = fields[1].replaceAll("=", " ").split(TOKENIZER);
            for (int i = 0; i < terms.length; i++) {
                Long df, cf;
                if (queryCounts.containsKey(DF + terms[i])) {
                    df = queryCounts.get(DF + terms[i]);
                    cf = queryCounts.get(CF + terms[i]);
                } else {
                    df = 0l;
                    cf = 0l;
                }
                dos.writeBytes(terms[i] + "=1=" + df.toString() + "=" + cf.toString());
                if (i < terms.length - 1)
                    dos.writeBytes(" ");
            }
            dos.writeBytes("\n");
        }
        dis.close();
        dos.close();
    } catch (IOException ioe) {
        System.err.println(StringUtils.stringifyException(ioe));
        System.exit(1);
    }

    hdfs.close();
}
From source file: org.apache.ambari.fast_hdfs_resource.Runner.java
License: Apache License

public static void main(String[] args) throws IOException, URISyntaxException {
    // 1 - Check arguments
    if (args.length != 1) {
        System.err.println("Incorrect number of arguments. Please provide:\n"
                + "1) Path to json file\n"
                + "Exiting...");
        System.exit(1);
    }

    // 2 - Check if json-file exists
    final String jsonFilePath = args[0];
    File file = new File(jsonFilePath);
    if (!file.isFile()) {
        System.err.println("File " + jsonFilePath + " doesn't exist.\nExiting...");
        System.exit(1);
    }

    Gson gson = new Gson();
    Resource[] resources = null;
    FileSystem dfs = null;

    try {
        Configuration conf = new Configuration();
        dfs = FileSystem.get(conf);

        // 3 - Load data from JSON
        resources = (Resource[]) gson.fromJson(new FileReader(jsonFilePath), Resource[].class);

        // 4 - Connect to HDFS
        System.out.println("Using filesystem uri: " + FileSystem.getDefaultUri(conf).toString());
        dfs.initialize(FileSystem.getDefaultUri(conf), conf);

        for (Resource resource : resources) {
            System.out.println("Creating: " + resource);

            Resource.checkResourceParameters(resource, dfs);

            Path pathHadoop = new Path(resource.getTarget());
            if (!resource.isManageIfExists() && dfs.exists(pathHadoop)) {
                System.out.println("Skipping the operation for not managed DFS directory "
                        + resource.getTarget() + " since immutable_paths contains it.");
                continue;
            }

            if (resource.getAction().equals("create")) {
                // 5 - Create
                Resource.createResource(resource, dfs, pathHadoop);
                Resource.setMode(resource, dfs, pathHadoop);
                Resource.setOwner(resource, dfs, pathHadoop);
            } else if (resource.getAction().equals("delete")) {
                // 6 - Delete
                dfs.delete(pathHadoop, true);
            }
        }
    } catch (Exception e) {
        System.out.println("Exception occurred, Reason: " + e.getMessage());
        e.printStackTrace();
    } finally {
        // Guard against a null reference when FileSystem.get() itself fails
        if (dfs != null) {
            dfs.close();
        }
    }
    System.out.println("All resources created.");
}
From source file: org.apache.ambari.servicemonitor.utils.DFSUtils.java
License: Apache License

/**
 * Close any non-null FS.
 *
 * @param hdfs filesystem
 * @return null, always
 */
public static DistributedFileSystem closeDFS(FileSystem hdfs) {
    if (hdfs != null) {
        try {
            hdfs.close();
        } catch (IOException ignore) {
            // close failures are deliberately swallowed
        }
    }
    return null;
}
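
The always-null return value lets a caller drop its reference in the same statement, so the quiet close and the reset cannot get out of sync. A sketch of a call site (the class and field names are illustrative, not from the project above):

// Hypothetical caller of DFSUtils.closeDFS.
import org.apache.hadoop.fs.FileSystem;

class MonitorSketch {
    private FileSystem fs;   // may hold a DistributedFileSystem handle

    void shutdown() {
        // Close quietly and clear the reference in a single statement; safe to call repeatedly.
        fs = DFSUtils.closeDFS(fs);
    }
}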
From source file: org.apache.ambari.view.slider.SliderAppsViewControllerImpl.java
License: Apache License

private <T> T invokeHDFSClientRunnable(final HDFSClientRunnable<T> runnable,
        final Map<String, String> hadoopConfigs) throws IOException, InterruptedException {
    ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    try {
        boolean securityEnabled = Boolean.valueOf(hadoopConfigs.get("security_enabled"));
        final HdfsConfiguration hdfsConfiguration = new HdfsConfiguration();
        for (Entry<String, String> entry : hadoopConfigs.entrySet()) {
            hdfsConfiguration.set(entry.getKey(), entry.getValue());
        }
        UserGroupInformation.setConfiguration(hdfsConfiguration);
        UserGroupInformation sliderUser;
        String loggedInUser = getUserToRunAs(hadoopConfigs);
        if (securityEnabled) {
            String viewPrincipal = getViewParameterValue(PARAM_VIEW_PRINCIPAL);
            String viewPrincipalKeytab = getViewParameterValue(PARAM_VIEW_PRINCIPAL_KEYTAB);
            UserGroupInformation ambariUser = UserGroupInformation
                    .loginUserFromKeytabAndReturnUGI(viewPrincipal, viewPrincipalKeytab);
            if (loggedInUser.equals(ambariUser.getShortUserName())) {
                // HDFS throws an exception when the caller tries to impersonate themselves.
                // User: admin@EXAMPLE.COM is not allowed to impersonate admin
                sliderUser = ambariUser;
            } else {
                sliderUser = UserGroupInformation.createProxyUser(loggedInUser, ambariUser);
            }
        } else {
            sliderUser = UserGroupInformation.getBestUGI(null, loggedInUser);
        }
        try {
            T value = sliderUser.doAs(new PrivilegedExceptionAction<T>() {
                @Override
                public T run() throws Exception {
                    String fsPath = hadoopConfigs.get("fs.defaultFS");
                    FileSystem fs = FileSystem.get(URI.create(fsPath), hdfsConfiguration);
                    try {
                        return runnable.run(fs);
                    } finally {
                        fs.close();
                    }
                }
            });
            return value;
        } catch (UndeclaredThrowableException e) {
            throw e;
        }
    } finally {
        Thread.currentThread().setContextClassLoader(currentClassLoader);
    }
}