Example usage for org.apache.hadoop.fs FileSystem close

List of usage examples for org.apache.hadoop.fs FileSystem close

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem close.

Prototype

@Override
public void close() throws IOException 

Document

Close this FileSystem instance.
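
FileSystem implements java.io.Closeable, so the usual way to guarantee close() runs is try-with-resources. The sketch below is illustrative only (the class name FileSystemCloseSketch and the /tmp path are this page's own); note that FileSystem.get(conf) normally returns a JVM-wide cached instance, so closing it also closes it for every other caller, while FileSystem.newInstance(conf) returns a private instance that is safe to close in isolation.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // newInstance() bypasses the FileSystem cache, so close() here
        // cannot affect instances obtained elsewhere via FileSystem.get().
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            fs.mkdirs(new Path("/tmp/filesystem-close-example"));
        } // close() runs automatically, even if mkdirs() throws
    }
}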

Usage

From source file:net.sf.nutchcontentexporter.NutchContentExporter.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.out.println("two params required: segmentdir (-local | -dfs <namenode:port>) outputdir");
        return 1;
    }

    try {
        FileSystem fs = FileSystem.get(conf);

        String segment = otherArgs[0];

        File outDir = new File(otherArgs[1]);
        if (!outDir.exists()) {
            if (outDir.mkdirs()) {
                System.out.println("Creating output dir " + outDir.getAbsolutePath());
            }
        }

        Path file = new Path(segment, Content.DIR_NAME + "/part-00000/data");
        // new 2.0 API
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));

        Text key = new Text();
        Content content = new Content();

        while (reader.next(key, content)) {
            String filename = key.toString().replaceFirst("http://", "").replaceAll("/", "___").trim();

            // limit the output file name to 255 characters
            if (filename.length() > MAX_FILE_NAME_LENGTH) {
                filename = filename.substring(0, MAX_FILE_NAME_LENGTH);
            }

            File f = new File(outDir.getCanonicalPath() + "/" + filename);
            FileOutputStream fos = new FileOutputStream(f);
            fos.write(content.getContent());
            fos.close();
            System.out.println(f.getAbsolutePath());
        }
        reader.close();
        fs.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return 0;
}

From source file:net.sf.nutchcontentexporter.NutchToWARCConverter.java

License:Apache License

/**
 * Input: Nutch segment folder (e.g. "20150303005802")
 * Output: gz/bz2 WARC file (e.g. "20150303005802.warc.gz/bz2")
 * Third parameter is an output file prefix (e.g. "prefix20150303005802.warc.gz")
 * <p/>
 * By default, the output is compressed with gz
 *
 * @param args command-line arguments: segmentDir outDir [outputFilePrefix] [bz2]
 * @return exit code (0 on success)
 * @throws Exception if the conversion fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    try {
        FileSystem fs = FileSystem.get(conf);

        String segmentDir = otherArgs[0];

        File outDir = new File(otherArgs[1]);
        if (!outDir.exists()) {
            if (outDir.mkdirs()) {
                System.out.println("Creating output dir " + outDir.getAbsolutePath());
            }
        }

        String outputFilePrefix = "";
        if (otherArgs.length >= 3) {
            outputFilePrefix = otherArgs[2];
        }

        boolean compressBz2 = false;
        // do we want bz2 output?
        if (otherArgs.length >= 4) {
            compressBz2 = "bz2".equals(otherArgs[3]);
        }

        Path file = new Path(segmentDir, Content.DIR_NAME + "/part-00000/data");

        String extension = ".warc." + (compressBz2 ? "bz2" : "gz");

        String segmentName = new File(segmentDir).getName();
        nutchSegmentToWARCFile(file, outDir, outputFilePrefix + segmentName, extension, conf, compressBz2);

        fs.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return 0;
}
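
The Javadoc above spells out the expected arguments. As a hedged sketch (assuming NutchToWARCConverter has a no-argument constructor and implements org.apache.hadoop.util.Tool, which the run() override and getConf() call suggest), the converter could be launched through ToolRunner like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

import net.sf.nutchcontentexporter.NutchToWARCConverter;

public class NutchToWARCDriver {
    public static void main(String[] args) throws Exception {
        // args: <segmentDir> <outDir> [outputFilePrefix] [bz2]
        // Assumes NutchToWARCConverter implements Tool (see note above).
        int exitCode = ToolRunner.run(new Configuration(), new NutchToWARCConverter(), args);
        System.exit(exitCode);
    }
}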

From source file:nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.FastqFileRecordWriter.java

License:Open Source License

/**
 * Override method that writes the Reducer output to a file.
 *
 * @param key   NullWritable key, which is not used.
 * @param value TextArrayWritable containing the count data.
 * @throws IOException          If writing the output file fails.
 * @throws InterruptedException If the write is interrupted.
 */
@Override
public void write(NullWritable key, TextArrayWritable value) throws IOException, InterruptedException {

    // Set the filesystem and delete path if it exists.
    FileSystem hdfs = FileSystem.get(this.mConf);
    if (hdfs.exists(this.mOutputPath)) {
        hdfs.delete(this.mOutputPath, false);
    }

    // Write the TextArrayWritable contents to a fastqc file.
    FSDataOutputStream out = hdfs.create(this.mOutputPath);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out));
    for (Text line : value.get()) {
        bw.write(line.toString());
        bw.newLine();
    }
    bw.close();
    hdfs.close();
}
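
One caveat worth noting here: FileSystem.get(this.mConf) usually returns the JVM-wide cached instance, so the hdfs.close() at the end of write() can also close that instance for other tasks running in the same JVM. A hedged alternative (the helper class and method names below are this page's own) writes through a private, uncached instance and relies on try-with-resources:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PrivateFsWriter {
    // Writes the given lines to HDFS through a private, uncached FileSystem,
    // so closing it cannot disturb instances obtained via FileSystem.get().
    public static void writeLines(Configuration conf, Path target, Iterable<String> lines) throws IOException {
        try (FileSystem fs = FileSystem.newInstance(conf);
                FSDataOutputStream out = fs.create(target, true)) {
            for (String line : lines) {
                out.writeBytes(line);
                out.writeBytes("\n");
            }
        } // both the stream and the FileSystem are closed here
    }
}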

From source file:nl.tudelft.graphalytics.giraph.GiraphPlatform.java

License:Apache License

@Override
public void uploadGraph(Graph graph) throws Exception {
    LOG.info("Uploading graph \"{}\" to HDFS", graph.getName());

    String uploadPath = Paths.get(hdfsDirectory, getName(), "input", graph.getName()).toString();

    // Upload the graph to HDFS
    FileSystem fs = FileSystem.get(new Configuration());

    LOG.debug("- Uploading vertex list");
    fs.copyFromLocalFile(new Path(graph.getVertexFilePath()), new Path(uploadPath + ".v"));

    LOG.debug("- Uploading edge list");
    fs.copyFromLocalFile(new Path(graph.getEdgeFilePath()), new Path(uploadPath + ".e"));

    fs.close();

    // Track available datasets in a map
    pathsOfGraphs.put(graph.getName(), uploadPath);
}

From source file:nl.tudelft.graphalytics.giraph.GiraphPlatform.java

License:Apache License

@Override
public PlatformBenchmarkResult executeAlgorithmOnGraph(Benchmark benchmark) throws PlatformExecutionException {
    Algorithm algorithm = benchmark.getAlgorithm();
    Graph graph = benchmark.getGraph();
    Object parameters = benchmark.getAlgorithmParameters();

    LOG.info("Executing algorithm \"{}\" on graph \"{}\".", algorithm.getName(), graph.getName());

    int result;
    try {
        // Prepare the appropriate job for the given algorithm type
        GiraphJob job;
        switch (algorithm) {
        case BFS:
            job = new BreadthFirstSearchJob(parameters, graph.getGraphFormat());
            break;
        case CDLP:
            job = new CommunityDetectionLPJob(parameters, graph.getGraphFormat());
            break;
        case WCC:
            job = new WeaklyConnectedComponentsJob(graph.getGraphFormat());
            break;
        case FFM:
            job = new ForestFireModelJob(parameters, graph.getGraphFormat());
            break;
        case LCC:
            job = new LocalClusteringCoefficientJob(graph.getGraphFormat());
            break;
        case PR:
            job = new PageRankJob(parameters, graph.getGraphFormat());
            break;
        case SSSP:
            job = new SingleSourceShortestPathJob(parameters, graph);
            break;
        default:
            throw new IllegalArgumentException("Unsupported algorithm: " + algorithm);
        }

        // Create the job configuration using the Giraph properties file
        String hdfsOutputPath = Paths.get(hdfsDirectory, getName(), "output", algorithm + "-" + graph.getName())
                .toString();
        Configuration jobConf = new Configuration();

        GiraphJob.INPUT_PATH.set(jobConf, pathsOfGraphs.get(graph.getName()));
        GiraphJob.OUTPUT_PATH.set(jobConf, hdfsOutputPath);
        GiraphJob.ZOOKEEPER_ADDRESS.set(jobConf, ConfigurationUtil.getString(giraphConfig, ZOOKEEPERADDRESS));

        transferIfSet(giraphConfig, JOB_WORKERCOUNT, jobConf, GiraphJob.WORKER_COUNT);
        transferIfSet(giraphConfig, JOB_HEAPSIZE, jobConf, GiraphJob.HEAP_SIZE_MB);
        transferIfSet(giraphConfig, JOB_MEMORYSIZE, jobConf, GiraphJob.WORKER_MEMORY_MB);

        transferGiraphOptions(giraphConfig, jobConf);

        // Execute the Giraph job
        result = ToolRunner.run(jobConf, job, new String[0]);
        // TODO: Clean up intermediate and output data, depending on some configuration.

        if (benchmark.isOutputRequired()) {
            FileSystem fs = FileSystem.get(new Configuration());
            fs.copyToLocalFile(new Path(hdfsOutputPath), new Path(benchmark.getOutputPath()));
            fs.close();
        }

    } catch (Exception e) {
        throw new PlatformExecutionException("Giraph job failed with exception: ", e);
    }

    if (result != 0) {
        throw new PlatformExecutionException("Giraph job completed with exit code = " + result);
    }

    return new PlatformBenchmarkResult(NestedConfiguration.empty());
}
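
A small follow-up on the output copy at the end of this method: the FileSystem opened for copyToLocalFile is only closed on the success path. A hedged helper (class and method names are this page's own) that keeps the close on the exception path as well:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputFetcher {
    // Copies a finished job's HDFS output to a local path, closing the
    // FileSystem even when the copy throws.
    public static void fetch(String hdfsOutputPath, String localOutputPath) throws IOException {
        try (FileSystem fs = FileSystem.get(new Configuration())) {
            fs.copyToLocalFile(new Path(hdfsOutputPath), new Path(localOutputPath));
        }
    }
}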

From source file:nl.tudelft.graphalytics.graphlab.GraphLabPlatform.java

License:Apache License

@Override
public void uploadGraph(Graph graph, String graphFilePath) throws Exception {
    LOG.entry(graph, graphFilePath);

    if (USE_HADOOP) {
        String uploadPath = Paths.get(hdfsDirectory, getName(), "input", graph.getName()).toString();

        // Upload the graph to HDFS
        FileSystem fs = FileSystem.get(new Configuration());
        fs.copyFromLocalFile(new Path(graphFilePath), new Path(uploadPath));
        // Resolve the home directory URI before closing the FileSystem
        String homeUri = fs.getHomeDirectory().toUri().toString();
        fs.close();

        // Track available datasets in a map
        pathsOfGraphs.put(graph.getName(), homeUri + "/" + uploadPath);
    } else {
        // Use local files, so just put the local file path in the map
        pathsOfGraphs.put(graph.getName(), graphFilePath);
    }

    LOG.exit();
}

From source file:nl.utwente.mirex.QueryTermCount.java

License:Open Source License

/**
 * Runs the MapReduce job that gets global statistics
 * @param args 0: path to parsed document collection (use AnchorExtract); 1: TREC query file; 2: MIREX query file with global statistics
 * @usage.
 * <code> % hadoop jar mirex-0.2.jar nl.utwente.mirex.QueryTermCount  WARC warc wt2010-topics.stats wt2010-topics.queries-only  </code> 
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3 && args.length != 4) {
        System.out.printf("Usage: %s [inputFormat] inputFiles topicFile outputFile\n",
                QueryTermCount.class.getSimpleName());
        System.out.println("          inputFormat: either WARC or KEYVAL; default WARC");
        System.out.println("          inputFiles: path to data");
        System.out.println("          outputFile: topic file with statistics");
        System.out.println("          topicFile: topic file in format queryId: term1 term2...");
        System.exit(1);
    }

    Path tempOut = new Path(tempName);
    int argc = 0;
    String inputFormat = "WARC";
    if (args.length > 3) {
        inputFormat = args[argc++];
    }
    Path inputFile = new Path(args[argc++]);
    Path topicFile = new Path(args[argc++]);
    Path outputFile = new Path(args[argc++]);

    java.util.Map<String, Long> queryCounts = new HashMap<String, Long>();

    // Stop if out file exists
    FileSystem hdfs = FileSystem.get(new Configuration());
    if (hdfs.exists(outputFile)) {
        System.err.println("Output file " + outputFile + " already exists.");
        System.exit(1);
    }
    hdfs.delete(tempOut, true);

    // Run the job
    Job job = configureJob("QueryTermCount", inputFormat, inputFile, tempOut, topicFile);
    job.waitForCompletion(true);

    // Get created global statistics from all files which start with "part" from tempOut
    try {
        String tempLine;
        FileStatus[] status = hdfs.listStatus(tempOut);
        for (int i = 0; i < status.length; i++) {
            String fileName = status[i].getPath().getName();
            if (!fileName.startsWith("part"))
                continue;
            FSDataInputStream dis = hdfs.open(status[i].getPath());
            //BufferedReader in = new BufferedReader();
            BufferedReader in = new BufferedReader(new InputStreamReader(dis));
            while ((tempLine = in.readLine()) != null) {
                String[] fields = tempLine.split("\t");
                queryCounts.put(fields[0], new Long(fields[1]));
            }
            dis.close();
        }
    } catch (IOException ioe) {
        System.err.println(StringUtils.stringifyException(ioe));
        System.exit(1);
    }

    // Write new topic file with global statistics
    try {
        String tempLine;
        FSDataOutputStream dos = hdfs.create(outputFile);
        dos.writeBytes(
                "#MIREX-COMMENT: query term weight, document frequency, collection frequency (for each term)\n");
        dos.writeBytes("#MIREX-COLLECTION:" + inputFile + "\n");
        dos.writeBytes("#" + CollectionLength + ":" + queryCounts.get(CollectionLength) + "\n");
        dos.writeBytes("#" + NumberOfDocs + ":" + queryCounts.get(NumberOfDocs) + "\n");

        FSDataInputStream dis = hdfs.open(topicFile);
        BufferedReader in = new BufferedReader(new InputStreamReader(dis));
        while ((tempLine = in.readLine()) != null) {
            String[] fields = tempLine.toLowerCase().split(":");
            dos.writeBytes(fields[0] + ":");
            String[] terms = fields[1].replaceAll("=", " ").split(TOKENIZER);
            for (int i = 0; i < terms.length; i++) {
                Long df, cf;
                if (queryCounts.containsKey(DF + terms[i])) {
                    df = queryCounts.get(DF + terms[i]);
                    cf = queryCounts.get(CF + terms[i]);
                } else {
                    df = 0L;
                    cf = 0L;
                }
                dos.writeBytes(terms[i] + "=1=" + df.toString() + "=" + cf.toString());
                if (i < terms.length - 1)
                    dos.writeBytes(" ");
            }
            dos.writeBytes("\n");
        }
        dis.close();
        dos.close();
    } catch (IOException ioe) {
        System.err.println(StringUtils.stringifyException(ioe));
        System.exit(1);
    }
    hdfs.close();
}

From source file:org.apache.ambari.fast_hdfs_resource.Runner.java

License:Apache License

public static void main(String[] args) throws IOException, URISyntaxException {
    // 1 - Check arguments
    if (args.length != 1) {
        System.err.println(
                "Incorrect number of arguments. Please provide:\n" + "1) Path to json file\n" + "Exiting...");
        System.exit(1);
    }

    // 2 - Check if json-file exists
    final String jsonFilePath = args[0];
    File file = new File(jsonFilePath);

    if (!file.isFile()) {
        System.err.println("File " + jsonFilePath + " doesn't exist.\nExiting...");
        System.exit(1);
    }

    Gson gson = new Gson();
    Resource[] resources = null;
    FileSystem dfs = null;

    try {
        Configuration conf = new Configuration();
        dfs = FileSystem.get(conf);

        // 3 - Load data from JSON
        resources = (Resource[]) gson.fromJson(new FileReader(jsonFilePath), Resource[].class);

        // 4 - Connect to HDFS
        System.out.println("Using filesystem uri: " + FileSystem.getDefaultUri(conf).toString());
        dfs.initialize(FileSystem.getDefaultUri(conf), conf);

        for (Resource resource : resources) {
            System.out.println("Creating: " + resource);

            Resource.checkResourceParameters(resource, dfs);

            Path pathHadoop = new Path(resource.getTarget());
            if (!resource.isManageIfExists() && dfs.exists(pathHadoop)) {
                System.out.println("Skipping the operation for not managed DFS directory "
                        + resource.getTarget() + " since immutable_paths contains it.");
                continue;
            }

            if (resource.getAction().equals("create")) {
                // 5 - Create
                Resource.createResource(resource, dfs, pathHadoop);
                Resource.setMode(resource, dfs, pathHadoop);
                Resource.setOwner(resource, dfs, pathHadoop);
            } else if (resource.getAction().equals("delete")) {
                // 6 - Delete
                dfs.delete(pathHadoop, true);
            }
        }
    } catch (Exception e) {
        System.out.println("Exception occurred, Reason: " + e.getMessage());
        e.printStackTrace();
    } finally {
        if (dfs != null) {
            dfs.close();
        }
    }

    System.out.println("All resources created.");
}

From source file:org.apache.ambari.servicemonitor.utils.DFSUtils.java

License:Apache License

/**
 * Close any non-null FS
 *
 * @param hdfs filesystem
 * @return null, always
 */
public static DistributedFileSystem closeDFS(FileSystem hdfs) {
    if (hdfs != null) {
        try {
            hdfs.close();
        } catch (IOException ignore) {

        }
    }
    return null;
}
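
For comparison, Hadoop ships a quiet, null-safe close in org.apache.hadoop.io.IOUtils.closeStream(). A hedged one-liner equivalent of closeDFS (the class name below is this page's own, and it returns the broader FileSystem type rather than DistributedFileSystem):

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IOUtils;

public final class QuietClose {
    // Closes the FileSystem if it is non-null, swallowing any IOException,
    // and returns null so callers can clear their reference in one line.
    public static FileSystem closeQuietly(FileSystem fs) {
        IOUtils.closeStream(fs); // null-safe, ignores IOException
        return null;
    }
}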

From source file:org.apache.ambari.view.slider.SliderAppsViewControllerImpl.java

License:Apache License

private <T> T invokeHDFSClientRunnable(final HDFSClientRunnable<T> runnable,
        final Map<String, String> hadoopConfigs) throws IOException, InterruptedException {
    ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    try {
        boolean securityEnabled = Boolean.valueOf(hadoopConfigs.get("security_enabled"));
        final HdfsConfiguration hdfsConfiguration = new HdfsConfiguration();
        for (Entry<String, String> entry : hadoopConfigs.entrySet()) {
            hdfsConfiguration.set(entry.getKey(), entry.getValue());
        }
        UserGroupInformation.setConfiguration(hdfsConfiguration);
        UserGroupInformation sliderUser;
        String loggedInUser = getUserToRunAs(hadoopConfigs);
        if (securityEnabled) {
            String viewPrincipal = getViewParameterValue(PARAM_VIEW_PRINCIPAL);
            String viewPrincipalKeytab = getViewParameterValue(PARAM_VIEW_PRINCIPAL_KEYTAB);
            UserGroupInformation ambariUser = UserGroupInformation
                    .loginUserFromKeytabAndReturnUGI(viewPrincipal, viewPrincipalKeytab);
            if (loggedInUser.equals(ambariUser.getShortUserName())) {
                // HDFS throws an exception when the caller tries to impersonate themselves:
                // "User: admin@EXAMPLE.COM is not allowed to impersonate admin"
                sliderUser = ambariUser;
            } else {
                sliderUser = UserGroupInformation.createProxyUser(loggedInUser, ambariUser);
            }
        } else {
            sliderUser = UserGroupInformation.getBestUGI(null, loggedInUser);
        }
        try {
            T value = sliderUser.doAs(new PrivilegedExceptionAction<T>() {
                @Override
                public T run() throws Exception {
                    String fsPath = hadoopConfigs.get("fs.defaultFS");
                    FileSystem fs = FileSystem.get(URI.create(fsPath), hdfsConfiguration);
                    try {
                        return runnable.run(fs);
                    } finally {
                        fs.close();
                    }
                }
            });
            return value;
        } catch (UndeclaredThrowableException e) {
            throw e;
        }
    } finally {
        Thread.currentThread().setContextClassLoader(currentClassLoader);
    }
}