Example usage for org.apache.hadoop.fs FileSystem create

Introduction

This page collects example usages of the org.apache.hadoop.fs.FileSystem create method, gathered from open-source projects.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
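
For orientation, here is a minimal, self-contained sketch of the call. The Configuration, target path, and class name are illustrative assumptions rather than code from the projects listed below; the examples that follow show the method in real code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {
    public static void main(String[] args) throws Exception {
        // A default Configuration resolves to the local filesystem unless
        // fs.defaultFS points at an HDFS namenode (illustrative setup).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // create(Path) returns an FSDataOutputStream at the indicated path,
        // overwriting any existing file by default.
        Path target = new Path("/tmp/filesystem-create-example.txt"); // hypothetical path
        FSDataOutputStream out = fs.create(target);
        try {
            out.writeBytes("hello, hdfs\n");
        } finally {
            out.close();
        }
    }
}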

Usage

From source file: com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHDFSSourceIT.java

License: Apache License

@Test
public void testWrongHDFSDirLocation() throws Exception {
    ClusterHdfsConfigBean conf = new ClusterHdfsConfigBean();
    conf.hdfsUri = miniDFS.getURI().toString();
    conf.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
    conf.hdfsConfigs = new HashMap<>();
    conf.hdfsConfigs.put("x", "X");
    conf.dataFormat = DataFormat.TEXT;
    conf.dataFormatConfig.textMaxLineLen = 1024;

    conf.hdfsUri = "/pathwithnoschemeorauthority";
    ClusterHdfsSource clusterHdfsSource = createSource(conf);
    try {
        List<ConfigIssue> issues = clusterHdfsSource.init(null, ContextInfoCreator
                .createSourceContext("myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_02"));

        conf.hdfsUri = "file://localhost:8020/";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_12"));

        conf.hdfsUri = "hdfs:///noauthority";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_13"));

        conf.hdfsUri = "hdfs://localhost:50000";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_11"));

        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList("/pathdoesnotexist");
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_10"));

        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
        FileSystem fs = miniDFS.getFileSystem();
        Path someFile = new Path(new Path(dir.toUri()), "/someFile");
        fs.create(someFile).close();
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        conf.hdfsUri = null;
        conf.hdfsConfigs.put(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, miniDFS.getURI().toString());
        someFile = new Path(new Path(dir.toUri()), "/someFile2");
        fs.create(someFile).close();
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        Path dummyFile = new Path(new Path(dir.toUri()), "/dummyFile");
        fs.create(dummyFile).close();
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(dummyFile.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_15"));

        Path emptyDir = new Path(dir.toUri().getPath(), "emptyDir");
        fs.mkdirs(emptyDir);
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_16"));

        Path path1 = new Path(emptyDir, "path1");
        fs.create(path1).close();
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());
    } finally {
        clusterHdfsSource.destroy();
    }
}

From source file: com.streamsets.pipeline.stage.origin.hdfs.cluster.TestClusterHDFSSource.java

License: Apache License

@Test
public void testWrongHDFSDirLocation() throws Exception {
    ClusterHdfsDSource dSource = new ForTestClusterHdfsDSource();
    configure(dSource, dir.toUri().getPath());
    dSource.hdfsUri = "/pathwithnoschemeorauthority";
    ClusterHdfsSource clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    try {
        List<ConfigIssue> issues = clusterHdfsSource.init(null, ContextInfoCreator
                .createSourceContext("myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_02"));

        dSource.hdfsUri = "file://localhost:8020/";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_12"));

        dSource.hdfsUri = "hdfs:///noauthority";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_13"));

        dSource.hdfsUri = "hdfs://localhost:8020";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_11"));

        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList("/pathdoesnotexist");
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_10"));

        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
        FileSystem fs = miniDFS.getFileSystem();
        Path someFile = new Path(new Path(dir.toUri()), "/someFile");
        fs.create(someFile).close();
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        dSource.hdfsUri = null;
        dSource.hdfsConfigs.put(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, miniDFS.getURI().toString());
        someFile = new Path(new Path(dir.toUri()), "/someFile2");
        fs.create(someFile).close();
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        Path dummyFile = new Path(new Path(dir.toUri()), "/dummyFile");
        fs.create(dummyFile).close();
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(dummyFile.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_15"));

        Path emptyDir = new Path(dir.toUri().getPath(), "emptyDir");
        fs.mkdirs(emptyDir);
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_16"));

        Path path1 = new Path(emptyDir, "path1");
        fs.create(path1).close();
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());
    } finally {
        clusterHdfsSource.destroy();
    }
}

From source file: com.talis.hadoop.rdf.RdfSolrJob.java

License: Apache License

private void writeShardManifest(String manifestLocation, String shardLocation, Configuration configuration)
        throws IOException {
    Path shardsPath = new Path(INTERMEDIATE_SHARDS_URI);
    FileSystem fs = FileSystem.get(shardsPath.toUri(), configuration);
    StringBuffer buf = new StringBuffer();
    for (FileStatus status : fs.listStatus(shardsPath)) {
        LOG.info(status.getPath() + " : " + status.isDir());
        if (status.isDir()) {
            buf.append(status.getPath());
            buf.append("\n");
        }
    }
    FSDataOutputStream out = fs.create(new Path(manifestLocation));
    out.write(buf.toString().getBytes());
    out.flush();
    out.close();
}

From source file: com.talis.labs.pagerank.mapreduce.HTMLTable.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: HTMLTable <input path> <rows> <columns>");
        return -1;
    }

    int rows = Integer.parseInt(args[1]);
    int columns = Integer.parseInt(args[2]);

    String[][] pageranks = new String[rows][columns];
    ArrayList<String> correct = new ArrayList<String>();

    FileSystem fs = FileSystem.get(getConf());
    PrintWriter out = new PrintWriter(
            fs.create(new Path(args[0] + File.separator + "pageranks.html")).getWrappedStream());

    {
        BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(
                new Path(args[0] + File.separator + "sorted-pageranks" + File.separator + "part-00000"))));
        for (int i = 0; i < rows; i++) {
            StringTokenizer st = new StringTokenizer(in.readLine());
            correct.add(st.nextToken());
        }
        in.close();
    }

    for (int j = 0; j < columns; j++) {
        BufferedReader in = new BufferedReader(new InputStreamReader(
                fs.open(new Path(args[0] + File.separator + "sorted-pageranks-" + j + ".dat"))));
        for (int i = 0; i < rows; i++) {
            pageranks[i][j] = in.readLine();
        }
        in.close();
    }

    out.println("<table cellspacing=\"2\" cellpadding=\"2\">");

    if (PIXELS_ONLY) {
        for (int i = 0; i < rows; i++) {
            out.print("<tr>");
            for (int j = 0; j < columns; j++) {
                String color = "#cc0000";
                if (correct.get(i).equals(pageranks[i][j])) {
                    color = "#00cc00";
                } else if (correct.contains(pageranks[i][j])) {
                    color = "#cccc00";
                }
                out.print("<td bgcolor=\"" + color + "\"><img src=\"pixel.gif\" width=4 height=4 /></td>");
            }
            out.println("</tr>");
        }
    } else {
        out.print("<tr><td>&nbsp;</td>");
        for (int j = 0; j < columns; j++) {
            out.print("<td>" + (j + 1) + "</td>");
        }
        out.println("</tr>");

        for (int i = 0; i < rows; i++) {
            out.print("<tr><td>" + (i + 1) + "</td>");
            for (int j = 0; j < columns; j++) {
                String color = "#cc0000";
                if (correct.get(i).equals(pageranks[i][j])) {
                    color = "#00cc00";
                } else if (correct.contains(pageranks[i][j])) {
                    color = "#cccc00";
                }

                out.print("<td bgcolor=\"" + color + "\">" + pageranks[i][j] + "</td>");
            }
            out.println("</tr>");
        }
    }

    out.println("</table>");

    out.close();

    return 0;
}

From source file: com.talis.labs.pagerank.mapreduce.PageRank.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: PageRank <input path> <output path> <iterations> <tolerance>");
        return -1;
    }

    FileSystem fs = FileSystem.get(getConf());
    String input = args[1] + File.separator + "previous-pageranks";
    String output = args[1] + File.separator + "current-pageranks";

    ToolRunner.run(getConf(), new CheckingData(), new String[] { args[0], output });
    ToolRunner.run(getConf(), new CountPages(), new String[] { output, args[1] + File.separator + "count" });
    String count = read(fs, args[1] + File.separator + "count");
    ToolRunner.run(getConf(), new InitializePageRanks(), new String[] { output, input, count });
    int i = 0;
    while (i < Integer.parseInt(args[2])) {
        ToolRunner.run(getConf(), new DanglingPages(),
                new String[] { input, args[1] + File.separator + "dangling" });
        String dangling = read(fs, args[1] + File.separator + "dangling");
        ToolRunner.run(getConf(), new UpdatePageRanks(), new String[] { input, output, count, dangling });
        swap(fs, new Path(output), new Path(input));
        if ((i > CHECK_CONVERGENCE_FREQUENCY) && (i % CHECK_CONVERGENCE_FREQUENCY == 0)) {
            ToolRunner.run(getConf(), new CheckConvergence(),
                    new String[] { input, args[1] + File.separator + "convergence" });
            double tolerance = Double.parseDouble(read(fs, args[1] + File.separator + "convergence"));
            if (tolerance <= Double.parseDouble(args[3])) {
                break;
            }
        }

        if (HTML_TABLE) {
            ToolRunner.run(getConf(), new SortPageRanks(),
                    new String[] { input, args[1] + File.separator + "sorted-pageranks" });
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(
                    new Path(args[1] + File.separator + "sorted-pageranks" + File.separator + "part-00000"))));
            PrintWriter out = new PrintWriter(
                    fs.create(new Path(args[1] + File.separator + "sorted-pageranks-" + i + ".dat"))
                            .getWrappedStream());
            for (int j = 0; j < HTML_TABLE_ROWS; j++) {
                StringTokenizer st = new StringTokenizer(in.readLine());
                out.write(st.nextToken() + "\n");
            }
            in.close();
            out.close();
        }

        i++;
    }

    ToolRunner.run(getConf(), new SortPageRanks(),
            new String[] { input, args[1] + File.separator + "sorted-pageranks" });

    if (HTML_TABLE) {
        ToolRunner.run(getConf(), new HTMLTable(),
                new String[] { args[1], Integer.toString(HTML_TABLE_ROWS), Integer.toString(i) });
    }

    return 0;
}

From source file: com.toddbodnar.simpleHive.IO.hdfsFile.java

public static hdfsFile transferToHDFS(file f) throws IOException {
    if (f.getClass().equals(hdfsFile.class))//if the file to be put on hdfs is already on hdfs
        return (hdfsFile) f; //just return the file

    FileSystem fs = FileSystem.get(GetConfiguration.get());
    Path theFile;
    do {
        theFile = new Path(settings.hdfs_prefix + "/LOCAL_TABLE_" + System.currentTimeMillis() + "_"
                + Math.round(Math.random() * 10000));
    } while (fs.exists(theFile));

    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(theFile)));

    f.resetStream();
    while (f.hasNext()) {
        out.write(f.readNextLine() + "\n");
    }
    out.close();
    fs.close();
    return new hdfsFile(theFile);
}

From source file: com.toddbodnar.simpleHive.IO.hdfsFileTest.java

@BeforeClass
public static void setUp() {
    try {
        Path testFile = new Path("hdfs://localhost:8020///" + settings.hdfs_prefix + "/hdfsFileTest.csv");
        FileSystem fs = FileSystem.get(GetConfiguration.get());
        if (fs.exists(testFile)) {
            fs.delete(testFile, true);
        }
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(testFile)));
        out.write("hello\nworld!\n");
        out.close();
        fs.close();
    } catch (Exception ex) {
        System.err.println("Could not create file on HDFS: " + ex);
        nohdfs = true;
    }
}

From source file: com.tomslabs.grid.avro.AvroFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<T, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration config = context.getConfiguration();

    Schema schema = getWriteSchema(config);
    DatumWriter<T> datumWriter = getDatumWriter(config);

    final DataFileWriter<T> writer = new DataFileWriter<T>(datumWriter);

    if (getCompressOutput(context)) {
        int level = config.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path file = getDefaultWorkFile(context, EXT);
    FileSystem fs = file.getFileSystem(config);

    writer.create(schema, fs.create(file));

    return new AvroRecordWriter<T>(writer);
}

From source file: com.trace.hadoop.TestDFSRename.java

License: Apache License

static void createFile(FileSystem fs, Path f) throws IOException {
    DataOutputStream a_out = fs.create(f);
    a_out.writeBytes("something");
    a_out.close();
}

From source file: com.trace.hadoop.TestDFSRename.java

License: Apache License

public void testRename() throws Exception {
    FileSystem fs = cluster.getFileSystem();
    assertTrue(fs.mkdirs(dir));

    { //test lease
        Path a = new Path(dir, "a");
        Path aa = new Path(dir, "aa");
        Path b = new Path(dir, "b");

        createFile(fs, a);

        //should not have any lease
        assertEquals(0, countLease(cluster));

        createFile(fs, aa);
        DataOutputStream aa_out = fs.create(aa);
        aa_out.writeBytes("something");

        //should have 1 lease
        assertEquals(1, countLease(cluster));
        list(fs, "rename0");
        fs.rename(a, b);
        list(fs, "rename1");
        aa_out.writeBytes(" more");
        aa_out.close();
        list(fs, "rename2");

        //should not have any lease
        assertEquals(0, countLease(cluster));
    }

    { // test non-existent destination
        Path dstPath = new Path("/c/d");
        assertFalse(fs.exists(dstPath));
        assertFalse(fs.rename(dir, dstPath));
    }

    { // dst cannot be a file or directory under src
      // test rename /a/b/foo to /a/b/c
        Path src = new Path("/a/b");
        Path dst = new Path("/a/b/c");

        createFile(fs, new Path(src, "foo"));

        // dst cannot be a file under src
        assertFalse(fs.rename(src, dst));

        // dst cannot be a directory under src
        assertFalse(fs.rename(src.getParent(), dst.getParent()));
    }

    { // dst can start with src, if it is not a directory or file under src
      // test rename /test /testfile
        Path src = new Path("/testPrefix");
        Path dst = new Path("/testPrefixfile");

        createFile(fs, src);
        assertTrue(fs.rename(src, dst));
    }

    { // dst should not be same as src test rename /a/b/c to /a/b/c
        Path src = new Path("/a/b/c");
        createFile(fs, src);
        assertTrue(fs.rename(src, src));
        assertFalse(fs.rename(new Path("/a/b"), new Path("/a/b/")));
        assertTrue(fs.rename(src, new Path("/a/b/c/")));
    }

    fs.delete(dir, true);
}