List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
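This overload opens the file at the given Path for writing, overwriting any existing file, and returns an FSDataOutputStream. Before the project examples below, here is a minimal, self-contained sketch of the usual obtain-create-write-close pattern; the Configuration defaults, the /tmp/example.txt path, and the written text are illustrative placeholders only, not taken from the examples that follow:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {
    public static void main(String[] args) throws Exception {
        // fs.defaultFS in the Configuration decides which file system
        // (local, HDFS, ...) serves the path.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical target path used only for this sketch.
        Path target = new Path("/tmp/example.txt");

        // create(Path) returns an FSDataOutputStream; by default it
        // overwrites an existing file at the same path.
        FSDataOutputStream out = fs.create(target);
        try {
            out.writeBytes("hello, hdfs\n");
        } finally {
            out.close(); // close to flush data and release the handle
        }
    }
}

Most of the examples below follow this same shape: they either wrap the returned stream in a writer (OutputStreamWriter, PrintWriter, Avro's DataFileWriter) or simply call create(...).close() to produce an empty file.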
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHDFSSourceIT.java
License:Apache License
@Test
public void testWrongHDFSDirLocation() throws Exception {
    ClusterHdfsConfigBean conf = new ClusterHdfsConfigBean();
    conf.hdfsUri = miniDFS.getURI().toString();
    conf.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
    conf.hdfsConfigs = new HashMap<>();
    conf.hdfsConfigs.put("x", "X");
    conf.dataFormat = DataFormat.TEXT;
    conf.dataFormatConfig.textMaxLineLen = 1024;

    // URI with no scheme or authority is rejected (HADOOPFS_02).
    conf.hdfsUri = "/pathwithnoschemeorauthority";
    ClusterHdfsSource clusterHdfsSource = createSource(conf);
    try {
        List<ConfigIssue> issues = clusterHdfsSource.init(null, ContextInfoCreator
                .createSourceContext("myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_02"));

        // Non-HDFS scheme is rejected (HADOOPFS_12).
        conf.hdfsUri = "file://localhost:8020/";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_12"));

        // hdfs:// URI without an authority is rejected (HADOOPFS_13).
        conf.hdfsUri = "hdfs:///noauthority";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_13"));

        // Unreachable namenode is reported (HADOOPFS_11).
        conf.hdfsUri = "hdfs://localhost:50000";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_11"));

        // Directory location that does not exist is reported (HADOOPFS_10).
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList("/pathdoesnotexist");
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_10"));

        // Valid URI and a directory containing a file: no issues.
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
        FileSystem fs = miniDFS.getFileSystem();
        Path someFile = new Path(new Path(dir.toUri()), "/someFile");
        fs.create(someFile).close();
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        // The URI may instead be supplied through fs.default.name in hdfsConfigs.
        conf.hdfsUri = null;
        conf.hdfsConfigs.put(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, miniDFS.getURI().toString());
        someFile = new Path(new Path(dir.toUri()), "/someFile2");
        fs.create(someFile).close();
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        // A file (rather than a directory) as the dir location is rejected (HADOOPFS_15).
        Path dummyFile = new Path(new Path(dir.toUri()), "/dummyFile");
        fs.create(dummyFile).close();
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(dummyFile.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_15"));

        // An empty directory is reported (HADOOPFS_16).
        Path emptyDir = new Path(dir.toUri().getPath(), "emptyDir");
        fs.mkdirs(emptyDir);
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_16"));

        // Once the directory contains a file, validation passes.
        Path path1 = new Path(emptyDir, "path1");
        fs.create(path1).close();
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());
    } finally {
        clusterHdfsSource.destroy();
    }
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.TestClusterHDFSSource.java
License:Apache License
@Test
public void testWrongHDFSDirLocation() throws Exception {
    ClusterHdfsDSource dSource = new ForTestClusterHdfsDSource();
    configure(dSource, dir.toUri().getPath());
    dSource.hdfsUri = "/pathwithnoschemeorauthority";
    ClusterHdfsSource clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    try {
        List<ConfigIssue> issues = clusterHdfsSource.init(null, ContextInfoCreator
                .createSourceContext("myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_02"));

        dSource.hdfsUri = "file://localhost:8020/";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_12"));

        dSource.hdfsUri = "hdfs:///noauthority";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_13"));

        dSource.hdfsUri = "hdfs://localhost:8020";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_11"));

        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList("/pathdoesnotexist");
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_10"));

        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
        FileSystem fs = miniDFS.getFileSystem();
        Path someFile = new Path(new Path(dir.toUri()), "/someFile");
        fs.create(someFile).close();
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        dSource.hdfsUri = null;
        dSource.hdfsConfigs.put(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, miniDFS.getURI().toString());
        someFile = new Path(new Path(dir.toUri()), "/someFile2");
        fs.create(someFile).close();
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        Path dummyFile = new Path(new Path(dir.toUri()), "/dummyFile");
        fs.create(dummyFile).close();
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(dummyFile.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_15"));

        Path emptyDir = new Path(dir.toUri().getPath(), "emptyDir");
        fs.mkdirs(emptyDir);
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_16"));

        Path path1 = new Path(emptyDir, "path1");
        fs.create(path1).close();
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
                OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());
    } finally {
        clusterHdfsSource.destroy();
    }
}
From source file:com.talis.hadoop.rdf.RdfSolrJob.java
License:Apache License
private void writeShardManifest(String manifestLocation, String shardLocation, Configuration configuration)
        throws IOException {
    Path shardsPath = new Path(INTERMEDIATE_SHARDS_URI);
    FileSystem fs = FileSystem.get(shardsPath.toUri(), configuration);

    StringBuffer buf = new StringBuffer();
    for (FileStatus status : fs.listStatus(shardsPath)) {
        LOG.info(status.getPath() + " : " + status.isDir());
        if (status.isDir()) {
            buf.append(status.getPath());
            buf.append("\n");
        }
    }

    FSDataOutputStream out = fs.create(new Path(manifestLocation));
    out.write(buf.toString().getBytes());
    out.flush();
    out.close();
}
From source file:com.talis.labs.pagerank.mapreduce.HTMLTable.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: HTMLTable <input path> <rows> <columns>");
        return -1;
    }

    int rows = Integer.parseInt(args[1]);
    int columns = Integer.parseInt(args[2]);
    String[][] pageranks = new String[rows][columns];
    ArrayList<String> correct = new ArrayList<String>();

    FileSystem fs = FileSystem.get(getConf());
    PrintWriter out = new PrintWriter(
            fs.create(new Path(args[0] + File.separator + "pageranks.html")).getWrappedStream());

    {
        BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(
                new Path(args[0] + File.separator + "sorted-pageranks" + File.separator + "part-00000"))));
        for (int i = 0; i < rows; i++) {
            StringTokenizer st = new StringTokenizer(in.readLine());
            correct.add(st.nextToken());
        }
        in.close();
    }

    for (int j = 0; j < columns; j++) {
        BufferedReader in = new BufferedReader(new InputStreamReader(
                fs.open(new Path(args[0] + File.separator + "sorted-pageranks-" + j + ".dat"))));
        for (int i = 0; i < rows; i++) {
            pageranks[i][j] = in.readLine();
        }
        in.close();
    }

    out.println("<table cellspacing=\"2\" cellpadding=\"2\">");
    if (PIXELS_ONLY) {
        for (int i = 0; i < rows; i++) {
            out.print("<tr>");
            for (int j = 0; j < columns; j++) {
                String color = "#cc0000";
                if (correct.get(i).equals(pageranks[i][j])) {
                    color = "#00cc00";
                } else if (correct.contains(pageranks[i][j])) {
                    color = "#cccc00";
                }
                out.print("<td bgcolor=\"" + color + "\"><img src=\"pixel.gif\" width=4 height=4 /></td>");
            }
            out.println("</tr>");
        }
    } else {
        out.print("<tr><td> </td>");
        for (int j = 0; j < columns; j++) {
            out.print("<td>" + (j + 1) + "</td>");
        }
        out.println("</tr>");
        for (int i = 0; i < rows; i++) {
            out.print("<tr><td>" + (i + 1) + "</td>");
            for (int j = 0; j < columns; j++) {
                String color = "#cc0000";
                if (correct.get(i).equals(pageranks[i][j])) {
                    color = "#00cc00";
                } else if (correct.contains(pageranks[i][j])) {
                    color = "#cccc00";
                }
                out.print("<td bgcolor=\"" + color + "\">" + pageranks[i][j] + "</td>");
            }
            out.println("</tr>");
        }
    }
    out.println("</table>");
    out.close();

    return 0;
}
From source file:com.talis.labs.pagerank.mapreduce.PageRank.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: PageRank <input path> <output path> <iterations> <tolerance>");
        return -1;
    }

    FileSystem fs = FileSystem.get(getConf());

    String input = args[1] + File.separator + "previous-pageranks";
    String output = args[1] + File.separator + "current-pageranks";

    ToolRunner.run(getConf(), new CheckingData(), new String[] { args[0], output });
    ToolRunner.run(getConf(), new CountPages(), new String[] { output, args[1] + File.separator + "count" });
    String count = read(fs, args[1] + File.separator + "count");
    ToolRunner.run(getConf(), new InitializePageRanks(), new String[] { output, input, count });

    int i = 0;
    while (i < Integer.parseInt(args[2])) {
        ToolRunner.run(getConf(), new DanglingPages(),
                new String[] { input, args[1] + File.separator + "dangling" });
        String dangling = read(fs, args[1] + File.separator + "dangling");
        ToolRunner.run(getConf(), new UpdatePageRanks(), new String[] { input, output, count, dangling });
        swap(fs, new Path(output), new Path(input));

        if ((i > CHECK_CONVERGENCE_FREQUENCY) && (i % CHECK_CONVERGENCE_FREQUENCY == 0)) {
            ToolRunner.run(getConf(), new CheckConvergence(),
                    new String[] { input, args[1] + File.separator + "convergence" });
            double tolerance = Double.parseDouble(read(fs, args[1] + File.separator + "convergence"));
            if (tolerance <= Double.parseDouble(args[3])) {
                break;
            }
        }

        if (HTML_TABLE) {
            ToolRunner.run(getConf(), new SortPageRanks(),
                    new String[] { input, args[1] + File.separator + "sorted-pageranks" });
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(
                    new Path(args[1] + File.separator + "sorted-pageranks" + File.separator + "part-00000"))));
            PrintWriter out = new PrintWriter(
                    fs.create(new Path(args[1] + File.separator + "sorted-pageranks-" + i + ".dat"))
                            .getWrappedStream());
            for (int j = 0; j < HTML_TABLE_ROWS; j++) {
                StringTokenizer st = new StringTokenizer(in.readLine());
                out.write(st.nextToken() + "\n");
            }
            in.close();
            out.close();
        }

        i++;
    }

    ToolRunner.run(getConf(), new SortPageRanks(),
            new String[] { input, args[1] + File.separator + "sorted-pageranks" });

    if (HTML_TABLE) {
        ToolRunner.run(getConf(), new HTMLTable(),
                new String[] { args[1], Integer.toString(HTML_TABLE_ROWS), Integer.toString(i) });
    }

    return 0;
}
From source file:com.toddbodnar.simpleHive.IO.hdfsFile.java
public static hdfsFile transferToHDFS(file f) throws IOException {
    if (f.getClass().equals(hdfsFile.class)) { // if the file to be put on HDFS is already on HDFS
        return (hdfsFile) f;                   // just return the file
    }

    FileSystem fs = FileSystem.get(GetConfiguration.get());
    Path theFile;
    do {
        theFile = new Path(settings.hdfs_prefix + "/LOCAL_TABLE_" + System.currentTimeMillis() + "_"
                + Math.round(Math.random() * 10000));
    } while (fs.exists(theFile));

    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(theFile)));
    f.resetStream();
    while (f.hasNext()) {
        out.write(f.readNextLine() + "\n");
    }
    out.close();
    fs.close();

    return new hdfsFile(theFile);
}
From source file:com.toddbodnar.simpleHive.IO.hdfsFileTest.java
@BeforeClass
public static void setUp() {
    try {
        Path testFile = new Path("hdfs://localhost:8020///" + settings.hdfs_prefix + "/hdfsFileTest.csv");
        FileSystem fs = FileSystem.get(GetConfiguration.get());
        if (fs.exists(testFile)) {
            fs.delete(testFile, true);
        }
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(testFile)));
        out.write("hello\nworld!\n");
        out.close();
        fs.close();
    } catch (Exception ex) {
        System.err.println("Could not create file on HDFS: " + ex);
        nohdfs = true;
    }
}
From source file:com.tomslabs.grid.avro.AvroFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<T, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration config = context.getConfiguration();

    Schema schema = getWriteSchema(config);
    DatumWriter<T> datumWriter = getDatumWriter(config);

    final DataFileWriter<T> writer = new DataFileWriter<T>(datumWriter);
    if (getCompressOutput(context)) {
        int level = config.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path file = getDefaultWorkFile(context, EXT);
    FileSystem fs = file.getFileSystem(config);
    writer.create(schema, fs.create(file));

    return new AvroRecordWriter<T>(writer);
}
From source file:com.trace.hadoop.TestDFSRename.java
License:Apache License
static void createFile(FileSystem fs, Path f) throws IOException {
    DataOutputStream a_out = fs.create(f);
    a_out.writeBytes("something");
    a_out.close();
}
From source file:com.trace.hadoop.TestDFSRename.java
License:Apache License
public void testRename() throws Exception {
    FileSystem fs = cluster.getFileSystem();
    assertTrue(fs.mkdirs(dir));

    { // test lease
        Path a = new Path(dir, "a");
        Path aa = new Path(dir, "aa");
        Path b = new Path(dir, "b");

        createFile(fs, a);
        // should not have any lease
        assertEquals(0, countLease(cluster));

        createFile(fs, aa);
        DataOutputStream aa_out = fs.create(aa);
        aa_out.writeBytes("something");
        // should have 1 lease
        assertEquals(1, countLease(cluster));

        list(fs, "rename0");
        fs.rename(a, b);
        list(fs, "rename1");
        aa_out.writeBytes(" more");
        aa_out.close();
        list(fs, "rename2");

        // should not have any lease
        assertEquals(0, countLease(cluster));
    }

    { // test non-existent destination
        Path dstPath = new Path("/c/d");
        assertFalse(fs.exists(dstPath));
        assertFalse(fs.rename(dir, dstPath));
    }

    { // dst cannot be a file or directory under src
      // test rename /a/b/foo to /a/b/c
        Path src = new Path("/a/b");
        Path dst = new Path("/a/b/c");
        createFile(fs, new Path(src, "foo"));
        // dst cannot be a file under src
        assertFalse(fs.rename(src, dst));
        // dst cannot be a directory under src
        assertFalse(fs.rename(src.getParent(), dst.getParent()));
    }

    { // dst can start with src, if it is not a directory or file under src
      // test rename /testPrefix to /testPrefixfile
        Path src = new Path("/testPrefix");
        Path dst = new Path("/testPrefixfile");
        createFile(fs, src);
        assertTrue(fs.rename(src, dst));
    }

    { // dst should not be same as src: test rename /a/b/c to /a/b/c
        Path src = new Path("/a/b/c");
        createFile(fs, src);
        assertTrue(fs.rename(src, src));
        assertFalse(fs.rename(new Path("/a/b"), new Path("/a/b/")));
        assertTrue(fs.rename(src, new Path("/a/b/c/")));
    }

    fs.delete(dir, true);
}