List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
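Before the collected examples, here is a minimal standalone sketch of the single-Path overload, which is the variant most of the snippets below exercise. The "/tmp" directory is a placeholder chosen for illustration:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // "/tmp" is a placeholder; pass any directory (or file) path.
        // For a directory, listStatus returns one FileStatus per entry;
        // for a nonexistent path it throws FileNotFoundException.
        FileStatus[] statuses = fs.listStatus(new Path("/tmp"));
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + "\t"
                    + (status.isDirectory() ? "dir" : status.getLen() + " bytes"));
        }
        fs.close();
    }
}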
From source file:boa.datagen.SeqSort.java
License:Apache License
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    String base = "hdfs://boa-njt/";
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    String inPath = "/tmprepcache/2015-07/";
    StringBuilder sb = new StringBuilder();
    FileStatus[] files = fs.listStatus(new Path(base + inPath));
    for (int i = 0; i < files.length; i++) {
        FileStatus file = files[i];
        String name = file.getPath().getName();
        if (name.startsWith("ast-") && name.endsWith(".seq")) {
            try {
                //ToolRunner.run(new Configuration(), new SeqSort(inPath + name, "/tmprepcache/2015-07-sorted/" + name), null);
                sb.append(name + "\n");
            } catch (Throwable t) {
                t.printStackTrace();
            }
        }
    }
    FileIO.writeFileContents(new File("files2sort.txt"), sb.toString());
}
From source file:boa.datagen.SeqSortMerge.java
License:Apache License
public static void main(String[] args) throws IOException {
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    String inPath = "/tmprepcache/2015-07-sorted/";
    while (true) {
        FileStatus[] files = fs.listStatus(new Path(inPath));
        if (files.length < 2)
            break;
        Path path = new Path(inPath + System.currentTimeMillis());
        fs.mkdirs(path);
        SequenceFile.Writer w = SequenceFile.createWriter(fs, conf,
                new Path(inPath + path.getName() + "/part-00000"), Text.class, BytesWritable.class);
        FileStatus[] candidates = getCandidates(files);
        System.out.println("Merging " + candidates.length + " from " + files.length);
        SequenceFile.Reader[] readers = new SequenceFile.Reader[candidates.length];
        for (int i = 0; i < candidates.length; i++)
            readers[i] = new SequenceFile.Reader(fs,
                    new Path(inPath + candidates[i].getPath().getName() + "/part-00000"), conf);
        Text[] keys = new Text[candidates.length];
        BytesWritable[] values = new BytesWritable[candidates.length];
        read(readers, keys, values);
        while (true) {
            int index = min(keys);
            if (keys[index].toString().isEmpty())
                break;
            w.append(keys[index], values[index]);
            read(readers[index], keys[index], values[index]);
        }
        for (int i = 0; i < readers.length; i++)
            readers[i].close();
        w.close();
        for (int i = 0; i < candidates.length; i++)
            fs.delete(new Path(inPath + candidates[i].getPath().getName()), true);
    }
}
From source file:byte_import.HexastoreBulkImport.java
License:Open Source License
private void loadHFiles() throws Exception {
    conf = HBaseConfiguration.create();
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path("out");
    FileSystem fs = hfofDir.getFileSystem(conf);
    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    int length = 0;
    byte[][] splits = new byte[18000][];
    for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDir()) {
            continue;
        }
        Path familyDir = stat.getPath();
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_"))
            continue;
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
            if (hfile.getName().startsWith("_"))
                continue;
            HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
            final byte[] first;
            try {
                hfr.loadFileInfo();
                first = hfr.getFirstRowKey();
            } finally {
                hfr.close();
            }
            splits[length] = first.clone();
            length++;
        }
    }
    byte[][] splits1 = new byte[length][];
    for (int i = 0; i < splits1.length; i++) {
        splits1[i] = splits[i];
    }
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);
    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
    HColumnDescriptor family = new HColumnDescriptor("A");
    desc.addFamily(family);
    conf.setInt("zookeeper.session.timeout", 600000);
    // The original snippet created the table only when it did not already exist,
    // leaving no target after a drop; recreating it unconditionally after the
    // drop is the assumed intent, since the bulk load below needs the table.
    if (hadmin.tableExists(TABLE_NAME)) {
        hadmin.disableTable(TABLE_NAME);
        hadmin.deleteTable(TABLE_NAME);
    }
    hadmin.createTable(desc, splits1);
    String[] args1 = new String[2];
    args1[0] = "out";
    args1[1] = TABLE_NAME;
    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);
}
From source file:ca.uwaterloo.iss4e.spark.pointperrow.CosineMain.java
License:Open Source License
public void fetch(JavaSparkContext sc, String source) {
    try {
        FileSystem fs = FileSystem.get(new URI(source), new Configuration());
        Path src = new Path(source);
        if (fs.exists(src)) {
            FileStatus[] lists = fs.listStatus(src);
            readFiles(sc, fs, lists);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
}
From source file:ca.uwaterloo.iss4e.spark.pointperrow.CosineMain.java
License:Open Source License
public void readFiles(JavaSparkContext sc, FileSystem fs, FileStatus[] files) {
    for (int i = 0; i < files.length; i++) {
        if (files[i].isDirectory()) {
            try {
                readFiles(sc, fs, fs.listStatus(files[i].getPath()));
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            if (lines == null) {
                Path p = files[i].getPath();
                lines = sc.textFile(p.toString());
            } else {
                JavaRDD<String> r = sc.textFile(files[i].getPath().toString());
                // union() returns a new RDD; the result must be reassigned,
                // otherwise the file is silently dropped (the original snippet
                // discarded it).
                lines = lines.union(r);
            }
        }
    }
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
private static void moveTaskOutputs(JobConf conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id");
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + taskId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }
        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}
From source file:cc.solr.lucene.store.hdfs.ChangeFileExt.java
License:Apache License
public static void main(String[] args) throws IOException {
    Path p = new Path(args[0]);
    FileSystem fileSystem = FileSystem.get(p.toUri(), new Configuration());
    FileStatus[] listStatus = fileSystem.listStatus(p);
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        fileSystem.rename(path, new Path(path.toString() + ".lf"));
    }
}
From source file:cc.solr.lucene.store.hdfs.ConvertDirectory.java
License:Apache License
public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}
From source file:clone.ReadSequenceFile.java
License:Apache License
private static int readSequenceFilesInDir(Path path, FileSystem fs, int max) {
    int n = 0;
    try {
        FileStatus[] stat = fs.listStatus(path);
        for (int i = 0; i < stat.length; ++i) {
            n += readSequenceFile(stat[i].getPath(), fs, max);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println(n + " records read in total.");
    return n;
}