List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
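Before the per-project examples, a minimal sketch of this overload: listStatus(Path[]) lists every path in the array in a single call and returns the concatenated FileStatus entries, throwing FileNotFoundException if any of the paths is missing. The HDFS URI and directory names below are hypothetical placeholders, not taken from any of the source files that follow.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical cluster URI; replace with your own NameNode address.
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000/"), conf);

        // Both directories are assumed to exist; a missing path raises FileNotFoundException.
        Path[] dirs = { new Path("/user/alice/input"), new Path("/user/alice/output") };

        // Statuses of the children of all given paths, concatenated into one array.
        FileStatus[] statuses = fs.listStatus(dirs);
        for (Path p : FileUtil.stat2Paths(statuses)) {
            System.out.println(p);
        }
    }
}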
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);
    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
        InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
        IOUtils.copyBytes(in, out, configuration, false);
        in.close();
    }
    out.close();
}
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);

    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}
From source file:cmd.tdbloader4.java
License:Apache License
private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            String line = in.readLine();
            String[] tokens = line.split("\\s");
            long partition = Long.valueOf(tokens[0]);
            long offset = Long.valueOf(tokens[1]);
            log.debug("Partition {} has offset {}", partition, offset);
            offsets.put(partition, offset);
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    for (Long partition : offsets.keySet()) {
        out.println(partition + "\t" + offsets.get(partition));
    }
    out.close();
    log.debug("Offset file created.");
}
From source file:cn.lhfei.hadoop.ch03.ListStatus.java
License:Apache License
public static void main(String[] args) {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);

        Path[] paths = new Path[args.length];
        for (int i = 0; i < paths.length; i++) {
            paths[i] = new Path(args[i]);
        }

        FileStatus[] status = fs.listStatus(paths);
        Path[] listPath = FileUtil.stat2Paths(status);

        for (Path p : listPath) {
            log.info(p.toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java
License:Apache License
/**
 * Get the input splits for a stream.
 *
 * @param conf Configuration of the filesystem the stream resides on.
 * @return List of input splits for the stream.
 * @throws IOException
 */
public List<T> getSplits(Configuration conf) throws IOException {
    List<T> splits = Lists.newArrayList();

    // Collects all stream event files timestamp, size and block locations information
    // First grab all directories (partition) that matches with the time range.
    FileSystem fs = path.getFileSystem(conf);
    for (FileStatus partitionStatus : fs.listStatus(path)) {
        // partition should be directory
        String pathName = partitionStatus.getPath().getName();
        if (!partitionStatus.isDirectory() || !StreamUtils.isPartition(pathName)) {
            continue;
        }

        // Match the time range
        long partitionStartTime = StreamUtils.getPartitionStartTime(pathName);
        long partitionEndTime = StreamUtils.getPartitionEndTime(pathName);
        if (partitionStartTime > endTime || partitionEndTime <= startTime) {
            continue;
        }

        // Collects all bucket file status in the partition.
        Collection<StreamDataFileSplitter> eventFiles = collectBuckets(fs, partitionStatus.getPath());

        // For each bucket inside the partition directory, compute the splits
        for (StreamDataFileSplitter splitter : eventFiles) {
            splitter.computeSplits(fs, minSplitSize, maxSplitSize, startTime, endTime, splits, splitFactory);
        }
    }

    return splits;
}
From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java
License:Apache License
/**
 * Collects file status of all buckets under a given partition.
 */
private Collection<StreamDataFileSplitter> collectBuckets(FileSystem fs, Path partitionPath) throws IOException {
    ImmutableList.Builder<StreamDataFileSplitter> builder = ImmutableList.builder();

    for (FileStatus fileStatus : fs.listStatus(partitionPath)) {
        if (StreamFileType.EVENT.isMatched(fileStatus.getPath().getName())) {
            builder.add(new StreamDataFileSplitter(fileStatus));
        }
    }

    return builder.build();
}
From source file:co.cask.cdap.data.tools.ReplicationStatusTool.java
License:Apache License
private static void addAllDirFiles(Path filePath, FileSystem fs, List<String> fileList) throws IOException {
    FileStatus[] fileStatus = fs.listStatus(filePath);
    for (FileStatus fileStat : fileStatus) {
        if (fileStat.isDirectory()) {
            addAllDirFiles(fileStat.getPath(), fs, fileList);
        } else {
            fileList.add(fileStat.getPath().toString());
        }
    }
}
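As an aside (not part of the ReplicationStatusTool source): on Hadoop 2.x and later the same recursive walk can also be written with FileSystem.listFiles(path, true), which returns a RemoteIterator and avoids the explicit recursion. A minimal sketch, assuming the same fs and fileList setup as above; the method name addAllDirFiles2 is hypothetical.

// Alternative sketch: recursive listing via listFiles() instead of manual recursion.
// Requires org.apache.hadoop.fs.LocatedFileStatus and org.apache.hadoop.fs.RemoteIterator.
private static void addAllDirFiles2(Path filePath, FileSystem fs, List<String> fileList) throws IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(filePath, true); // true = recurse into subdirectories
    while (it.hasNext()) {
        fileList.add(it.next().getPath().toString());
    }
}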
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the File System to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }

        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                // It is a directory so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            // it does not exist, just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();

    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    // get regex pattern for file name filtering.
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}
From source file:co.cask.hydrator.plugin.db.batch.action.VerticaBulkImportAction.java
License:Apache License
@Override
public void run(ActionContext context) throws Exception {
    Object driver = Class.forName("com.vertica.jdbc.Driver").newInstance();
    DriverManager.registerDriver((Driver) driver);

    Preconditions.checkArgument(tableExists(config.tableName),
            "Table %s does not exist. Please check that the 'tableName' property "
                    + "has been set correctly, and that the connection string %s points to a valid database.",
            config.tableName, config.connectionString);

    String copyStatement;
    if (config.level.equalsIgnoreCase("basic")) {
        // COPY tableName FROM STDIN DELIMITER 'delimiter'
        copyStatement = String.format("COPY %s FROM STDIN DELIMITER '%s'", config.tableName, config.delimiter);
    } else {
        copyStatement = config.copyStatement;
    }

    LOG.debug("Copy statement is: {}", copyStatement);

    try {
        try (Connection connection = DriverManager.getConnection(config.connectionString, config.user,
                config.password)) {
            connection.setAutoCommit(false);
            // run Copy statement
            VerticaCopyStream stream = new VerticaCopyStream((VerticaConnection) connection, copyStatement);
            // Keep running count of the number of rejects
            int totalRejects = 0;
            // start() starts the stream process, and opens the COPY command.
            stream.start();

            FileSystem fs = FileSystem.get(new Configuration());

            List<String> fileList = new ArrayList<>();
            FileStatus[] fileStatus;
            try {
                fileStatus = fs.listStatus(new Path(config.path));
                for (FileStatus fileStat : fileStatus) {
                    fileList.add(fileStat.getPath().toString());
                }
            } catch (FileNotFoundException e) {
                throw new IllegalArgumentException(String.format(String.format(
                        "Path %s not found on file system. Please provide correct path.", config.path), e));
            }

            if (fileStatus.length <= 0) {
                LOG.warn("No files available to load into vertica database");
            }

            for (String file : fileList) {
                Path path = new Path(file);

                FSDataInputStream inputStream = fs.open(path);
                // Add stream to the VerticaCopyStream
                stream.addStream(inputStream);
                // call execute() to load the newly added stream. You could
                // add many streams and call execute once to load them all.
                // Which method you choose depends mainly on whether you want
                // the ability to check the number of rejections as the load
                // progresses so you can stop if the number of rejects gets too
                // high. Also, high numbers of InputStreams could create a
                // resource issue on your client system.
                stream.execute();

                // Show any rejects from this execution of the stream load
                // getRejects() returns a List containing the
                // row numbers of rejected rows.
                List<Long> rejects = stream.getRejects();

                // The size of the list gives you the number of rejected rows.
                int numRejects = rejects.size();
                totalRejects += numRejects;

                if (config.autoCommit.equalsIgnoreCase("true")) {
                    // Commit the loaded data
                    connection.commit();
                }
            }

            // Finish closes the COPY command. It returns the number of
            // rows inserted.
            long results = stream.finish();

            context.getMetrics().gauge("num.of.rows.rejected", totalRejects);
            context.getMetrics().gauge("num.of.rows.inserted", results);

            // Commit the loaded data
            connection.commit();
        }
    } catch (Exception e) {
        throw new RuntimeException(String.format("Exception while running copy statement %s", copyStatement), e);
    } finally {
        DriverManager.deregisterDriver((Driver) driver);
    }
}