List of usage examples for org.apache.hadoop.fs FileSystem isFile
@Deprecated public boolean isFile(Path f) throws IOException
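FileSystem.isFile(Path) is deprecated in favor of getFileStatus(Path). Below is a minimal sketch of the replacement pattern; the class and helper names are illustrative, and the FileNotFoundException branch mirrors the deprecated method's behavior of returning false for missing paths.

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
  // Hypothetical helper: equivalent of the deprecated fs.isFile(path).
  static boolean isFile(FileSystem fs, Path path) throws IOException {
    try {
      return fs.getFileStatus(path).isFile(); // non-deprecated check
    } catch (FileNotFoundException e) {
      return false; // the deprecated isFile() also returned false for missing paths
    }
  }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path p = new Path(args[0]);
    FileSystem fs = p.getFileSystem(conf);
    System.out.println(p + (isFile(fs, p) ? " is a file" : " is not a regular file"));
  }
}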
From source file:org.apache.hama.examples.Kmeans.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length < 4 || (args.length > 4 && args.length != 7)) {
      System.out.println(
          "USAGE: <INPUT_PATH> <OUTPUT_PATH> <MAXITERATIONS> <K (how many centers)> -g [<COUNT> <DIMENSION OF VECTORS>]");
      return;
    }
    HamaConfiguration conf = new HamaConfiguration();
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);
    Path center = null;
    if (fs.isFile(in)) {
      center = new Path(in.getParent(), "center/cen.seq");
    } else {
      center = new Path(in, "center/cen.seq");
    }
    Path centerOut = new Path(out, "center/center_output.seq");
    conf.set(KMeansBSP.CENTER_IN_PATH, center.toString());
    conf.set(KMeansBSP.CENTER_OUT_PATH, centerOut.toString());
    int iterations = Integer.parseInt(args[2]);
    conf.setInt(KMeansBSP.MAX_ITERATIONS_KEY, iterations);
    int k = Integer.parseInt(args[3]);
    if (args.length == 7 && args[4].equals("-g")) {
      int count = Integer.parseInt(args[5]);
      if (k > count)
        throw new IllegalArgumentException("K can't be greater than n!");
      int dimension = Integer.parseInt(args[6]);
      System.out.println("N: " + count + " Dimension: " + dimension + " Iterations: " + iterations);
      if (!fs.isFile(in)) {
        in = new Path(in, "input.seq");
      }
      // prepare the input, like deleting old versions and creating centers
      KMeansBSP.prepareInput(count, k, dimension, conf, in, center, out, fs);
    } else {
      if (!fs.isFile(in)) {
        System.out.println("Cannot read text input file: " + in.toString());
        return;
      }
      // Set the last argument to TRUE if first column is required to be the key
      in = KMeansBSP.prepareInputText(k, conf, in, center, out, fs, true);
    }
    BSPJob job = KMeansBSP.createJob(conf, in, out, true);
    long startTime = System.currentTimeMillis();
    // just submit the job
    if (job.waitForCompletion(true)) {
      System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }
    System.out.println("\nHere are a few lines of output:");
    List<String> results = KMeansBSP.readOutput(conf, out, fs, 4);
    for (String line : results) {
      System.out.println(line);
    }
    System.out.println("...");
}
From source file:org.apache.hama.examples.SpMV.java
License:Apache License
public static void readFromFile(String pathString, Writable result, HamaConfiguration conf)
    throws IOException {
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = null;
    Path path = new Path(pathString);
    List<String> filePaths = new ArrayList<String>();
    if (!fs.isFile(path)) {
      FileStatus[] stats = fs.listStatus(path);
      for (FileStatus stat : stats) {
        filePaths.add(stat.getPath().toUri().getPath());
      }
    } else if (fs.isFile(path)) {
      filePaths.add(path.toString());
    }
    try {
      for (String filePath : filePaths) {
        reader = new SequenceFile.Reader(fs, new Path(filePath), conf);
        IntWritable key = new IntWritable();
        reader.next(key, result);
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    } finally {
      if (reader != null)
        reader.close();
    }
}
From source file:org.apache.hama.examples.util.WritableUtil.java
License:Apache License
/**
 * This method is used to read a vector from the specified path in SpMVTest.
 * For test purposes only.
 *
 * @param pathString
 *          input path for the vector
 * @param result
 *          instance of vector writable which should be filled.
 * @param conf
 *          configuration
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static void readFromFile(String pathString, Writable result, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = null;
    Path path = new Path(pathString);
    List<String> filePaths = new ArrayList<String>();
    // TODO this deprecation should be fixed.
    if (fs.isDirectory(path)) {
      FileStatus[] stats = fs.listStatus(path);
      for (FileStatus stat : stats) {
        filePaths.add(stat.getPath().toUri().getPath());
      }
    } else if (fs.isFile(path)) {
      filePaths.add(path.toString());
    }
    try {
      for (String filePath : filePaths) {
        reader = new SequenceFile.Reader(fs, new Path(filePath), conf);
        IntWritable key = new IntWritable();
        reader.next(key, result);
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    } finally {
      if (reader != null)
        reader.close();
    }
}
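The TODO in this listing points at deprecated API usage; in particular, the SequenceFile.Reader(FileSystem, Path, Configuration) constructor used in the loop is deprecated in Hadoop 2.x in favor of an option-based constructor. A minimal sketch of that replacement, keeping the same read-one-record behavior (the class and method names here are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;

public class SequenceFileReadSketch {
  // Reads the first record of a sequence file into 'result' using the
  // option-based Reader constructor (Hadoop 2.x+), which replaces the
  // deprecated new SequenceFile.Reader(fs, path, conf).
  static void readFirstRecord(String pathString, Writable result, Configuration conf) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(new Path(pathString)));
    try {
      IntWritable key = new IntWritable();
      reader.next(key, result);
    } finally {
      reader.close();
    }
  }
}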
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
/**
 * Reads input text files and writes them to a sequencefile.
 *
 * @param k
 * @param conf
 * @param txtIn
 * @param center
 * @param out
 * @param fs
 * @param hasKey true if first column is required to be the key.
 * @return the path of a sequencefile.
 * @throws IOException
 */
public static Path prepareInputText(int k, Configuration conf, Path txtIn, Path center, Path out,
    FileSystem fs, boolean hasKey) throws IOException {
    Path in;
    if (fs.isFile(txtIn)) {
      in = new Path(txtIn.getParent(), "textinput/in.seq");
    } else {
      in = new Path(txtIn, "textinput/in.seq");
    }
    if (fs.exists(out))
      fs.delete(out, true);
    if (fs.exists(center))
      fs.delete(center, true);
    if (fs.exists(in))
      fs.delete(in, true);
    final NullWritable value = NullWritable.get();
    Writer centerWriter = new SequenceFile.Writer(fs, conf, center, VectorWritable.class, NullWritable.class);
    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, VectorWritable.class,
        NullWritable.class, CompressionType.NONE);
    int i = 0;
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(txtIn)));
    String line;
    while ((line = br.readLine()) != null) {
      String[] split = line.split("\t");
      int columnLength = split.length;
      int indexPos = 0;
      if (hasKey) {
        columnLength = columnLength - 1;
        indexPos++;
      }
      DenseDoubleVector vec = new DenseDoubleVector(columnLength);
      for (int j = 0; j < columnLength; j++) {
        vec.set(j, Double.parseDouble(split[j + indexPos]));
      }
      VectorWritable vector;
      if (hasKey) {
        NamedDoubleVector named = new NamedDoubleVector(split[0], vec);
        vector = new VectorWritable(named);
      } else {
        vector = new VectorWritable(vec);
      }
      dataWriter.append(vector, value);
      if (k > i) {
        centerWriter.append(vector, value);
      }
      i++;
    }
    br.close();
    centerWriter.close();
    dataWriter.close();
    return in;
}
From source file:org.apache.hama.ml.recommendation.cf.OnlineCF.java
License:Apache License
@Override
public boolean load(String path, boolean lazy) {
    this.isLazyLoadModel = lazy;
    this.modelPath = path;
    if (lazy == false) {
      Configuration conf = new Configuration();
      Path dataPath = new Path(modelPath);
      try {
        FileSystem fs = dataPath.getFileSystem(conf);
        LinkedList<Path> files = new LinkedList<Path>();
        if (!fs.exists(dataPath)) {
          this.isLazyLoadModel = false;
          this.modelPath = null;
          return false;
        }
        if (!fs.isFile(dataPath)) {
          for (int i = 0; i < 100000; i++) {
            Path partFile = new Path(modelPath + "/part-" + String.valueOf(100000 + i).substring(1, 6));
            if (fs.exists(partFile)) {
              files.add(partFile);
            } else {
              break;
            }
          }
        } else {
          files.add(dataPath);
        }
        LOG.info("loading model from " + path);
        for (Path file : files) {
          SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
          Text key = new Text();
          VectorWritable value = new VectorWritable();
          String strKey = null;
          Long actualKey = null;
          String firstSymbol = null;
          while (reader.next(key, value) != false) {
            strKey = key.toString();
            firstSymbol = strKey.substring(0, 1);
            try {
              actualKey = Long.valueOf(strKey.substring(1));
            } catch (Exception e) {
              actualKey = new Long(0);
            }
            if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_ITEM_DELIM)) {
              modelItemFactorizedValues.put(actualKey, new VectorWritable(value));
            } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_USER_DELIM)) {
              modelUserFactorizedValues.put(actualKey, new VectorWritable(value));
            } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_USER_FEATURES_DELIM)) {
              modelUserFeatures.put(actualKey, new VectorWritable(value));
            } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_ITEM_FEATURES_DELIM)) {
              modelItemFeatures.put(actualKey, new VectorWritable(value));
            } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_USER_MTX_FEATURES_DELIM)) {
              modelUserFeatureFactorizedValues = convertVectorWritable(value);
            } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_ITEM_MTX_FEATURES_DELIM)) {
              modelItemFeatureFactorizedValues = convertVectorWritable(value);
            } else {
              // unknown
              continue;
            }
          }
          reader.close();
        }
        LOG.info("loaded: " + modelUserFactorizedValues.size() + " users, " + modelUserFeatures.size()
            + " user features, " + modelItemFactorizedValues.size() + " items, " + modelItemFeatures.size()
            + " item feature values");
      } catch (Exception e) {
        e.printStackTrace();
        this.isLazyLoadModel = false;
        this.modelPath = null;
        return false;
      }
    }
    return true;
}
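As an aside, the counting loop above that probes part-00000, part-00001, ... one by one can be expressed as a glob listing. A minimal sketch, assuming the standard "part-NNNNN" names produced by Hadoop output committers; note that, unlike the loop, a glob also picks up part files that follow a gap in the numbering:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PartFileListing {
  // Sketch of an alternative to the counting loop in load() above:
  // let the filesystem enumerate part files with a glob pattern.
  static List<Path> listPartFiles(FileSystem fs, Path modelDir) throws IOException {
    List<Path> files = new ArrayList<Path>();
    FileStatus[] parts = fs.globStatus(new Path(modelDir, "part-*"));
    if (parts != null) { // globStatus may return null when nothing matches
      for (FileStatus part : parts) {
        files.add(part.getPath());
      }
    }
    return files;
  }
}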
From source file:org.apache.hama.pipes.util.SequenceFileDumper.java
License:Apache License
public static void main(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
      cli.printUsage();
      return;
    }
    // Add arguments
    cli.addOption("file", false, "The Sequence File containing the Clusters", "path");
    cli.addOption("output", false, "The output file. If not specified, dumps to the console", "path");
    cli.addOption("substring", false, "The number of chars of the FormatString() to print", "number");
    cli.addOption("count", false, "Report the count only", "number");
    Parser parser = cli.createParser();
    try {
      HamaConfiguration conf = new HamaConfiguration();
      CommandLine cmdLine = parser.parse(cli.options, args);
      if (cmdLine.hasOption("file")) {
        Path path = new Path(cmdLine.getOptionValue("file"));
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        if (!fs.isFile(path)) {
          System.out.println("File does not exist: " + path.toString());
          return;
        }
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        Writer writer;
        if (cmdLine.hasOption("output")) {
          writer = new FileWriter(cmdLine.getOptionValue("output"));
        } else {
          writer = new OutputStreamWriter(System.out);
        }
        writer.append("Input Path: ").append(String.valueOf(path)).append(LINE_SEP);
        int sub = Integer.MAX_VALUE;
        if (cmdLine.hasOption("substring")) {
          sub = Integer.parseInt(cmdLine.getOptionValue("substring"));
        }
        Writable key;
        if (reader.getKeyClass() != NullWritable.class) {
          key = (Writable) reader.getKeyClass().newInstance();
        } else {
          key = NullWritable.get();
        }
        Writable value;
        if (reader.getValueClass() != NullWritable.class) {
          value = (Writable) reader.getValueClass().newInstance();
        } else {
          value = NullWritable.get();
        }
        writer.append("Key class: ").append(String.valueOf(reader.getKeyClass())).append(" Value Class: ")
            .append(String.valueOf(value.getClass())).append(LINE_SEP);
        writer.flush();
        long count = 0;
        boolean countOnly = cmdLine.hasOption("count");
        if (countOnly == false) {
          while (reader.next(key, value)) {
            writer.append("Key: ").append(String.valueOf(key));
            String str = value.toString();
            writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
            writer.write(LINE_SEP);
            writer.flush();
            count++;
          }
          writer.append("Count: ").append(String.valueOf(count)).append(LINE_SEP);
        } else {
          // count only
          while (reader.next(key, value)) {
            count++;
          }
          writer.append("Count: ").append(String.valueOf(count)).append(LINE_SEP);
        }
        writer.flush();
        if (cmdLine.hasOption("output")) {
          writer.close();
        }
        reader.close();
      } else {
        cli.printUsage();
      }
    } catch (ParseException e) {
      LOG.error(e.getMessage());
      cli.printUsage();
      return;
    }
}
From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/**
 * Move all of the files from the temp directory to the final location
 * @param fs the output file system
 * @param file the file to move
 * @param srcDir the source directory
 * @param destDir the target directory
 * @param dryRun - a flag that simply tests if this move would succeed or not based
 *                 on whether other files exist where we're trying to copy
 * @throws java.io.IOException
 */
private void moveTaskOutputs(FileSystem fs, Path file, Path srcDir, Path destDir, final boolean dryRun)
    throws IOException {
    if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME)
        || file.getName().equals(SUCCEEDED_FILE_NAME)) {
      return;
    }
    final Path finalOutputPath = getFinalPath(file, srcDir, destDir);
    if (fs.isFile(file)) {
      if (dryRun) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Testing if moving file: [" + file + "] to [" + finalOutputPath
              + "] would cause a problem");
        }
        if (fs.exists(finalOutputPath)) {
          throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
              "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
        }
      } else {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]");
        }
        // Make sure the parent directory exists. It is not an error
        // to recreate an existing directory
        fs.mkdirs(finalOutputPath.getParent());
        if (!fs.rename(file, finalOutputPath)) {
          if (!fs.delete(finalOutputPath, true)) {
            throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                "Failed to delete existing path " + finalOutputPath);
          }
          if (!fs.rename(file, finalOutputPath)) {
            throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                "Failed to move output to " + finalOutputPath);
          }
        }
      }
    } else if (fs.getFileStatus(file).isDir()) {
      FileStatus[] children = fs.listStatus(file);
      FileStatus firstChild = null;
      if (children != null) {
        int index = 0;
        while (index < children.length) {
          if (!children[index].getPath().getName().equals(TEMP_DIR_NAME)
              && !children[index].getPath().getName().equals(LOGS_DIR_NAME)
              && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) {
            firstChild = children[index];
            break;
          }
          index++;
        }
      }
      if (firstChild != null && firstChild.isDir()) {
        // If the first child is a directory, then the rest would be directories too
        // according to the HCatalog dir structure; recurse in that case
        for (FileStatus child : children) {
          moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
        }
      } else {
        if (!dryRun) {
          if (dynamicPartitioningUsed) {
            // Optimization: if the first child is a file, we have reached the leaf
            // directory; move the parent directory itself instead of moving each
            // file under the directory. See HCATALOG-538
            final Path parentDir = finalOutputPath.getParent();
            // Create the directory
            Path placeholder = new Path(parentDir, "_placeholder");
            if (fs.mkdirs(parentDir)) {
              // It is weird but we need a placeholder,
              // otherwise rename cannot move the file to the right place
              fs.create(placeholder).close();
            }
            if (LOG.isDebugEnabled()) {
              LOG.debug("Moving directory: " + file + " to " + parentDir);
            }
            if (!fs.rename(file, parentDir)) {
              final String msg = "Failed to move file: " + file + " to " + parentDir;
              LOG.error(msg);
              throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
            }
            fs.delete(placeholder, false);
          } else {
            // In case of no partition we have to move each file
            for (FileStatus child : children) {
              moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
            }
          }
        } else {
          if (fs.exists(finalOutputPath)) {
            throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
          }
        }
      }
    } else {
      // Should never happen
      final String msg = "Unknown file type being asked to be moved, erroring out";
      throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
    }
}
From source file:org.apache.impala.analysis.CreateTableLikeFileStmt.java
License:Apache License
/**
 * Reads the first block from the given HDFS file and returns the Parquet schema.
 * Throws Analysis exception for any failure, such as failing to read the file
 * or failing to parse the contents.
 */
private static parquet.schema.MessageType loadParquetSchema(Path pathToFile) throws AnalysisException {
    try {
      FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
      if (!fs.isFile(pathToFile)) {
        throw new AnalysisException("Cannot infer schema, path is not a file: " + pathToFile);
      }
    } catch (IOException e) {
      throw new AnalysisException("Failed to connect to filesystem:" + e);
    } catch (IllegalArgumentException e) {
      throw new AnalysisException(e.getMessage());
    }
    ParquetMetadata readFooter = null;
    try {
      readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(), pathToFile);
    } catch (FileNotFoundException e) {
      throw new AnalysisException("File not found: " + e);
    } catch (IOException e) {
      throw new AnalysisException("Failed to open file as a parquet file: " + e);
    } catch (RuntimeException e) {
      // Parquet throws a generic RuntimeException when reading a non-parquet file
      if (e.toString().contains("is not a Parquet file")) {
        throw new AnalysisException("File is not a parquet file: " + pathToFile);
      }
      // otherwise, who knows what we caught, throw it back up
      throw e;
    }
    return readFooter.getFileMetaData().getSchema();
}
From source file:org.apache.impala.analysis.ParquetHelper.java
License:Apache License
/**
 * Reads the first block from the given HDFS file and returns the Parquet schema.
 * Throws Analysis exception for any failure, such as failing to read the file
 * or failing to parse the contents.
 */
private static org.apache.parquet.schema.MessageType loadParquetSchema(Path pathToFile)
    throws AnalysisException {
    try {
      FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
      if (!fs.isFile(pathToFile)) {
        throw new AnalysisException("Cannot infer schema, path is not a file: " + pathToFile);
      }
    } catch (IOException e) {
      throw new AnalysisException("Failed to connect to filesystem:" + e);
    } catch (IllegalArgumentException e) {
      throw new AnalysisException(e.getMessage());
    }
    ParquetMetadata readFooter = null;
    try {
      readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(), pathToFile);
    } catch (FileNotFoundException e) {
      throw new AnalysisException("File not found: " + e);
    } catch (IOException e) {
      throw new AnalysisException("Failed to open file as a parquet file: " + e);
    } catch (RuntimeException e) {
      // Parquet throws a generic RuntimeException when reading a non-parquet file
      if (e.toString().contains("is not a Parquet file")) {
        throw new AnalysisException("File is not a parquet file: " + pathToFile);
      }
      // otherwise, who knows what we caught, throw it back up
      throw e;
    }
    return readFooter.getFileMetaData().getSchema();
}
From source file:org.apache.lens.server.util.ScannedPaths.java
License:Apache License
/**
 * Method that computes the paths of resources matching the input path or path regex pattern.
 * If the provided path is a directory it additionally checks for the jar_order or glob_order file
 * that imposes ordering of resources and filters out other resources.
 *
 * Updates the finalPaths List with matched paths and returns an iterator for matched paths.
 */
private List<String> getMatchedPaths(Path pt, String type) {
    List<String> finalPaths = new ArrayList<>();
    InputStream resourceOrderIStream = null;
    FileSystem fs;
    try {
      fs = pt.getFileSystem(new Configuration());
      if (fs.exists(pt)) {
        if (fs.isFile(pt)) {
          /** CASE 1 : Direct FILE provided in path **/
          finalPaths.add(pt.toUri().toString());
        } else if (fs.isDirectory(pt)) {
          /** CASE 2 : DIR provided in path **/
          Path resourceOrderFile;
          FileStatus[] statuses;
          List<String> newMatches;
          List<String> resources;
          resourceOrderFile = new Path(pt, "jar_order");
          /** Add everything in dir if no jar_order or glob_order is present **/
          if (!fs.exists(resourceOrderFile)) {
            resourceOrderFile = new Path(pt, "glob_order");
            if (!fs.exists(resourceOrderFile)) {
              resourceOrderFile = null;
              /** Get matched resources recursively for all files **/
              statuses = fs.globStatus(new Path(pt, "*"));
              if (statuses != null) {
                for (FileStatus st : statuses) {
                  newMatches = getMatchedPaths(st.getPath(), type);
                  finalPaths.addAll(newMatches);
                }
              }
            }
          }
          if (resourceOrderFile != null) {
            /** Else get jars as per order specified in jar_order/glob_order **/
            resourceOrderIStream = fs.open(resourceOrderFile);
            resources = IOUtils.readLines(resourceOrderIStream, Charset.forName("UTF-8"));
            for (String resource : resources) {
              if (StringUtils.isBlank(resource)) {
                continue;
              }
              resource = resource.trim();
              /** Get matched resources recursively for provided path/pattern **/
              if (resource.startsWith("/") || resource.contains(":/")) {
                newMatches = getMatchedPaths(new Path(resource), type);
              } else {
                newMatches = getMatchedPaths(new Path(pt, resource), type);
              }
              finalPaths.addAll(newMatches);
            }
          }
        }
      } else {
        /** CASE 3 : REGEX provided in path **/
        FileStatus[] statuses = fs.globStatus(Path.getPathWithoutSchemeAndAuthority(pt));
        if (statuses != null) {
          for (FileStatus st : statuses) {
            List<String> newMatches = getMatchedPaths(st.getPath(), type);
            finalPaths.addAll(newMatches);
          }
        }
      }
      filterDirsAndJarType(fs, finalPaths);
    } catch (FileNotFoundException fex) {
      log.error("File not found while scanning path. Path: {}, Type: {}", path, type, fex);
    } catch (Exception e) {
      log.error("Exception while initializing PathScanner. Path: {}, Type: {}", path, type, e);
    } finally {
      IOUtils.closeQuietly(resourceOrderIStream);
    }
    return finalPaths;
}