Example usage for org.apache.hadoop.fs FileSystem isFile

List of usage examples for org.apache.hadoop.fs FileSystem isFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem#isFile.

Prototype

@Deprecated
public boolean isFile(Path f) throws IOException 

Source Link

Document

Returns true if and only if the named path is a regular file. Note that this method is deprecated (see the @Deprecated prototype above); newer Hadoop code should use getFileStatus(Path) and FileStatus#isFile() instead.

Usage

From source file:org.apache.hama.examples.Kmeans.java

License:Apache License

/**
 * Command-line entry point for the k-means example.
 * Arguments: &lt;input&gt; &lt;output&gt; &lt;maxIterations&gt; &lt;k&gt;, optionally followed by
 * "-g &lt;count&gt; &lt;dimension&gt;" to generate random input data first.
 */
public static void main(String[] args) throws Exception {
    boolean generateMode = args.length == 7;
    if (args.length < 4 || (args.length > 4 && !generateMode)) {
        System.out.println(
                "USAGE: <INPUT_PATH> <OUTPUT_PATH> <MAXITERATIONS> <K (how many centers)> -g [<COUNT> <DIMENSION OF VECTORS>]");
        return;
    }
    HamaConfiguration conf = new HamaConfiguration();

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);
    // Centers live next to the input file, or under the input directory.
    Path center = fs.isFile(input) ? new Path(input.getParent(), "center/cen.seq")
            : new Path(input, "center/cen.seq");
    Path centerOut = new Path(output, "center/center_output.seq");
    conf.set(KMeansBSP.CENTER_IN_PATH, center.toString());
    conf.set(KMeansBSP.CENTER_OUT_PATH, centerOut.toString());
    int maxIterations = Integer.parseInt(args[2]);
    conf.setInt(KMeansBSP.MAX_ITERATIONS_KEY, maxIterations);
    int k = Integer.parseInt(args[3]);
    if (generateMode && args[4].equals("-g")) {
        int count = Integer.parseInt(args[5]);
        if (k > count)
            throw new IllegalArgumentException("K can't be greater than n!");
        int dimension = Integer.parseInt(args[6]);
        System.out.println("N: " + count + " Dimension: " + dimension + " Iterations: " + maxIterations);
        if (!fs.isFile(input)) {
            input = new Path(input, "input.seq");
        }
        // prepare the input, like deleting old versions and creating centers
        KMeansBSP.prepareInput(count, k, dimension, conf, input, center, output, fs);
    } else {
        if (!fs.isFile(input)) {
            System.out.println("Cannot read text input file: " + input.toString());
            return;
        }
        // Set the last argument to TRUE if first column is required to be the key
        input = KMeansBSP.prepareInputText(k, conf, input, center, output, fs, true);
    }

    BSPJob job = KMeansBSP.createJob(conf, input, output, true);

    long startedAt = System.currentTimeMillis();
    // just submit the job and wait for it
    if (job.waitForCompletion(true)) {
        System.out.println("Job Finished in " + (System.currentTimeMillis() - startedAt) / 1000.0 + " seconds");
    }

    System.out.println("\nHere are a few lines of output:");
    for (String line : KMeansBSP.readOutput(conf, output, fs, 4)) {
        System.out.println(line);
    }
    System.out.println("...");
}

From source file:org.apache.hama.examples.SpMV.java

License:Apache License

/**
 * Reads the first key/value record of each sequence file under the given path
 * into {@code result}. If the path is a directory, every file directly inside
 * it is read; if it is a single file, only that file is read.
 *
 * @param pathString path to a sequence file, or to a directory of them
 * @param result writable that receives the value of each record read
 * @param conf configuration used to resolve the file system
 * @throws IOException if the file system cannot be accessed
 */
public static void readFromFile(String pathString, Writable result, HamaConfiguration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = null;
    Path path = new Path(pathString);
    List<String> filePaths = new ArrayList<String>();
    if (!fs.isFile(path)) {
        // Directory: collect every file directly inside it.
        FileStatus[] stats = fs.listStatus(path);
        for (FileStatus stat : stats) {
            filePaths.add(stat.getPath().toUri().getPath());
        }
    } else {
        // Single file. (The old "else if (fs.isFile(path))" re-check was redundant.)
        filePaths.add(path.toString());
    }

    try {
        for (String filePath : filePaths) {
            reader = new SequenceFile.Reader(fs, new Path(filePath), conf);
            IntWritable key = new IntWritable();
            reader.next(key, result);
            // Close each reader as soon as it is consumed; previously only the
            // last reader was closed, leaking one handle per additional file.
            reader.close();
            reader = null;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null)
            reader.close();
    }
}

From source file:org.apache.hama.examples.util.WritableUtil.java

License:Apache License

/**
 * This method is used to read vector from specified path in SpMVTest. For
 * test purposes only.
 * 
 * @param pathString
 *          input path for vector
 * @param result
 *          instance of vector writable which should be filled.
 * @param conf
 *          configuration
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static void readFromFile(String pathString, Writable result, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = null;
    Path path = new Path(pathString);
    List<String> filePaths = new ArrayList<String>();
    // TODO this deprecation should be fixed.
    if (fs.isDirectory(path)) {
        // Directory: collect every file directly inside it.
        FileStatus[] stats = fs.listStatus(path);
        for (FileStatus stat : stats) {
            filePaths.add(stat.getPath().toUri().getPath());
        }
    } else if (fs.isFile(path)) {
        filePaths.add(path.toString());
    }
    try {
        for (String filePath : filePaths) {
            reader = new SequenceFile.Reader(fs, new Path(filePath), conf);
            IntWritable key = new IntWritable();
            reader.next(key, result);
            // Close each reader as soon as it is consumed; previously only the
            // last reader was closed, leaking one handle per additional file.
            reader.close();
            reader = null;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null)
            reader.close();
    }
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Reads input text files and writes it to a sequencefile.
 * 
 * @param k number of leading vectors that double as the initial centers.
 * @param conf job configuration.
 * @param txtIn the text input: either a single file or its parent directory.
 * @param center path the center sequencefile is written to (recreated).
 * @param out output path; deleted first if it already exists.
 * @param fs the file system the paths live on.
 * @param hasKey true if first column is required to be the key.
 * @return the path of a sequencefile.
 * @throws IOException
 */
public static Path prepareInputText(int k, Configuration conf, Path txtIn, Path center, Path out, FileSystem fs,
        boolean hasKey) throws IOException {

    Path in;
    if (fs.isFile(txtIn)) {
        in = new Path(txtIn.getParent(), "textinput/in.seq");
    } else {
        in = new Path(txtIn, "textinput/in.seq");
    }

    // Start from a clean slate: remove stale output, centers and converted input.
    if (fs.exists(out))
        fs.delete(out, true);

    if (fs.exists(center))
        fs.delete(center, true);

    if (fs.exists(in))
        fs.delete(in, true);

    final NullWritable value = NullWritable.get();

    SequenceFile.Writer centerWriter = null;
    SequenceFile.Writer dataWriter = null;
    BufferedReader br = null;
    try {
        // Use createWriter for both writers instead of the deprecated
        // SequenceFile.Writer constructor the center writer used before.
        centerWriter = SequenceFile.createWriter(fs, conf, center, VectorWritable.class, NullWritable.class);
        dataWriter = SequenceFile.createWriter(fs, conf, in, VectorWritable.class, NullWritable.class,
                CompressionType.NONE);
        br = new BufferedReader(new InputStreamReader(fs.open(txtIn)));

        int i = 0;
        String line;
        while ((line = br.readLine()) != null) {
            String[] split = line.split("\t");
            int columnLength = split.length;
            int indexPos = 0;
            if (hasKey) {
                // First column is the key; only the remaining columns are vector data.
                columnLength = columnLength - 1;
                indexPos++;
            }

            DenseDoubleVector vec = new DenseDoubleVector(columnLength);
            for (int j = 0; j < columnLength; j++) {
                vec.set(j, Double.parseDouble(split[j + indexPos]));
            }

            VectorWritable vector;
            if (hasKey) {
                NamedDoubleVector named = new NamedDoubleVector(split[0], vec);
                vector = new VectorWritable(named);
            } else {
                vector = new VectorWritable(vec);
            }

            dataWriter.append(vector, value);
            // The first k vectors are also written out as the initial centers.
            if (k > i) {
                centerWriter.append(vector, value);
            }
            i++;
        }
    } finally {
        // Close all streams even when reading or writing fails; previously all
        // three leaked on any exception thrown inside the loop.
        if (br != null)
            br.close();
        if (centerWriter != null)
            centerWriter.close();
        if (dataWriter != null)
            dataWriter.close();
    }
    return in;
}

From source file:org.apache.hama.ml.recommendation.cf.OnlineCF.java

License:Apache License

/**
 * Loads a factorization model from the given path.
 * When {@code lazy} is true only the path is remembered; otherwise the model
 * (a single sequence file, or a directory of consecutive "part-NNNNN" files)
 * is read into the in-memory maps immediately.
 *
 * @param path model location
 * @param lazy whether to defer actual loading
 * @return true on success, false if the model is missing or unreadable
 */
@Override
public boolean load(String path, boolean lazy) {
    this.isLazyLoadModel = lazy;
    this.modelPath = path;
    if (!lazy) {
        Configuration conf = new Configuration();
        Path dataPath = new Path(modelPath);

        try {
            FileSystem fs = dataPath.getFileSystem(conf);
            LinkedList<Path> files = new LinkedList<Path>();

            if (!fs.exists(dataPath)) {
                this.isLazyLoadModel = false;
                this.modelPath = null;
                return false;
            }

            if (!fs.isFile(dataPath)) {
                // Directory: collect consecutive "part-00000", "part-00001", ...
                // files, stopping at the first gap.
                for (int i = 0; i < 100000; i++) {
                    Path partFile = new Path(modelPath + "/part-" + String.valueOf(100000 + i).substring(1, 6));
                    if (fs.exists(partFile)) {
                        files.add(partFile);
                    } else {
                        break;
                    }
                }
            } else {
                files.add(dataPath);
            }

            LOG.info("loading model from " + path);
            for (Path file : files) {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
                try {
                    Text key = new Text();
                    VectorWritable value = new VectorWritable();
                    String strKey = null;
                    Long actualKey = null;
                    String firstSymbol = null;
                    while (reader.next(key, value)) {
                        strKey = key.toString();
                        // Keys look like "<delimiter><id>": the first character
                        // selects which model map the record belongs to.
                        firstSymbol = strKey.substring(0, 1);
                        try {
                            actualKey = Long.valueOf(strKey.substring(1));
                        } catch (Exception e) {
                            actualKey = Long.valueOf(0);
                        }

                        if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_ITEM_DELIM)) {
                            modelItemFactorizedValues.put(actualKey, new VectorWritable(value));
                        } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_USER_DELIM)) {
                            modelUserFactorizedValues.put(actualKey, new VectorWritable(value));
                        } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_USER_FEATURES_DELIM)) {
                            modelUserFeatures.put(actualKey, new VectorWritable(value));
                        } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_ITEM_FEATURES_DELIM)) {
                            modelItemFeatures.put(actualKey, new VectorWritable(value));
                        } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_USER_MTX_FEATURES_DELIM)) {
                            modelUserFeatureFactorizedValues = convertVectorWritable(value);
                        } else if (firstSymbol.equals(OnlineCF.Settings.DFLT_MODEL_ITEM_MTX_FEATURES_DELIM)) {
                            modelItemFeatureFactorizedValues = convertVectorWritable(value);
                        } else {
                            // unknown prefix, skip the record
                            continue;
                        }
                    }
                } finally {
                    // Close the reader even when parsing fails; previously it
                    // leaked on any exception thrown inside the loop.
                    reader.close();
                }
            }
            LOG.info("loaded: " + modelUserFactorizedValues.size() + " users, " + modelUserFeatures.size()
                    + " user features, " + modelItemFactorizedValues.size() + " items, "
                    + modelItemFeatures.size() + " item feature values");
        } catch (Exception e) {
            // Log through the existing logger (with cause) instead of dumping
            // the stack trace to stderr.
            LOG.error("failed to load model from " + path, e);
            this.isLazyLoadModel = false;
            this.modelPath = null;
            return false;
        }
    }
    return true;
}

From source file:org.apache.hama.pipes.util.SequenceFileDumper.java

License:Apache License

/**
 * Command-line tool that dumps the records of a sequence file either to the
 * console or to a file, optionally truncating values or only counting records.
 */
public static void main(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return;
    }

    // Add arguments
    cli.addOption("file", false, "The Sequence File containing the Clusters", "path");
    cli.addOption("output", false, "The output file.  If not specified, dumps to the console", "path");
    cli.addOption("substring", false, "The number of chars of the FormatString() to print", "number");
    cli.addOption("count", false, "Report the count only", "number");

    Parser parser = cli.createParser();
    try {
        HamaConfiguration conf = new HamaConfiguration();
        CommandLine cmdLine = parser.parse(cli.options, args);

        if (!cmdLine.hasOption("file")) {
            cli.printUsage();
            return;
        }
        Path path = new Path(cmdLine.getOptionValue("file"));

        FileSystem fs = FileSystem.get(path.toUri(), conf);
        if (!fs.isFile(path)) {
            System.out.println("File does not exist: " + path.toString());
            return;
        }
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

        boolean writeToFile = cmdLine.hasOption("output");
        Writer writer;
        if (writeToFile) {
            writer = new FileWriter(cmdLine.getOptionValue("output"));
        } else {
            writer = new OutputStreamWriter(System.out);
        }

        try {
            writer.append("Input Path: ").append(String.valueOf(path)).append(LINE_SEP);

            // Optionally truncate each value's string form to this many chars.
            int sub = Integer.MAX_VALUE;
            if (cmdLine.hasOption("substring")) {
                sub = Integer.parseInt(cmdLine.getOptionValue("substring"));
            }

            // NullWritable has no public constructor, so it needs special casing.
            Writable key;
            if (reader.getKeyClass() != NullWritable.class) {
                key = (Writable) reader.getKeyClass().newInstance();
            } else {
                key = NullWritable.get();
            }
            Writable value;
            if (reader.getValueClass() != NullWritable.class) {
                value = (Writable) reader.getValueClass().newInstance();
            } else {
                value = NullWritable.get();
            }

            writer.append("Key class: ").append(String.valueOf(reader.getKeyClass())).append(" Value Class: ")
                    .append(String.valueOf(value.getClass())).append(LINE_SEP);
            writer.flush();

            long count = 0;
            boolean countOnly = cmdLine.hasOption("count");
            if (!countOnly) {
                while (reader.next(key, value)) {
                    writer.append("Key: ").append(String.valueOf(key));
                    String str = value.toString();
                    writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
                    writer.write(LINE_SEP);
                    writer.flush();
                    count++;
                }
            } else { // count only
                while (reader.next(key, value)) {
                    count++;
                }
            }
            // Both modes report the total, so emit it once instead of per branch.
            writer.append("Count: ").append(String.valueOf(count)).append(LINE_SEP);
            writer.flush();
        } finally {
            // Close the reader (and a file-backed writer) even when dumping
            // fails; previously both leaked on any exception. The console
            // writer is only flushed, never closed, so System.out stays usable.
            reader.close();
            if (writeToFile) {
                writer.close();
            }
        }

    } catch (ParseException e) {
        LOG.error(e.getMessage());
        cli.printUsage();
        return;
    }
}

From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Moves all of the files from the temp directory to the final location,
 * recursing into sub-directories. Bookkeeping artifacts (the temp dir, the
 * logs dir and the success marker file) are never moved.
 * @param fs the output file system
 * @param file the file or directory to move
 * @param srcDir the source directory
 * @param destDir the target directory
 * @param dryRun - a flag that simply tests if this move would succeed or not based
 *                 on whether other files exist where we're trying to copy
 * @throws java.io.IOException
 */
private void moveTaskOutputs(FileSystem fs, Path file, Path srcDir, Path destDir, final boolean dryRun)
        throws IOException {

    // Skip bookkeeping entries entirely; they must stay where they are.
    if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME)
            || file.getName().equals(SUCCEEDED_FILE_NAME)) {
        return;
    }
    // Where this entry would end up under destDir.
    final Path finalOutputPath = getFinalPath(file, srcDir, destDir);
    if (fs.isFile(file)) {
        if (dryRun) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Testing if moving file: [" + file + "] to [" + finalOutputPath
                        + "] would cause a problem");
            }
            // Dry run only verifies the destination is free; it moves nothing.
            if (fs.exists(finalOutputPath)) {
                throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                        "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
            }
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]");
            }
            // Make sure the parent directory exists.  It is not an error
            // to recreate an existing directory
            fs.mkdirs(finalOutputPath.getParent());
            // If rename fails, assume a stale destination: delete it and retry once.
            if (!fs.rename(file, finalOutputPath)) {
                if (!fs.delete(finalOutputPath, true)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Failed to delete existing path " + finalOutputPath);
                }
                if (!fs.rename(file, finalOutputPath)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Failed to move output to " + finalOutputPath);
                }
            }
        }
    } else if (fs.getFileStatus(file).isDir()) {
        // Directory: inspect the first "real" child (ignoring bookkeeping
        // entries) to decide between recursing and moving the whole directory.
        FileStatus[] children = fs.listStatus(file);
        FileStatus firstChild = null;
        if (children != null) {
            int index = 0;
            while (index < children.length) {
                if (!children[index].getPath().getName().equals(TEMP_DIR_NAME)
                        && !children[index].getPath().getName().equals(LOGS_DIR_NAME)
                        && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) {
                    firstChild = children[index];
                    break;
                }
                index++;
            }
        }
        if (firstChild != null && firstChild.isDir()) {
            // If the first child is directory, then rest would be directory too according to HCatalog dir structure
            // recurse in that case
            for (FileStatus child : children) {
                moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
            }
        } else {

            if (!dryRun) {
                if (dynamicPartitioningUsed) {
                    // Optimization: if the first child is file, we have reached the leaf directory, move the parent directory itself
                    // instead of moving each file under the directory. See HCATALOG-538

                    final Path parentDir = finalOutputPath.getParent();
                    // Create the directory
                    Path placeholder = new Path(parentDir, "_placeholder");
                    if (fs.mkdirs(parentDir)) {
                        // It is weird, but we need a placeholder file here:
                        // otherwise rename cannot move the file to the right place
                        fs.create(placeholder).close();
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Moving directory: " + file + " to " + parentDir);
                    }
                    if (!fs.rename(file, parentDir)) {
                        final String msg = "Failed to move file: " + file + " to " + parentDir;
                        LOG.error(msg);
                        throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
                    }
                    fs.delete(placeholder, false);
                } else {
                    // In case of no partition we have to move each file
                    for (FileStatus child : children) {
                        moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
                    }
                }
            } else {
                if (fs.exists(finalOutputPath)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
                }
            }
        }
    } else {
        // Should never happen
        final String msg = "Unknown file type being asked to be moved, erroring out";
        throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
    }
}

From source file:org.apache.impala.analysis.CreateTableLikeFileStmt.java

License:Apache License

/**
 * Reads the first block from the given HDFS file and returns the Parquet schema.
 * Throws Analysis exception for any failure, such as failing to read the file
 * or failing to parse the contents./* w w w  .jav  a 2  s  . c o m*/
 */
private static parquet.schema.MessageType loadParquetSchema(Path pathToFile) throws AnalysisException {
    try {
        FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
        if (!fs.isFile(pathToFile)) {
            throw new AnalysisException("Cannot infer schema, path is not a file: " + pathToFile);
        }
    } catch (IOException e) {
        throw new AnalysisException("Failed to connect to filesystem:" + e);
    } catch (IllegalArgumentException e) {
        throw new AnalysisException(e.getMessage());
    }
    ParquetMetadata readFooter = null;
    try {
        readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(), pathToFile);
    } catch (FileNotFoundException e) {
        throw new AnalysisException("File not found: " + e);
    } catch (IOException e) {
        throw new AnalysisException("Failed to open file as a parquet file: " + e);
    } catch (RuntimeException e) {
        // Parquet throws a generic RuntimeException when reading a non-parquet file
        if (e.toString().contains("is not a Parquet file")) {
            throw new AnalysisException("File is not a parquet file: " + pathToFile);
        }
        // otherwise, who knows what we caught, throw it back up
        throw e;
    }
    return readFooter.getFileMetaData().getSchema();
}

From source file:org.apache.impala.analysis.ParquetHelper.java

License:Apache License

/**
 * Loads the Parquet schema from the footer of the given HDFS file.
 * Failures — a path that is not a regular file, filesystem errors, or a file
 * that is not valid Parquet — are surfaced as AnalysisException.
 */
private static org.apache.parquet.schema.MessageType loadParquetSchema(Path pathToFile)
        throws AnalysisException {
    // Step 1: the path must point at a regular file.
    try {
        FileSystem fileSystem = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
        if (!fileSystem.isFile(pathToFile)) {
            throw new AnalysisException("Cannot infer schema, path is not a file: " + pathToFile);
        }
    } catch (IOException e) {
        throw new AnalysisException("Failed to connect to filesystem:" + e);
    } catch (IllegalArgumentException e) {
        throw new AnalysisException(e.getMessage());
    }
    // Step 2: read the footer, mapping each failure to a user-facing error.
    ParquetMetadata metadata = null;
    try {
        metadata = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(), pathToFile);
    } catch (FileNotFoundException e) {
        throw new AnalysisException("File not found: " + e);
    } catch (IOException e) {
        throw new AnalysisException("Failed to open file as a parquet file: " + e);
    } catch (RuntimeException e) {
        // Parquet throws a generic RuntimeException when reading a non-parquet file
        if (e.toString().contains("is not a Parquet file")) {
            throw new AnalysisException("File is not a parquet file: " + pathToFile);
        }
        // otherwise, who knows what we caught, throw it back up
        throw e;
    }
    return metadata.getFileMetaData().getSchema();
}

From source file:org.apache.lens.server.util.ScannedPaths.java

License:Apache License

/**
 * Method that computes path of resources matching the input path or path regex pattern.
 * If provided path is a directory it additionally checks for the jar_order or glob_order file
 * that imposes ordering of resources and filters out other resources.
 *
 * Updates finalPaths List with matched paths and returns an iterator for matched paths.
 *
 * On any error the exception is logged and whatever was matched so far is
 * returned (possibly an empty list).
 */
private List<String> getMatchedPaths(Path pt, String type) {
    List<String> finalPaths = new ArrayList<>();
    InputStream resourceOrderIStream = null;
    FileSystem fs;

    try {
        fs = pt.getFileSystem(new Configuration());
        if (fs.exists(pt)) {
            if (fs.isFile(pt)) {
                /**
                 * CASE 1 : Direct FILE provided in path
                 **/
                finalPaths.add(pt.toUri().toString());
            } else if (fs.isDirectory(pt)) {
                /**
                 * CASE 2 : DIR provided in path
                 **/
                Path resourceOrderFile;
                FileStatus[] statuses;
                List<String> newMatches;
                List<String> resources;

                resourceOrderFile = new Path(pt, "jar_order");
                /** Add everything in dir if no jar_order or glob_order is present **/
                if (!fs.exists(resourceOrderFile)) {
                    resourceOrderFile = new Path(pt, "glob_order");
                    if (!fs.exists(resourceOrderFile)) {
                        resourceOrderFile = null;
                        /** Get matched resources recursively for all files **/
                        statuses = fs.globStatus(new Path(pt, "*"));
                        if (statuses != null) {
                            for (FileStatus st : statuses) {
                                newMatches = getMatchedPaths(st.getPath(), type);
                                finalPaths.addAll(newMatches);
                            }
                        }
                    }
                }
                if (resourceOrderFile != null) {
                    /** Else get jars as per order specified in jar_order/glob_order **/
                    resourceOrderIStream = fs.open(resourceOrderFile);
                    resources = IOUtils.readLines(resourceOrderIStream, Charset.forName("UTF-8"));
                    for (String resource : resources) {
                        // Blank lines in the order file are ignored.
                        if (StringUtils.isBlank(resource)) {
                            continue;
                        }
                        resource = resource.trim();

                        /** Get matched resources recursively for provided path/pattern **/
                        // Absolute paths and URIs (containing ":/") are resolved as-is;
                        // anything else is resolved relative to the current directory pt.
                        if (resource.startsWith("/") || resource.contains(":/")) {
                            newMatches = getMatchedPaths(new Path(resource), type);
                        } else {
                            newMatches = getMatchedPaths(new Path(pt, resource), type);
                        }
                        finalPaths.addAll(newMatches);
                    }
                }
            }
        } else {
            /**
             * CASE 3 : REGEX provided in path
             * */
            FileStatus[] statuses = fs.globStatus(Path.getPathWithoutSchemeAndAuthority(pt));
            if (statuses != null) {
                for (FileStatus st : statuses) {
                    List<String> newMatches = getMatchedPaths(st.getPath(), type);
                    finalPaths.addAll(newMatches);
                }
            }
        }
        filterDirsAndJarType(fs, finalPaths);
    } catch (FileNotFoundException fex) {
        // NOTE(review): "path" logged here is the enclosing instance's field,
        // not the local parameter "pt" — confirm this is intentional.
        log.error("File not found while scanning path. Path: {}, Type: {}", path, type, fex);
    } catch (Exception e) {
        // Deliberate best-effort: errors are logged and swallowed so a bad
        // path only yields fewer matches, never a failure.
        log.error("Exception while initializing PathScanner. Path: {}, Type: {}", path, type, e);
    } finally {
        IOUtils.closeQuietly(resourceOrderIStream);
    }

    return finalPaths;
}