Example usage for org.apache.hadoop.fs FileSystem isDirectory

List of usage examples for org.apache.hadoop.fs FileSystem isDirectory

Introduction

This page lists example usages of org.apache.hadoop.fs.FileSystem.isDirectory.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Document

True iff the named path is a directory.
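
Note that this overload is deprecated. A common pattern, seen throughout the listings below, is to guard the call with exists(), or to switch to the FileStatus-based replacement. A minimal sketch follows (the path is a placeholder; imports from org.apache.hadoop.conf and org.apache.hadoop.fs are omitted, as in the listings below):

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path dir = new Path("/tmp/example"); // placeholder path

// Deprecated convenience method, still widely used in the examples below.
if (fs.exists(dir) && fs.isDirectory(dir)) {
    System.out.println(dir + " is a directory");
}

// Non-deprecated alternative: inspect the FileStatus instead.
if (fs.exists(dir) && fs.getFileStatus(dir).isDirectory()) {
    System.out.println(dir + " is a directory");
}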

Usage

From source file:pl.edu.icm.coansys.statisticsgenerator.tools.ViewTool.java

License:Open Source License

private static void processFileOrDirectory(Path pt, Configuration conf) throws IOException {
    FileSystem fs = pt.getFileSystem(conf);
    if (fs.isDirectory(pt)) {
        for (FileStatus fstat : fs.listStatus(pt)) {
            processFileOrDirectory(fstat.getPath(), conf);
        }
    } else if (fs.isFile(pt)) {
        viewFile(pt, conf);
    } else {
        // log an error: the path is neither a regular file nor a directory
    }
}

From source file:reconcile.hbase.mapreduce.KeyListInputFormat.java

License:Open Source License

@SuppressWarnings("deprecation")
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
    TreeSet<String> uniqueKeys = new TreeSet<String>();

    FileSystem fs = FileSystem.get(context.getConfiguration());

    final String tableName = context.getConfiguration().get(JobConfig.TABLE_CONF);
    if (tableName == null || tableName.length() == 0)
        throw new IOException("HBase table name was not provided");
    int splitSize = Integer.parseInt(context.getConfiguration().get(SPLIT_SIZE, DEFAULT_SPLIT_SIZE));

    final int maxSplits = Integer.parseInt(context.getConfiguration().get(MAX_SPLITS, DEFAULT_MAX_SPLITS));

    Path[] files = getInputPaths(context);
    for (Path file : files) {
        if (fs.isDirectory(file) || !fs.exists(file))
            throw new IOException("Not a valid key list file: " + file.toString());

        InputStream is = null;
        BufferedReader reader = null;
        try {
            is = fs.open(file);
            reader = new BufferedReader(new InputStreamReader(is));
            String line = null;
            while ((line = reader.readLine()) != null) {
                String key = line.trim();
                if (key.length() > 0 && !key.startsWith("#")) {
                    uniqueKeys.add(key);
                }
            }
        } finally {
            IOUtils.closeQuietly(is);
            IOUtils.closeQuietly(reader);
        }
    }

    ArrayList<String> keys = new ArrayList<String>();
    keys.addAll(uniqueKeys);

    if (keys.size() > 0) {
        HConnection conn = HConnectionManager.getConnection(context.getConfiguration());

        int numSplits = Math.max((int) Math.ceil(keys.size() / (splitSize * 1.0)), 1);
        if (numSplits > maxSplits) {
            splitSize = (int) Math.ceil(keys.size() / (maxSplits * 1.0));
            LOG.info("Overriding split size with (" + splitSize + ") to maintain max splits of (" + maxSplits
                    + ")");
            numSplits = Math.max((int) Math.ceil(keys.size() / (splitSize * 1.0)), 1);
        }

        LOG.info("There are (" + keys.size() + ") total keys. Split size(" + splitSize + ")  Num splits("
                + numSplits + ")");
        int startNdx = 0;
        for (int i = 0; i < (numSplits - 1); ++i) {
            int endNdx = startNdx + splitSize;
            splits.add(createSplit(conn, tableName, keys, startNdx, endNdx));
            startNdx = endNdx;
        }
        // Add last split
        splits.add(createSplit(conn, tableName, keys, startNdx, keys.size()));
    }
    return splits;
}

From source file:simsql.code_generator.MyPhysicalDatabase.java

License:Apache License

public void restoreFrom(String fromHere) {

    try {

        // first, we see if the directory that we are reading from
        // exists and is a directory.
        Configuration conf = new Configuration();
        FileSystem dfs = FileSystem.get(conf);

        Path pathFrom = new Path(fromHere);
        if (!dfs.exists(pathFrom) || !dfs.isDirectory(pathFrom)) {
            System.out.println("The specified restoration path does not exist or is not a directory!");
            return;
        }

        // now, get the destination path.
        Path pathTo = new Path(myDir);
        if (dfs.exists(pathTo)) {

            // destroy it, if it's there.
            dfs.delete(pathTo, true);
        }

        // make the directory
        // dfs.mkdirs(pathTo);

        // and all the paths we will be copying
        Path[] sourcePaths = FileUtil.stat2Paths(dfs.globStatus(pathFrom), pathFrom);
        for (Path sp : sourcePaths) {

            // restore all of it.
            FileUtil.copy(dfs, sp, dfs, pathTo, false, conf);
        }

    } catch (Exception e) {
        throw new RuntimeException("Could not restore data from directory " + fromHere, e);
    }
}

From source file:simsql.runtime.HDFSTableStats.java

License:Apache License

@SuppressWarnings("unchecked")
public void load(String path) throws IOException, ClassNotFoundException {

    // look up the input file...
    Path file = new Path(path);
    Configuration conf = new Configuration();
    FileSystem fs = file.getFileSystem(conf);

    // is it a directory?
    if (fs.exists(file) && fs.isDirectory(file)) {

        // if so, traverse all of it.
        clear();
        for (FileStatus ff : fs.listStatus(file, new StatsFileFilter())) {

            HDFSTableStats guyMerged = new HDFSTableStats();
            guyMerged.load(ff.getPath().toUri().getPath());
            consume(guyMerged);
        }
    } else if (fs.exists(file)) {

        // otherwise, just read it in.
        FSDataInputStream fileIn = fs.open(file);
        ObjectInputStream in = new ObjectInputStream(fileIn);

        HDFSTableStats newGuy = (HDFSTableStats) in.readObject();
        in.close();

        // destroy our contents and read.
        clear();
        consume(newGuy);
    }
}

From source file:simsql.runtime.MRLoader.java

License:Apache License

public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) {

    // make a directory for the relation
    Configuration conf = new Configuration();
    FileSystem dfs = null;

    try {
        dfs = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot access HDFS!", e);
    }

    try {
        // if it exists, destroy it.
        Path path = new Path(outputPath);
        if (dfs.exists(path)) {
            dfs.delete(path, true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not create the file to bulk load to!", e);
    }

    // find a file name 
    String tempPath = null;
    if (inputPath.startsWith("hdfs:")) {
        tempPath = inputPath.replace("hdfs:", "");
    } else {
        tempPath = "/tempDataFile_" + r.getName();
        try {
            dfs.delete(new Path(tempPath), true);
        } catch (Exception e) {
            // ignore this.
        }

        // upload the text file
        try {
            dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath));
            dfs.deleteOnExit(new Path(tempPath));
        } catch (Exception e) {
            throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e);
        }
    }

    // set up the new job's parameters.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());

    conf.set("io.serializations",
            "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt("simsql.loader.numAtts", r.getAttributes().size());
    conf.setInt("simsql.loader.typeCode", (int) typeCode);
    conf.setInt("simsql.loader.sortAtt", sortAtt);

    String[] myStrings = new String[r.getAttributes().size()];
    int j = 0;
    for (simsql.compiler.Attribute a : r.getAttributes()) {
        myStrings[j++] = a.getPhysicalRealization().getClass().getName();
    }

    conf.setStrings("simsql.loader.types", myStrings);

    // create a job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create bulk loading job!", e);
    }

    // set the split size (number of mappers)
    long fSize = 0;
    if (inputPath.startsWith("hdfs")) {
        fSize = RelOp.getPathsTotalSize(new String[] { tempPath });
    } else {
        fSize = new File(inputPath).length();
    }

    FileInputFormat.setMinInputSplitSize(job, fSize / (long) numTasks);
    FileInputFormat.setMaxInputSplitSize(job, fSize / (long) numTasks);

    // and the number of reducers
    job.setNumReduceTasks(numTasks);

    // the mapper/reducer/jar
    job.setMapperClass(MRLoaderMapper.class);
    job.setReducerClass(MRLoaderReducer.class);
    job.setJarByClass(MRLoader.class);

    // I/O settings.
    job.setOutputFormatClass(RecordOutputFormat.class);

    job.setMapOutputKeyClass(RecordKey.class);
    job.setMapOutputValueClass(RecordWrapper.class);
    job.setOutputKeyClass(Nothing.class);
    job.setOutputValueClass(Record.class);
    try {
        FileInputFormat.setInputPaths(job, new Path(tempPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } catch (Exception e) {
        throw new RuntimeException("Could not set job inputs/outputs", e);
    }
    job.setGroupingComparatorClass(RecordKeyGroupingComparator.class);
    job.setPartitionerClass(RecordPartitioner.class);
    job.setSortComparatorClass(RecordKeySortComparator.class);

    job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath);

    // run it
    Counters counters;
    try {
        job.waitForCompletion(true);
        counters = job.getCounters();
    } catch (Exception e) {
        throw new RuntimeException("Could not set up bulk loader job!", e);
    }

    // now, delete all the empty part files
    try {

        // get a filesystem
        FileSystem ddfs = FileSystem.get(conf);
        Path outPath = new Path(outputPath);
        if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) {
            FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    ddfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) { // this isn't disastrous 
    }

    // get the counter for the output of the mapper.
    Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
    return bytesCounter.getValue();
}

From source file:simsql.runtime.RelOp.java

License:Apache License

public boolean run(RuntimeParameter params, boolean verbose) {

    ExampleRuntimeParameter pp = (ExampleRuntimeParameter) params;

    // build the jar.
    String jarFile = buildJarFile(params);

    // Get the default configuration object
    Configuration conf = new Configuration();

    // set quiet mode on/off
    conf.setQuietMode(!verbose);

    /***
    conf.setBoolean("mapred.task.profile", true);
    conf.set("mapred.task.profile.params", "-agentlib:hprof=cpu=samples," +
        "heap=sites,depth=8,force=n,thread=y,verbose=n,file=%s");
    ***/

    // tell it how to serialize and deserialize records and recordkeys
    conf.set("io.serializations", getSerializations());
    conf.setBoolean("mapred.compress.map.output", true);

    int ioSortMB = conf.getInt("io.sort.mb", 256);
    conf.set("mapred.map.child.java.opts", "-Xmx" + (getMemPerMapper(params) + ioSortMB) + "m -Xms"
            + (getMemPerMapper(params))
            + "m -Duser.timezone='America/Chicago' -Djava.net.preferIPv4Stack=true -XX:CompileThreshold=10000 -XX:+DoEscapeAnalysis -XX:+UseNUMA -XX:-EliminateLocks -XX:+UseBiasedLocking -XX:+OptimizeStringConcat -XX:+UseFastAccessorMethods -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycleMin=0 -XX:+UseCompressedOops -XX:+AggressiveOpts -XX:-UseStringCache -XX:ErrorFile=/tmp/hs_err_pid%p.log");

    conf.set("mapred.reduce.child.java.opts", "-Xmx" + (getMemPerReducer(params) + ioSortMB) + "m -Xms"
            + (getMemPerMapper(params))
            + "m -Duser.timezone='America/Chicago' -Djava.net.preferIPv4Stack=true -XX:CompileThreshold=10000 -XX:+DoEscapeAnalysis -XX:+UseNUMA -XX:-EliminateLocks -XX:+UseBiasedLocking -XX:+OptimizeStringConcat -XX:+UseFastAccessorMethods -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycleMin=0 -XX:+UseCompressedOops -XX:+AggressiveOpts -XX:-UseStringCache -XX:ErrorFile=/tmp/hs_err_pid%p.log");

    conf.setInt("simsql.input.numSplits", pp.getNumCPUs());
    conf.setInt("mapred.job.reuse.jvm.num.tasks", 1);
    // conf.setBoolean ("mapred.map.tasks.speculative.execution", false);
    // conf.setBoolean ("mapred.reduce.tasks.speculative.execution", false);

    // tell it to use the jar that we just created
    conf.set("mapred.jar", jarFile);

    // conf.set("tmpjars", "file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-core.jar");

    conf.setBoolean("mapred.output.compress", true);
    conf.setStrings("mapred.output.compression.type", new String[] { "RECORD" });

    // use snappy for the intermediate stuff
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());

    // do some additional operator-specific configurations
    setConfigurations(conf, params);

    // collect statistics for final relations always
    conf.setBoolean("simsql.collectStats", isFinal || collectStats);

    // figure out what file to map
    String[] inDirs = myInputNetwork.getPipelinedInputFiles();
    inDirs = excludeAnyWhoWillNotBeMapped(inDirs);
    String inSingleString = inDirs[0];
    conf.set("simsql.fileToMap", inSingleString);
    for (int i = 1; i < inDirs.length; i++) {
        inSingleString += "," + inDirs[i];
    }

    // create and name the job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create a new job!", e);
    }

    job.setJobName(getJobName());

    // set the map-reduce input and output types
    job.setMapOutputKeyClass(getMapOutputKeyClass());
    job.setMapOutputValueClass(getMapOutputValueClass());
    job.setOutputKeyClass(getOutputKeyClass());
    job.setOutputValueClass(getOutputValueClass());

    int numReducers = getNumReducers(params);

    job.setMapperClass(getMapperClass());
    job.setReducerClass(getReducerClass());

    // set the number of reducers
    job.setNumReduceTasks(numReducers);

    // set the input and the output formats... these extend FileInputFormat and FileOutputFormat
    job.setInputFormatClass(getInputFormatClass());
    job.setOutputFormatClass(getOutputFormatClass());

    // set the input and output paths
    try {
        System.out.println("input file: " + inSingleString);
        FileInputFormat.setInputPaths(job, inSingleString);
        FileInputFormat.setInputPathFilter(job, TableFileFilter.class);
        FileOutputFormat.setOutputPath(job, new Path(getOutput()));
    } catch (Exception e) {
        throw new RuntimeException("Unable to set up the input/output path for the job.", e);
    }

    // set the split size
    FileInputFormat.setMinInputSplitSize(job, getSplitSize(params));
    FileInputFormat.setMaxInputSplitSize(job, getSplitSize(params));

    // set the various sorting/grouping/mapping classes
    job.setGroupingComparatorClass(getGroupingComparatorClass());
    job.setPartitionerClass(getPartitionerClass());
    job.setSortComparatorClass(getSortComparatorClass());

    // and now, submit the job and wait for things to finish
    int exitCode;
    try {
        exitCode = job.waitForCompletion(verbose) ? 0 : 1;

        // get the output bytes counter.
        Counters c = job.getCounters();
        Counter mx = c.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);

        // and use them to set the size of the output relation.
        if (myDB != null) {
            myDB.setTableSize(myDB.getTableName(getOutput()), mx.getValue());
            myDB.setNumAtts(myDB.getTableName(getOutput()), getOutputAttNames().length);
        }

    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Unable to run the job", e);
    }

    // now, delete all the empty part files
    try {

        // get a filesystem
        FileSystem dfs = FileSystem.get(conf);
        Path outPath = new Path(getOutput());
        if (dfs.exists(outPath) && dfs.isDirectory(outPath)) {
            FileStatus fstatus[] = dfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (dfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    dfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) { // this isn't disastrous 
    }
    return (exitCode == 0);
}

From source file:streaming.core.DownloadRunner.java

License:Apache License

public static int getTarFileByPath(HttpServletResponse res, String pathStr) {
    String[] paths = pathStr.split(",");
    try {
        OutputStream outputStream = res.getOutputStream();

        TarOutputStream tarOutputStream = new TarOutputStream(new BufferedOutputStream(outputStream));

        FileSystem fs = FileSystem.get(new Configuration());
        List<FileStatus> files = new ArrayList<FileStatus>();

        for (String path : paths) {
            Path p = new Path(path);
            if (fs.exists(p)) {
                if (fs.isFile(p)) {
                    files.add(fs.getFileStatus(p));
                } else if (fs.isDirectory(p)) {
                    FileStatus[] fileStatusArr = fs.listStatus(p);
                    if (fileStatusArr != null && fileStatusArr.length > 0) {

                        for (FileStatus cur : fileStatusArr) {
                            if (cur.isFile()) {
                                files.add(cur);
                            }
                        }
                    }
                }
            }

        }

        if (files.size() > 0) {
            FSDataInputStream inputStream = null;
            int len = files.size();
            int i = 1;
            for (FileStatus cur : files) {
                logger.info("[" + i++ + "/" + len + "]" + ",?" + cur);
                inputStream = fs.open(cur.getPath());

                tarOutputStream.putNextEntry(new HDFSTarEntry(cur, cur.getPath().getName()));
                org.apache.commons.io.IOUtils.copyLarge(inputStream, tarOutputStream);
                inputStream.close();

            }
            tarOutputStream.flush();
            tarOutputStream.close();
            return 200;
        } else
            return 400;

    } catch (Exception e) {
        e.printStackTrace();
        return 500;

    }
}

From source file:streaming.core.DownloadRunner.java

License:Apache License

public static int getRawFileByPath(HttpServletResponse res, String path, long position) {

    try {
        FileSystem fs = FileSystem.get(new Configuration());

        Path p = new Path(path);
        if (fs.exists(p)) {

            List<FileStatus> files = new ArrayList<FileStatus>();

            // collect the file itself, or all files directly under the directory
            if (fs.isFile(p)) {
                files.add(fs.getFileStatus(p));
            } else if (fs.isDirectory(p)) {

                FileStatus[] fileStatusArr = fs.listStatus(p);
                if (fileStatusArr != null && fileStatusArr.length > 0) {

                    for (FileStatus cur : fileStatusArr) {
                        files.add(cur);
                    }
                }
            }

            // stream the collected files to the response
            if (files.size() > 0) {

                logger.info(path + "" + files.size());

                FSDataInputStream inputStream = null;
                OutputStream outputStream = res.getOutputStream();

                int len = files.size();
                int i = 1;
                long allPosition = 0;
                for (FileStatus cur : files) {

                    logger.info("[" + i++ + "/" + len + "]" + path + ",?" + cur);
                    inputStream = fs.open(cur.getPath());

                    if (position > 0) {

                        if (allPosition + cur.getLen() > position) {
                            inputStream.seek(position - allPosition);
                            logger.info("seek position " + (position - allPosition));
                            position = -1;
                        }
                        allPosition += cur.getLen();
                    }
                    org.apache.commons.io.IOUtils.copyLarge(inputStream, outputStream);
                    inputStream.close();

                }
                outputStream.flush();
                outputStream.close();
                return 200;

            } else {
                logger.info(path + "" + files.size());
            }

        } else {

            return 400;
        }

    } catch (Exception e) {
        e.printStackTrace();

    }

    return 500;
}

From source file:thinkbig.util.Util.java

License:Open Source License

/**
 * Recursively enumerate all files inside the directory dirPath in FileSystem fs
 * @param fs
 * @param dirPath
 * @param files
 * @throws IOException
 */
public static void getAllFiles(FileSystem fs, String dirPath, List<String> files) throws IOException {

    Path loc = new Path(dirPath);
    FileStatus[] statuses = fs.listStatus(loc);
    if (statuses != null) {
        for (FileStatus status : statuses) {
            String file = status.getPath().toString();
            if (fs.isDirectory(new Path(file))) {
                getAllFiles(fs, file, files);
            } else if (files.indexOf(file) == -1) { // if not already there
                System.out.println(file);
                files.add(file);
            }
        }
    }
}

From source file:uk.ac.ucl.panda.indexing.io.BasicDocMaker.java

License:Apache License

protected void collectFiles(String path, ArrayList inputFiles) throws IOException {

    Path p = new Path(path);
    FileSystem fs = FileSystem.get(new Configuration());
    //System.out.println("Collect: "+f.getAbsolutePath());
    if (!fs.exists(p)) {
        return;
    }
    if (fs.isDirectory(p)) {
        RemoteIterator<LocatedFileStatus> fileIter = fs.listLocatedStatus(p);
        List<String> files = new ArrayList<String>();
        while (fileIter.hasNext()) {
            files.add(fileIter.next().getPath().toString());
        }
        Collections.sort(files);
        for (String f : files) {
            collectFiles(f, inputFiles);
        }
        return;
    }
    //////////////ucl
    if (path.toLowerCase().endsWith("z")) {
        inputFiles.add(path);
        addUniqueBytes(fs.getFileStatus(p).getLen());
    }
}