Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#getFileStatus, drawn from open source projects.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
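
Before the longer examples below, here is a minimal, self-contained sketch of calling getFileStatus directly; the path is hypothetical. Note that getFileStatus throws FileNotFoundException when the path does not exist, so callers typically either probe with exists() first or catch the exception.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical path; substitute a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);

        // Throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(path);

        System.out.println("Length: " + status.getLen());
        System.out.println("Directory: " + status.isDir());
        System.out.println("Modification time: " + status.getModificationTime());
    }
}

Newer Hadoop releases deprecate isDir() in favor of isDirectory(); the examples on this page use the older accessor.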

Usage

From source file:com.kylinolap.job.hadoop.cube.CopySeq.java

License:Apache License

public static void copyTo64MB(String src, String dst) throws IOException {
    Configuration hconf = new Configuration();
    Path srcPath = new Path(src);
    Path dstPath = new Path(dst);

    FileSystem fs = FileSystem.get(hconf);
    long srcSize = fs.getFileStatus(srcPath).getLen();
    int copyTimes = (int) (67108864 / srcSize); // number of copies needed to reach 64 MB
    System.out.println("Copy " + copyTimes + " times");

    Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(hconf, Writer.file(dstPath), Writer.keyClass(key.getClass()),
            Writer.valueClass(Text.class), Writer.compression(CompressionType.BLOCK, getLZOCodec(hconf)));

    int count = 0;
    while (reader.next(key, value)) {
        for (int i = 0; i < copyTimes; i++) {
            writer.append(key, value);
            count++;
        }
    }

    System.out.println("Len: " + writer.getLength());
    System.out.println("Rows: " + count);

    reader.close();
    writer.close();
}

From source file:com.liferay.hadoop.store.HDFSStore.java

License:Open Source License

@Override
public long getFileSize(long companyId, long repositoryId, String fileName)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (!fileSystem.exists(fullPath)) {
            throw new PortalException("File " + fullPath.toUri().toString() + " does not exist");
        }

        FileStatus fileStatus = fileSystem.getFileStatus(fullPath);

        return fileStatus.getLen();
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file:com.lightboxtechnologies.spectrum.HDFSArchiver.java

License:Apache License

protected static void traverse(FileSystem fs, Path p, ZipOutputStream zout, byte[] buf) throws IOException {
    final String relpath = relativize(p);

    final FileStatus pstat = fs.getFileStatus(p);
    if (pstat.isDir()) {
        handleDirectory(relpath, fs, p, zout, buf);
    } else {
        handleFile(relpath, fs, p, zout, buf);
    }
}

From source file:com.linkedin.cubert.io.rubix.RubixFile.java

License:Open Source License

@SuppressWarnings("unchecked")
public List<KeyData<K>> getKeyData() throws IOException, ClassNotFoundException {
    final FileSystem fs = FileSystem.get(conf);
    keyData = new ArrayList<KeyData<K>>();

    final long filesize = fs.getFileStatus(path).getLen();
    FSDataInputStream in = fs.open(path);

    /* The last long in the file is the start position of the trailer section */
    in.seek(filesize - 8);
    long metaDataStartPos = in.readLong();

    in.seek(metaDataStartPos);

    ObjectMapper mapper = new ObjectMapper();
    metadataJson = mapper.readValue(in.readUTF(), JsonNode.class);

    int keySectionSize = in.readInt();

    // load the key section
    byte[] keySection = new byte[keySectionSize];

    in.seek(filesize - keySectionSize - 8);
    in.readFully(keySection, 0, keySectionSize); // readFully avoids a short read
    in.close();

    ByteArrayInputStream bis = new ByteArrayInputStream(keySection);
    DataInput dataInput = new DataInputStream(bis);

    int numberOfBlocks = metadataJson.get("numberOfBlocks").getIntValue();

    // load the key section
    keyClass = (Class<K>) ClassCache.forName(JsonUtils.getText(metadataJson, "keyClass"));
    valueClass = (Class<V>) ClassCache.forName(JsonUtils.getText(metadataJson, "valueClass"));

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer<K> deserializer = serializationFactory.getDeserializer(keyClass);

    deserializer.open(bis);

    while (bis.available() > 0 && numberOfBlocks > 0) {
        K key = deserializer.deserialize(null);

        long offset = dataInput.readLong();
        long blockId = dataInput.readLong();
        long numRecords = dataInput.readLong();

        keyData.add(new KeyData<K>(key, offset, 0, numRecords, blockId));
        numberOfBlocks--;
    }

    // Assign length to each keydata entry
    int numEntries = keyData.size();
    for (int i = 1; i < numEntries; i++) {
        KeyData<K> prev = keyData.get(i - 1);
        KeyData<K> current = keyData.get(i);

        prev.setLength(current.getOffset() - prev.getOffset());
    }

    if (numEntries > 0) {
        KeyData<K> last = keyData.get(numEntries - 1);
        last.setLength(metaDataStartPos - last.offset);
    }

    return keyData;
}

From source file:com.linkedin.cubert.io.rubix.RubixFile.java

License:Open Source License

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException,
        ParseException, InstantiationException, IllegalAccessException {
    final int VERBOSE_NUM_ROWS = 4;

    Options options = new Options();

    options.addOption("h", "help", false, "shows this message");
    options.addOption("v", "verbose", false, "print summary and first few rows of each block");
    options.addOption("m", "metadata", false, "show the metadata");
    options.addOption("d", "dump", false,
            "dump the contents of the rubix file. Use -f for specifying format, and -o for specifying output location");
    options.addOption("f", "format", true, "the data format for dumping data (AVRO or TEXT). Default: TEXT");
    options.addOption("e", "extract", true,
            "Extract one rubix block matching the block id. Use -o for specifying output location");
    options.addOption("o", true, "Store the output at the specified location");

    CommandLineParser parser = new BasicParser();

    // parse the command line arguments
    CommandLine line = parser.parse(options, args);

    // show the help message
    if (line.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(
                "RubixFile <rubix file or dir> [options]\nIf no options are provided, print a summary of the blocks.",
                options);
        return;
    }

    // validate provided options
    if (line.hasOption("d") && line.hasOption("e")) {
        System.err.println("Cannot dump (-d) and extract (-e) at the same time!");
        return;
    }

    // obtain the list of rubix files
    String[] files = line.getArgs();
    if (files == null || files.length == 0) {
        System.err.println("Rubix file not specified");
        return;
    }

    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path(files[0]);
    FileStatus[] allFiles;

    FileStatus status = fs.getFileStatus(path);
    if (status.isDir()) {
        allFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.toString().contains(RubixConstants.RUBIX_EXTENSION);
            }

        });
    } else {
        allFiles = new FileStatus[] { status };
    }

    // walk over all files and extract the trailer section
    List<RubixFile<Tuple, Object>> rfiles = new ArrayList<RubixFile<Tuple, Object>>();

    for (FileStatus s : allFiles) {
        Path p = s.getPath();

        RubixFile<Tuple, Object> rfile = new RubixFile<Tuple, Object>(conf, p);

        // if printing metadata, exit after the first file (since all files
        // have the same metadata)
        if (line.hasOption("m")) {
            rfile.getKeyData();

            System.out.println(new ObjectMapper().writer().writeValueAsString(rfile.metadataJson));
            break;
        }

        rfiles.add(rfile);
    }

    // dump the data
    if (line.hasOption("d")) {
        String format = line.getOptionValue("f");
        if (format == null)
            format = "TEXT";

        format = format.trim().toUpperCase();

        if (format.equals("AVRO")) {
            // dumpAvro(rfiles, line.getOptionValue("o"));
            throw new UnsupportedOperationException(
                    "Dumping to avro is not currently supporting. Please write a Cubert (map-only) script to store data in avro format");
        } else if (format.equals("TEXT")) {
            if (line.hasOption("o")) {
                System.err.println("Dumping TEXT format data *into a file* is not currently supported");
                return;
            }
            dumpText(rfiles, line.getOptionValue("o"), Integer.MAX_VALUE);
        } else {
            System.err.println("Invalid format [" + format + "] for dumping. Please use AVRO or TEXT");
            return;
        }
    } else if (line.hasOption("e")) // extract one rubix block
    {
        long blockId = Long.parseLong(line.getOptionValue("e"));
        extract(rfiles, blockId, line.getOptionValue("o"));
    } else
    // print summary
    {
        dumpText(rfiles, null, line.hasOption("v") ? VERBOSE_NUM_ROWS : 0);
    }
}

From source file:com.linkedin.cubert.io.rubix.RubixInputSplit.java

License:Open Source License

@Override
public String[] getLocations() throws IOException, InterruptedException {
    if (hostnames == null) {
        /* Obtain the FileSystem object and get the FileStatus objects for the split */
        FileSystem fileSystem = FileSystem.get(conf);
        FileStatus fileStatus = fileSystem.getFileStatus(filename);
        /*
         * Obtain the Block locations for the split. This also provides the offset and
         * length information for each block
         */
        final BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(fileStatus, offset, length);
        /**
         * Collect all hosts in a map and populate the number of bytes to be read from
         * each host
         */
        Long l;
        Map<String, Long> hostMap = new HashMap<String, Long>();
        for (BlockLocation bl : blockLocations) {
            final long start = bl.getOffset() < offset ? offset : bl.getOffset();
            final long end = (offset + length) < (bl.getOffset() + bl.getLength()) ? offset + length
                    : bl.getOffset() + bl.getLength();
            final long nRelevantBytes = end - start;
            for (String host : bl.getHosts()) {
                hostMap.put(host, ((l = hostMap.get(host)) == null ? 0 : l) + nRelevantBytes);
            }
        }
        /* Sort them in decreasing order of maximum number of relevant bytes */
        final Set<Map.Entry<String, Long>> entries = hostMap.entrySet();
        final Map.Entry<String, Long>[] hostLengthPairs = entries.toArray(new Map.Entry[entries.size()]);

        Arrays.sort(hostLengthPairs, new Comparator<Map.Entry<String, Long>>() {
            @Override
            public int compare(Map.Entry<String, Long> e1, Map.Entry<String, Long> e2) {
                return e2.getValue().compareTo(e1.getValue()); // subtracting longs and casting to int could overflow
            }
        });

        /* Populate the hostnames object */
        final int nHost = Math.min(hostLengthPairs.length, MAX_LOCATIONS);
        hostnames = new String[nHost];
        for (int i = 0; i < nHost; ++i) {
            hostnames[i] = hostLengthPairs[i].getKey();
        }
    }
    return hostnames;
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageUtils.java

License:Apache License

/**
 * Returns all non-hidden files recursively inside the base paths given
 *
 * @throws IOException
 */
public static Set<Path> getAllFilesRecursively(Set<Path> basePaths, Configuration conf) throws IOException {
    Set<Path> paths = new HashSet<Path>();
    for (Path path : basePaths) {
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        FileStatus f = fs.getFileStatus(path);
        if (f.isDir()) {
            getAllFilesInternal(f, conf, paths, fs);
        } else {
            paths.add(path);
        }
    }
    return paths;
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageUtils.java

License:Apache License

/**
 * Get the last file of an HDFS path if it is a directory,
 * or return the file itself if the path is a file.
 */
public static Path getLast(Path path, FileSystem fs) throws IOException {

    FileStatus status = fs.getFileStatus(path);
    if (!status.isDir()) {
        return path;
    }
    FileStatus[] statuses = fs.listStatus(path, PATH_FILTER);

    if (statuses.length == 0) {
        return null;
    } else {
        Arrays.sort(statuses);
        for (int i = statuses.length - 1; i >= 0; i--) {
            if (!statuses[i].isDir()) {
                return statuses[i].getPath();
            }
        }
        return null;
    }
}

From source file:com.linkedin.cubert.utils.AvroUtils.java

License:Open Source License

/**
 * Extracts the schema of an Avro file.
 *
 * @param conf the Hadoop configuration
 * @param path the Avro file, or a directory containing *.avro files
 * @return the schema of the first Avro file found
 * @throws IOException
 */
public static Schema getSchema(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.getFileStatus(path).isDir()) {
        Path globPath = new Path(path, "*.avro");
        FileStatus[] allFiles = fs.globStatus(globPath);
        if (allFiles.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }

        path = allFiles[0].getPath();
    }
    System.out.println("Obtaining schema of avro file " + path.toString());

    return getSchema(new FsInput(path, conf));
}

From source file:com.linkedin.cubert.utils.CommonUtils.java

License:Open Source License

public static Path getAFileInPath(Configuration conf, Path path, String suffix) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.getFileStatus(path).isDir()) {
        Path globPath = new Path(path, "*." + suffix);
        FileStatus[] allFiles = fs.globStatus(globPath);
        if (allFiles.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }

        path = allFiles[0].getPath();
    }

    print.f("Obtaining schema of %s file %s", suffix, path.toString());

    return path;
}