List of usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus
/** Returns the {@code FileStatus} for the path {@code f}; declared to throw {@code IOException}. */
public abstract FileStatus getFileStatus(Path f) throws IOException;
From source file:com.kylinolap.job.hadoop.cube.CopySeq.java
License:Apache License
public static void copyTo64MB(String src, String dst) throws IOException { Configuration hconf = new Configuration(); Path srcPath = new Path(src); Path dstPath = new Path(dst); FileSystem fs = FileSystem.get(hconf); long srcSize = fs.getFileStatus(srcPath).getLen(); int copyTimes = (int) (67108864 / srcSize); // 64 MB System.out.println("Copy " + copyTimes + " times"); Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath)); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf); Text value = new Text(); Writer writer = SequenceFile.createWriter(hconf, Writer.file(dstPath), Writer.keyClass(key.getClass()), Writer.valueClass(Text.class), Writer.compression(CompressionType.BLOCK, getLZOCodec(hconf))); int count = 0; while (reader.next(key, value)) { for (int i = 0; i < copyTimes; i++) { writer.append(key, value);//from w ww .ja va 2 s . c om count++; } } System.out.println("Len: " + writer.getLength()); System.out.println("Rows: " + count); reader.close(); writer.close(); }
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
@Override public long getFileSize(long companyId, long repositoryId, String fileName) throws PortalException, SystemException { Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT); try {// w w w. ja v a2 s .c o m FileSystem fileSystem = HadoopManager.getFileSystem(); if (!fileSystem.exists(fullPath)) { throw new PortalException("File " + fullPath.toUri().toString() + " does not exist"); } FileStatus fileStatus = fileSystem.getFileStatus(fullPath); return fileStatus.getLen(); } catch (IOException ioe) { throw new SystemException(ioe); } }
From source file:com.lightboxtechnologies.spectrum.HDFSArchiver.java
License:Apache License
/**
 * Dispatches the path {@code p} to the directory or file handler, depending on what the
 * file system reports for it.
 *
 * @param fs   file system that owns {@code p}
 * @param p    path to process
 * @param zout zip stream handed through to the handlers
 * @param buf  scratch buffer handed through to the handlers
 * @throws IOException if the status lookup or a handler fails
 */
protected static void traverse(FileSystem fs, Path p, ZipOutputStream zout, byte[] buf) throws IOException {
    final String relpath = relativize(p);

    if (fs.getFileStatus(p).isDir()) {
        handleDirectory(relpath, fs, p, zout, buf);
        return;
    }

    handleFile(relpath, fs, p, zout, buf);
}
From source file:com.linkedin.cubert.io.rubix.RubixFile.java
License:Open Source License
@SuppressWarnings("unchecked") public List<KeyData<K>> getKeyData() throws IOException, ClassNotFoundException { final FileSystem fs = FileSystem.get(conf); keyData = new ArrayList<KeyData<K>>(); final long filesize = fs.getFileStatus(path).getLen(); FSDataInputStream in = fs.open(path); /* The last long in the file is the start position of the trailer section */ in.seek(filesize - 8);//from w w w . j a v a 2s .c o m long metaDataStartPos = in.readLong(); in.seek(metaDataStartPos); ObjectMapper mapper = new ObjectMapper(); metadataJson = mapper.readValue(in.readUTF(), JsonNode.class); int keySectionSize = in.readInt(); // load the key section byte[] keySection = new byte[keySectionSize]; in.seek(filesize - keySectionSize - 8); in.read(keySection, 0, keySectionSize); in.close(); ByteArrayInputStream bis = new ByteArrayInputStream(keySection); DataInput dataInput = new DataInputStream(bis); int numberOfBlocks = metadataJson.get("numberOfBlocks").getIntValue(); // load the key section keyClass = (Class<K>) ClassCache.forName(JsonUtils.getText(metadataJson, "keyClass")); valueClass = (Class<V>) ClassCache.forName(JsonUtils.getText(metadataJson, "valueClass")); SerializationFactory serializationFactory = new SerializationFactory(conf); Deserializer<K> deserializer = serializationFactory.getDeserializer(keyClass); deserializer.open(bis); while (bis.available() > 0 && numberOfBlocks > 0) { K key = deserializer.deserialize(null); long offset = dataInput.readLong(); long blockId = dataInput.readLong(); long numRecords = dataInput.readLong(); keyData.add(new KeyData<K>(key, offset, 0, numRecords, blockId)); numberOfBlocks--; } // Assign length to each keydata entry int numEntries = keyData.size(); for (int i = 1; i < numEntries; i++) { KeyData<K> prev = keyData.get(i - 1); KeyData<K> current = keyData.get(i); prev.setLength(current.getOffset() - prev.getOffset()); } if (numEntries > 0) { KeyData<K> last = keyData.get(numEntries - 1); last.setLength(metaDataStartPos - 
last.offset); } return keyData; }
From source file:com.linkedin.cubert.io.rubix.RubixFile.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, ParseException, InstantiationException, IllegalAccessException { final int VERBOSE_NUM_ROWS = 4; Options options = new Options(); options.addOption("h", "help", false, "shows this message"); options.addOption("v", "verbose", false, "print summary and first few rows of each block"); options.addOption("m", "metadata", false, "show the metadata"); options.addOption("d", "dump", false, "dump the contents of the rubix file. Use -f for specifying format, and -o for specifying output location"); options.addOption("f", "format", true, "the data format for dumping data (AVRO or TEXT). Default: TEXT"); options.addOption("e", "extract", true, "Extract one rubix block matching the block id. Use -o for specifying output location"); options.addOption("o", true, "Store the output at the specified location"); CommandLineParser parser = new BasicParser(); // parse the command line arguments CommandLine line = parser.parse(options, args); // show the help message if (line.hasOption("h")) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(/* w ww. 
ja v a 2 s .c o m*/ "RubixFile <rubix file or dir> [options]\nIf no options are provided, print a summary of the blocks.", options); return; } // validate provided options if (line.hasOption("d") && line.hasOption("e")) { System.err.println("Cannot dump (-d) and extract (-e) at the same time!"); return; } // obtain the list of rubix files String[] files = line.getArgs(); if (files == null || files.length == 0) { System.err.println("Rubix file not specified"); return; } Configuration conf = new JobConf(); FileSystem fs = FileSystem.get(conf); Path path = new Path(files[0]); FileStatus[] allFiles; FileStatus status = fs.getFileStatus(path); if (status.isDir()) { allFiles = fs.listStatus(path, new PathFilter() { @Override public boolean accept(Path path) { return path.toString().contains(RubixConstants.RUBIX_EXTENSION); } }); } else { allFiles = new FileStatus[] { status }; } // walk over all files and extract the trailer section List<RubixFile<Tuple, Object>> rfiles = new ArrayList<RubixFile<Tuple, Object>>(); for (FileStatus s : allFiles) { Path p = s.getPath(); RubixFile<Tuple, Object> rfile = new RubixFile<Tuple, Object>(conf, p); // if printing meta data information.. exit after first file (since all files // have the same meta data) if (line.hasOption("m")) { rfile.getKeyData(); System.out.println(new ObjectMapper().writer().writeValueAsString(rfile.metadataJson)); break; } rfiles.add(rfile); } // dump the data if (line.hasOption("d")) { String format = line.getOptionValue("f"); if (format == null) format = "TEXT"; format = format.trim().toUpperCase(); if (format.equals("AVRO")) { // dumpAvro(rfiles, line.getOptionValue("o")); throw new UnsupportedOperationException( "Dumping to avro is not currently supporting. 
Please write a Cubert (map-only) script to store data in avro format"); } else if (format.equals("TEXT")) { if (line.hasOption("o")) { System.err.println("Dumping TEXT format data *into a file* is not currently supported"); return; } dumpText(rfiles, line.getOptionValue("o"), Integer.MAX_VALUE); } else { System.err.println("Invalid format [" + format + "] for dumping. Please use AVRO or TEXT"); return; } } else if (line.hasOption("e")) // extract one rubix block { long blockId = Long.parseLong(line.getOptionValue("e")); extract(rfiles, blockId, line.getOptionValue("o")); } else // print summary { dumpText(rfiles, null, line.hasOption("v") ? VERBOSE_NUM_ROWS : 0); } }
From source file:com.linkedin.cubert.io.rubix.RubixInputSplit.java
License:Open Source License
/**
 * Returns the hosts that hold this split's data, ordered by how many of the split's bytes
 * each host stores (most first) and capped at {@code MAX_LOCATIONS} entries.
 *
 * <p>The result is computed on the first call and cached in {@code hostnames}.
 *
 * @return host names for this split
 * @throws IOException if the file status or block locations cannot be obtained
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    if (hostnames == null) {
        /* Obtain the FileSystem object and get the FileStatus objects for the split */
        FileSystem fileSystem = FileSystem.get(conf);
        FileStatus fileStatus = fileSystem.getFileStatus(filename);

        /*
         * Obtain the Block locations for the split. This also provides the offset and
         * length information for each block
         */
        final BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(fileStatus, offset, length);

        /*
         * Collect all hosts in a map and populate the number of bytes to be read from
         * each host
         */
        Map<String, Long> hostMap = new HashMap<String, Long>();
        for (BlockLocation bl : blockLocations) {
            // Clip the block to the [offset, offset + length) window of this split.
            final long start = Math.max(offset, bl.getOffset());
            final long end = Math.min(offset + length, bl.getOffset() + bl.getLength());
            final long nRelevantBytes = end - start;

            for (String host : bl.getHosts()) {
                Long soFar = hostMap.get(host);
                hostMap.put(host, (soFar == null ? 0L : soFar.longValue()) + nRelevantBytes);
            }
        }

        /* Sort them in decreasing order of maximum number of relevant bytes */
        final Set<Map.Entry<String, Long>> entries = hostMap.entrySet();
        final Map.Entry<String, Long>[] hostLengthPairs = entries.toArray(new Map.Entry[entries.size()]);

        Arrays.sort(hostLengthPairs, new Comparator<Map.Entry<String, Long>>() {
            @Override
            public int compare(Map.Entry<String, Long> e1, Map.Entry<String, Long> e2) {
                // Compare as longs: casting the difference to int truncates and can report
                // the wrong sign (or zero) once byte counts differ by 2^31 or more.
                return e2.getValue().compareTo(e1.getValue());
            }
        });

        /* Populate the hostnames object */
        final int nHost = Math.min(hostLengthPairs.length, MAX_LOCATIONS);
        hostnames = new String[nHost];
        for (int i = 0; i < nHost; ++i) {
            hostnames[i] = hostLengthPairs[i].getKey();
        }
    }

    return hostnames;
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageUtils.java
License:Apache License
/** * Returns all non-hidden files recursively inside the base paths given * * @throws IOException/*from w w w . j a v a 2s .c om*/ */ public static Set<Path> getAllFilesRecursively(Set<Path> basePaths, Configuration conf) throws IOException { Set<Path> paths = new HashSet<Path>(); for (Path path : basePaths) { FileSystem fs = FileSystem.get(path.toUri(), conf); FileStatus f = fs.getFileStatus(path); if (f.isDir()) { getAllFilesInternal(f, conf, paths, fs); } else { paths.add(path); } } return paths; }
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageUtils.java
License:Apache License
/** get last file of a hdfs path if it is a directory; * or return the file itself if path is a file *///from ww w . j a v a 2 s .c o m public static Path getLast(Path path, FileSystem fs) throws IOException { FileStatus status = fs.getFileStatus(path); if (!status.isDir()) { return path; } FileStatus[] statuses = fs.listStatus(path, PATH_FILTER); if (statuses.length == 0) { return null; } else { Arrays.sort(statuses); for (int i = statuses.length - 1; i >= 0; i--) { if (!statuses[i].isDir()) { return statuses[i].getPath(); } } return null; } }
From source file:com.linkedin.cubert.utils.AvroUtils.java
License:Open Source License
/**
 * Extracts the schema of an Avro file. When {@code path} is a directory, the first entry
 * matching {@code *.avro} inside it is used.
 *
 * @param conf configuration used to resolve the file system
 * @param path an Avro file, or a directory containing {@code *.avro} files
 * @return the schema obtained from the selected file
 * @throws IOException if the directory has no {@code *.avro} entry or a lookup fails
 */
public static Schema getSchema(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);

    Path avroFile = path;
    if (fs.getFileStatus(path).isDir()) {
        FileStatus[] matches = fs.globStatus(new Path(path, "*.avro"));
        if (matches.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }
        avroFile = matches[0].getPath();
    }

    System.out.println("Obtaining schema of avro file " + avroFile.toString());

    return getSchema(new FsInput(avroFile, conf));
}
From source file:com.linkedin.cubert.utils.CommonUtils.java
License:Open Source License
/**
 * Resolves {@code path} to a single file: a non-directory path is returned unchanged, while
 * for a directory the first entry matching {@code *.<suffix>} is returned.
 *
 * @param conf   configuration used to resolve the file system
 * @param path   a file, or a directory to search
 * @param suffix file extension (without the dot) to match inside a directory
 * @return the selected file path
 * @throws IOException if the directory has no matching entry or a lookup fails
 */
public static Path getAFileInPath(Configuration conf, Path path, String suffix) throws IOException {
    FileSystem fs = path.getFileSystem(conf);

    Path selected = path;
    boolean isDirectory = fs.getFileStatus(path).isDir();
    if (isDirectory) {
        Path pattern = new Path(path, "*." + suffix);
        FileStatus[] matches = fs.globStatus(pattern);

        if (matches.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }

        selected = matches[0].getPath();
    }

    print.f("Obtaining schema of %s file %s", suffix, selected.toString());
    return selected;
}