List of usage examples for org.apache.hadoop.fs FileSystem isDirectory
@Deprecated public boolean isDirectory(Path f) throws IOException
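Since this overload is deprecated, newer code typically obtains a FileStatus and calls FileStatus.isDirectory() on it; the deprecated method returns false for a missing path, while getFileStatus() throws FileNotFoundException. A minimal sketch of both styles (the path /tmp/data and the class name are placeholders, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.FileNotFoundException;
import java.io.IOException;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("/tmp/data"); // placeholder path

        // Deprecated call: returns false when the path does not exist.
        boolean viaDeprecated = fs.isDirectory(p);

        // Preferred replacement: getFileStatus() + FileStatus.isDirectory(),
        // which throws FileNotFoundException for a missing path.
        boolean viaStatus;
        try {
            FileStatus status = fs.getFileStatus(p);
            viaStatus = status.isDirectory();
        } catch (FileNotFoundException e) {
            viaStatus = false;
        }

        System.out.println(p + " isDirectory: " + viaDeprecated + " / " + viaStatus);
    }
}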
From source file:pl.edu.icm.coansys.statisticsgenerator.tools.ViewTool.java
License:Open Source License
private static void processFileOrDirectory(Path pt, Configuration conf) throws IOException {
    FileSystem fs = pt.getFileSystem(conf);
    if (fs.isDirectory(pt)) {
        for (FileStatus fstat : fs.listStatus(pt)) {
            processFileOrDirectory(fstat.getPath(), conf);
        }
    } else if (fs.isFile(pt)) {
        viewFile(pt, conf);
    } else {
        // log the error
    }
}
From source file:reconcile.hbase.mapreduce.KeyListInputFormat.java
License:Open Source License
@SuppressWarnings("deprecation") @Override/*from w w w . j a v a 2 s. c o m*/ public List<InputSplit> getSplits(JobContext context) throws IOException { ArrayList<InputSplit> splits = new ArrayList<InputSplit>(); TreeSet<String> uniqueKeys = new TreeSet<String>(); FileSystem fs = FileSystem.get(context.getConfiguration()); final String tableName = context.getConfiguration().get(JobConfig.TABLE_CONF); if (tableName == null || tableName.length() == 0) throw new IOException("HBase table name was not provided"); int splitSize = Integer.parseInt(context.getConfiguration().get(SPLIT_SIZE, DEFAULT_SPLIT_SIZE)); final int maxSplits = Integer.parseInt(context.getConfiguration().get(MAX_SPLITS, DEFAULT_MAX_SPLITS)); Path[] files = getInputPaths(context); for (Path file : files) { if (fs.isDirectory(file) || !fs.exists(file)) throw new IOException("Not a valid key list file: " + file.toString()); InputStream is = null; BufferedReader reader = null; try { is = fs.open(file); reader = new BufferedReader(new InputStreamReader(is)); String line = null; while ((line = reader.readLine()) != null) { String key = line.trim(); if (key.length() > 0 && !key.startsWith("#")) { uniqueKeys.add(key); } } } finally { IOUtils.closeQuietly(is); IOUtils.closeQuietly(reader); } } ArrayList<String> keys = new ArrayList<String>(); keys.addAll(uniqueKeys); if (keys.size() > 0) { HConnection conn = HConnectionManager.getConnection(context.getConfiguration()); int numSplits = Math.max((int) Math.ceil(keys.size() / (splitSize * 1.0)), 1); if (numSplits > maxSplits) { splitSize = (int) Math.ceil(keys.size() / (maxSplits * 1.0)); LOG.info("Overriding split size with (" + splitSize + ") to maintain max splits of (" + maxSplits + ")"); numSplits = Math.max((int) Math.ceil(keys.size() / (splitSize * 1.0)), 1); } LOG.info("There are (" + keys.size() + ") total keys. Split size(" + splitSize + ") Num splits(" + numSplits + ")"); int startNdx = 0; for (int i = 0; i < (numSplits - 1); ++i) { int endNdx = startNdx + splitSize; splits.add(createSplit(conn, tableName, keys, startNdx, endNdx)); startNdx = endNdx; } // Add last split splits.add(createSplit(conn, tableName, keys, startNdx, keys.size())); } return splits; }
From source file:simsql.code_generator.MyPhysicalDatabase.java
License:Apache License
public void restoreFrom(String fromHere) {
    try {
        // first, we see if the directory that we are reading from
        // exists and is a directory.
        Configuration conf = new Configuration();
        FileSystem dfs = FileSystem.get(conf);
        Path pathFrom = new Path(fromHere);
        if (!dfs.exists(pathFrom) || !dfs.isDirectory(pathFrom)) {
            System.out.println("The specified restoration path does not exist or is not a directory!");
            return;
        }
        // now, get the destination path.
        Path pathTo = new Path(myDir);
        if (dfs.exists(pathTo)) {
            // destroy it, if it's there.
            dfs.delete(pathTo, true);
        }
        // make the directory
        // dfs.mkdirs(pathTo);
        // and all the paths we will be copying
        Path[] sourcePaths = FileUtil.stat2Paths(dfs.globStatus(pathFrom), pathFrom);
        for (Path sp : sourcePaths) {
            // restore all of it.
            FileUtil.copy(dfs, sp, dfs, pathTo, false, conf);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not restore data from directory " + fromHere, e);
    }
}
From source file:simsql.runtime.HDFSTableStats.java
License:Apache License
@SuppressWarnings("unchecked") public void load(String path) throws IOException, ClassNotFoundException { // look up the input file... Path file = new Path(path); Configuration conf = new Configuration(); FileSystem fs = file.getFileSystem(conf); // is it a directory? if (fs.exists(file) && fs.isDirectory(file)) { // if so, traverse all of it. clear();/* w w w .j a v a 2s. co m*/ for (FileStatus ff : fs.listStatus(file, new StatsFileFilter())) { HDFSTableStats guyMerged = new HDFSTableStats(); guyMerged.load(ff.getPath().toUri().getPath()); consume(guyMerged); } } else if (fs.exists(file)) { // otherwise, just read it in. FSDataInputStream fileIn = fs.open(file); ObjectInputStream in = new ObjectInputStream(fileIn); HDFSTableStats newGuy = (HDFSTableStats) in.readObject(); in.close(); // destroy our contents and read. clear(); consume(newGuy); } }
From source file:simsql.runtime.MRLoader.java
License:Apache License
public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) {
    // make a directory for the relation
    Configuration conf = new Configuration();
    FileSystem dfs = null;
    try {
        dfs = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot access HDFS!", e);
    }
    try {
        // if it exists, destroy it.
        Path path = new Path(outputPath);
        if (dfs.exists(path)) {
            dfs.delete(path, true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not create the file to bulk load to!", e);
    }
    // find a file name
    String tempPath = null;
    if (inputPath.startsWith("hdfs:")) {
        tempPath = inputPath.replace("hdfs:", "");
    } else {
        tempPath = "/tempDataFile_" + r.getName();
        try {
            dfs.delete(new Path(tempPath), true);
        } catch (Exception e) {
            // ignore this.
        }
        // upload the text file
        try {
            dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath));
            dfs.deleteOnExit(new Path(tempPath));
        } catch (Exception e) {
            throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e);
        }
    }
    // set up the new job's parameters.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());
    conf.set("io.serializations",
            "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt("simsql.loader.numAtts", r.getAttributes().size());
    conf.setInt("simsql.loader.typeCode", (int) typeCode);
    conf.setInt("simsql.loader.sortAtt", sortAtt);
    String[] myStrings = new String[r.getAttributes().size()];
    int j = 0;
    for (simsql.compiler.Attribute a : r.getAttributes()) {
        myStrings[j++] = a.getPhysicalRealization().getClass().getName();
    }
    conf.setStrings("simsql.loader.types", myStrings);
    // create a job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create bulk loading job!", e);
    }
    // set the split size (number of mappers)
    long fSize = 0;
    if (inputPath.startsWith("hdfs")) {
        fSize = RelOp.getPathsTotalSize(new String[] { tempPath });
    } else {
        fSize = new File(inputPath).length();
    }
    FileInputFormat.setMinInputSplitSize(job, fSize / (long) numTasks);
    FileInputFormat.setMaxInputSplitSize(job, fSize / (long) numTasks);
    // and the number of reducers
    job.setNumReduceTasks(numTasks);
    // the mapper/reducer/jar
    job.setMapperClass(MRLoaderMapper.class);
    job.setReducerClass(MRLoaderReducer.class);
    job.setJarByClass(MRLoader.class);
    // I/O settings.
    job.setOutputFormatClass(RecordOutputFormat.class);
    job.setMapOutputKeyClass(RecordKey.class);
    job.setMapOutputValueClass(RecordWrapper.class);
    job.setOutputKeyClass(Nothing.class);
    job.setOutputValueClass(Record.class);
    try {
        FileInputFormat.setInputPaths(job, new Path(tempPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } catch (Exception e) {
        throw new RuntimeException("Could not set job inputs/outputs", e);
    }
    job.setGroupingComparatorClass(RecordKeyGroupingComparator.class);
    job.setPartitionerClass(RecordPartitioner.class);
    job.setSortComparatorClass(RecordKeySortComparator.class);
    job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath);
    // run it
    Counters counters;
    try {
        job.waitForCompletion(true);
        counters = job.getCounters();
    } catch (Exception e) {
        throw new RuntimeException("Could not set up bulk loader job!", e);
    }
    // now, delete all the empty part files
    try {
        // get a filesystem
        FileSystem ddfs = FileSystem.get(conf);
        Path outPath = new Path(outputPath);
        if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) {
            FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) {
                    // snappy leaves 4-byte long files around...
                    ddfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) {
        // this isn't disastrous
    }
    // get the counter for the output of the mapper.
    Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
    return bytesCounter.getValue();
}
From source file:simsql.runtime.RelOp.java
License:Apache License
public boolean run(RuntimeParameter params, boolean verbose) {
    ExampleRuntimeParameter pp = (ExampleRuntimeParameter) params;
    // build the jar.
    String jarFile = buildJarFile(params);
    // Get the default configuration object
    Configuration conf = new Configuration();
    // set quiet mode on/off
    conf.setQuietMode(!verbose);
    /***
     conf.setBoolean("mapred.task.profile", true);
     conf.set("mapred.task.profile.params", "-agentlib:hprof=cpu=samples,"
         + "heap=sites,depth=8,force=n,thread=y,verbose=n,file=%s");
     ***/
    // tell it how to serialize and deserialize records and recordkeys
    conf.set("io.serializations", getSerializations());
    conf.setBoolean("mapred.compress.map.output", true);
    int ioSortMB = conf.getInt("io.sort.mb", 256);
    conf.set("mapred.map.child.java.opts",
            "-Xmx" + (getMemPerMapper(params) + ioSortMB) + "m -Xms" + (getMemPerMapper(params))
                    + "m -Duser.timezone='America/Chicago' -Djava.net.preferIPv4Stack=true -XX:CompileThreshold=10000 -XX:+DoEscapeAnalysis -XX:+UseNUMA -XX:-EliminateLocks -XX:+UseBiasedLocking -XX:+OptimizeStringConcat -XX:+UseFastAccessorMethods -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycleMin=0 -XX:+UseCompressedOops -XX:+AggressiveOpts -XX:-UseStringCache -XX:ErrorFile=/tmp/hs_err_pid%p.log");
    conf.set("mapred.reduce.child.java.opts",
            "-Xmx" + (getMemPerReducer(params) + ioSortMB) + "m -Xms" + (getMemPerMapper(params))
                    + "m -Duser.timezone='America/Chicago' -Djava.net.preferIPv4Stack=true -XX:CompileThreshold=10000 -XX:+DoEscapeAnalysis -XX:+UseNUMA -XX:-EliminateLocks -XX:+UseBiasedLocking -XX:+OptimizeStringConcat -XX:+UseFastAccessorMethods -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycleMin=0 -XX:+UseCompressedOops -XX:+AggressiveOpts -XX:-UseStringCache -XX:ErrorFile=/tmp/hs_err_pid%p.log");
    conf.setInt("simsql.input.numSplits", pp.getNumCPUs());
    conf.setInt("mapred.job.reuse.jvm.num.tasks", 1);
    // conf.setBoolean ("mapred.map.tasks.speculative.execution", false);
    // conf.setBoolean ("mapred.reduce.tasks.speculative.execution", false);
    // tell it to use the jar that we just created
    conf.set("mapred.jar", jarFile);
    // conf.set("tmpjars", "file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-core.jar");
    conf.setBoolean("mapred.output.compress", true);
    conf.setStrings("mapred.output.compression.type", new String[] { "RECORD" });
    // use snappy for the intermediate stuff
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());
    // do some additional operator-specific configurations
    setConfigurations(conf, params);
    // collect statistics for final relations always
    conf.setBoolean("simsql.collectStats", isFinal || collectStats);
    // figure out what file to map
    String[] inDirs = myInputNetwork.getPipelinedInputFiles();
    inDirs = excludeAnyWhoWillNotBeMapped(inDirs);
    String inSingleString = inDirs[0];
    conf.set("simsql.fileToMap", inSingleString);
    for (int i = 1; i < inDirs.length; i++) {
        inSingleString += "," + inDirs[i];
    }
    // create and name the job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create a new job!", e);
    }
    job.setJobName(getJobName());
    // set the map-reduce input and output types
    job.setMapOutputKeyClass(getMapOutputKeyClass());
    job.setMapOutputValueClass(getMapOutputValueClass());
    job.setOutputKeyClass(getOutputKeyClass());
    job.setOutputValueClass(getOutputValueClass());
    int numReducers = getNumReducers(params);
    job.setMapperClass(getMapperClass());
    job.setReducerClass(getReducerClass());
    // set the number of reducers
    job.setNumReduceTasks(numReducers);
    // set the input and the output formats... these extend FileInputFormat and FileOutputFormat
    job.setInputFormatClass(getInputFormatClass());
    job.setOutputFormatClass(getOutputFormatClass());
    // set the input and output paths
    try {
        System.out.println("input file: " + inSingleString);
        FileInputFormat.setInputPaths(job, inSingleString);
        FileInputFormat.setInputPathFilter(job, TableFileFilter.class);
        FileOutputFormat.setOutputPath(job, new Path(getOutput()));
    } catch (Exception e) {
        throw new RuntimeException("Unable to set up the input/output path for the job.", e);
    }
    // set the split size
    FileInputFormat.setMinInputSplitSize(job, getSplitSize(params));
    FileInputFormat.setMaxInputSplitSize(job, getSplitSize(params));
    // set the various sorting/grouping/mapping classes
    job.setGroupingComparatorClass(getGroupingComparatorClass());
    job.setPartitionerClass(getPartitionerClass());
    job.setSortComparatorClass(getSortComparatorClass());
    // and now, submit the job and wait for things to finish
    int exitCode;
    try {
        exitCode = job.waitForCompletion(verbose) ? 0 : 1;
        // get the output bytes counter.
        Counters c = job.getCounters();
        Counter mx = c.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
        // and use them to set the size of the output relation.
        if (myDB != null) {
            myDB.setTableSize(myDB.getTableName(getOutput()), mx.getValue());
            myDB.setNumAtts(myDB.getTableName(getOutput()), getOutputAttNames().length);
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Unable to run the job", e);
    }
    // now, delete all the empty part files
    try {
        // get a filesystem
        FileSystem dfs = FileSystem.get(conf);
        Path outPath = new Path(getOutput());
        if (dfs.exists(outPath) && dfs.isDirectory(outPath)) {
            FileStatus fstatus[] = dfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (dfs.getContentSummary(ff.getPath()).getLength() <= 4) {
                    // snappy leaves 4-byte long files around...
                    dfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) {
        // this isn't disastrous
    }
    return (exitCode == 0);
}
From source file:streaming.core.DownloadRunner.java
License:Apache License
public static int getTarFileByPath(HttpServletResponse res, String pathStr) {
    String[] paths = pathStr.split(",");
    try {
        OutputStream outputStream = res.getOutputStream();
        TarOutputStream tarOutputStream = new TarOutputStream(new BufferedOutputStream(outputStream));
        FileSystem fs = FileSystem.get(new Configuration());
        List<FileStatus> files = new ArrayList<FileStatus>();
        for (String path : paths) {
            Path p = new Path(path);
            if (fs.exists(p)) {
                if (fs.isFile(p)) {
                    files.add(fs.getFileStatus(p));
                } else if (fs.isDirectory(p)) {
                    FileStatus[] fileStatusArr = fs.listStatus(p);
                    if (fileStatusArr != null && fileStatusArr.length > 0) {
                        for (FileStatus cur : fileStatusArr) {
                            if (cur.isFile()) {
                                files.add(cur);
                            }
                        }
                    }
                }
            }
        }
        if (files.size() > 0) {
            FSDataInputStream inputStream = null;
            int len = files.size();
            int i = 1;
            for (FileStatus cur : files) {
                logger.info("[" + i++ + "/" + len + "] adding file to tar: " + cur);
                inputStream = fs.open(cur.getPath());
                tarOutputStream.putNextEntry(new HDFSTarEntry(cur, cur.getPath().getName()));
                org.apache.commons.io.IOUtils.copyLarge(inputStream, tarOutputStream);
                inputStream.close();
            }
            tarOutputStream.flush();
            tarOutputStream.close();
            return 200;
        } else
            return 400;
    } catch (Exception e) {
        e.printStackTrace();
        return 500;
    }
}
From source file:streaming.core.DownloadRunner.java
License:Apache License
public static int getRawFileByPath(HttpServletResponse res, String path, long position) {
    try {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path(path);
        if (fs.exists(p)) {
            List<FileStatus> files = new ArrayList<FileStatus>();
            // a single file
            if (fs.isFile(p)) {
                files.add(fs.getFileStatus(p));
            } else if (fs.isDirectory(p)) {
                FileStatus[] fileStatusArr = fs.listStatus(p);
                if (fileStatusArr != null && fileStatusArr.length > 0) {
                    for (FileStatus cur : fileStatusArr) {
                        files.add(cur);
                    }
                }
            }
            // anything to send?
            if (files.size() > 0) {
                logger.info(path + " matched " + files.size() + " file(s)");
                FSDataInputStream inputStream = null;
                OutputStream outputStream = res.getOutputStream();
                int len = files.size();
                int i = 1;
                long allPosition = 0;
                for (FileStatus cur : files) {
                    logger.info("[" + i++ + "/" + len + "] " + path + ", streaming file: " + cur);
                    inputStream = fs.open(cur.getPath());
                    if (position > 0) {
                        if (allPosition + cur.getLen() > position) {
                            inputStream.seek(position - allPosition);
                            logger.info("seek position " + (position - allPosition));
                            position = -1;
                        }
                        allPosition += cur.getLen();
                    }
                    org.apache.commons.io.IOUtils.copyLarge(inputStream, outputStream);
                    inputStream.close();
                }
                outputStream.flush();
                outputStream.close();
                return 200;
            } else {
                logger.info(path + " matched " + files.size() + " file(s)");
            }
        } else {
            return 400;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 500;
}
From source file:thinkbig.util.Util.java
License:Open Source License
/**
 * Recursively enumerate all files inside the directory dirPath in FileSystem fs
 * @param fs
 * @param dirPath
 * @param files
 * @throws IOException
 */
public static void getAllFiles(FileSystem fs, String dirPath, List<String> files) throws IOException {
    Path loc = new Path(dirPath);
    FileStatus[] statuses = fs.listStatus(loc);
    if (statuses != null) {
        int i = 0;
        for (FileStatus status : statuses) {
            String file = statuses[i++].getPath().toString();
            if (fs.isDirectory(new Path(file))) {
                getAllFiles(fs, file, files);
            } else if (files.indexOf(file) == -1) {
                // if not already there
                System.out.println(file);
                files.add(file);
            }
        }
    }
}
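The listing above calls fs.isDirectory(new Path(file)) on each child even though listStatus has already returned a FileStatus per entry, which costs an extra filesystem round trip per file. A sketch of an equivalent traversal that reuses the returned status objects (same signature assumed, not taken from the project above):

public static void getAllFiles(FileSystem fs, String dirPath, List<String> files) throws IOException {
    FileStatus[] statuses = fs.listStatus(new Path(dirPath));
    if (statuses == null) {
        return;
    }
    for (FileStatus status : statuses) {
        String file = status.getPath().toString();
        if (status.isDirectory()) {
            // recurse into subdirectories using the status we already have
            getAllFiles(fs, file, files);
        } else if (!files.contains(file)) {
            // only record files we have not seen yet
            files.add(file);
        }
    }
}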
From source file:uk.ac.ucl.panda.indexing.io.BasicDocMaker.java
License:Apache License
protected void collectFiles(String path, ArrayList inputFiles) throws IOException {
    Path p = new Path(path);
    FileSystem fs = FileSystem.get(new Configuration());
    //System.out.println("Collect: "+f.getAbsolutePath());
    if (!fs.exists(p)) {
        return;
    }
    if (fs.isDirectory(p)) {
        RemoteIterator<LocatedFileStatus> fileIter = fs.listLocatedStatus(p);
        List<String> files = new ArrayList<String>();
        while (fileIter.hasNext()) {
            files.add(fileIter.next().getPath().toString());
        }
        Collections.sort(files);
        for (String f : files) {
            collectFiles(f, inputFiles);
        }
        return;
    }
    //////////////ucl
    if (path.toLowerCase().endsWith("z")) {
        inputFiles.add(path);
        addUniqueBytes(fs.getFileStatus(p).getLen());
    }
}