List of usage examples for org.apache.hadoop.fs.FileSystem#listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
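Before the collected examples below, here is a minimal, self-contained sketch of both overloads. The class name, the default Configuration, and the paths "/tmp" and "/user" are illustrative assumptions only, not taken from any of the source files that follow; note also that FileStatus.isDirectory() is the current accessor, while the older releases used in the examples below call the deprecated isDir().

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Assumes the default configuration (e.g. core-site.xml on the classpath); adjust as needed.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Single-path overload: list the direct children of one directory.
        Path dir = new Path("/tmp"); // hypothetical path, used only for illustration
        FileStatus[] children = fs.listStatus(dir);
        for (FileStatus status : children) {
            System.out.println((status.isDirectory() ? "dir  " : "file ")
                    + status.getPath() + " (" + status.getLen() + " bytes)");
        }

        // Path[] overload: list the children of several directories in one call.
        FileStatus[] combined = fs.listStatus(new Path[] { dir, new Path("/user") });
        System.out.println("Total entries across both directories: " + combined.length);
    }
}

listStatus returns only the immediate children of a path; it does not recurse. The HadoopUtils.addAllSubPaths example further down recurses manually when a deep listing is needed.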
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a Hama BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @param transposeMatrixA transpose matrix A in a separate job before multiplying
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean transposeMatrixA) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Transpose matrix A within a new job if requested
    DistributedRowMatrix transposed = this;
    if (transposeMatrixA) {
        transposed = transposed.transpose();
    }
    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    }

    // Multiply matrix with the transposed one
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a Hama BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    }

    // Multiply matrix
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:azkaban.jobtype.hiveutils.azkaban.hive.actions.Utils.java
License:Apache License
static ArrayList<String> fetchDirectories(FileSystem fs, String location, boolean returnFullPath)
        throws IOException, HiveViaAzkabanException {
    LOG.info("Fetching directories in " + location);
    Path p = new Path(location);
    FileStatus[] statuses = fs.listStatus(p);

    if (statuses == null || statuses.length == 0) {
        throw new HiveViaAzkabanException("Couldn't find any directories in " + location);
    }

    ArrayList<String> files = new ArrayList<String>(statuses.length);
    for (FileStatus status : statuses) {
        if (!status.isDir())
            continue;
        if (status.getPath().getName().startsWith("."))
            continue;

        files.add(returnFullPath ? status.getPath().toString() : status.getPath().getName());
    }
    return files;
}
From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java
License:Apache License
@SuppressWarnings("rawtypes")
public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass)
        throws IOException, URISyntaxException {
    JobConf conf = new JobConf();
    // set custom class loader with custom find resource strategy.
    conf.setJobName(getJobName());
    conf.setMapperClass(mapperClass);
    if (reducerClass != null) {
        conf.setReducerClass(reducerClass);
    }

    if (props.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        logger.info("Running locally, no hadoop jar set.");
    } else {
        HadoopUtils.setClassLoaderAndJar(conf, getClass());
        logger.info("Setting hadoop jar file for class:" + getClass() + " to " + conf.getJar());
        logger.info("*************************************************************************");
        logger.info("  Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ")  ");
        logger.info("*************************************************************************");
    }

    // set JVM options if present
    if (props.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts"));
        logger.info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts"));
    }

    // set input and output paths if they are present
    if (props.containsKey("input.paths")) {
        List<String> inputPaths = props.getStringList("input.paths");
        if (inputPaths.size() == 0)
            throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'");
        for (String path : inputPaths) {
            HadoopUtils.addAllSubPaths(conf, new Path(path));
        }
    }

    if (props.containsKey("output.path")) {
        String location = props.get("output.path");
        FileOutputFormat.setOutputPath(conf, new Path(location));

        // For testing purposes only: remove the output path if it exists
        if (props.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }

    // Adds external jars to the hadoop classpath
    String externalJarList = props.getString("hadoop.external.jarFiles", null);
    if (externalJarList != null) {
        FileSystem fs = FileSystem.get(conf);
        String[] jarFiles = externalJarList.split(",");
        for (String jarFile : jarFiles) {
            logger.info("Adding external jar File:" + jarFile);
            DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs);
        }
    }

    // Adds distributed cache files
    String cacheFileList = props.getString("hadoop.cache.files", null);
    if (cacheFileList != null) {
        String[] cacheFiles = cacheFileList.split(",");
        for (String cacheFile : cacheFiles) {
            logger.info("Adding Distributed Cache File:" + cacheFile);
            DistributedCache.addCacheFile(new URI(cacheFile), conf);
        }
    }

    // Adds distributed cache archives
    String archiveFileList = props.getString("hadoop.cache.archives", null);
    if (archiveFileList != null) {
        String[] archiveFiles = archiveFileList.split(",");
        for (String archiveFile : archiveFiles) {
            logger.info("Adding Distributed Cache Archive File:" + archiveFile);
            DistributedCache.addCacheArchive(new URI(archiveFile), conf);
        }
    }

    String hadoopCacheJarDir = props.getString("hdfs.default.classpath.dir", null);
    if (hadoopCacheJarDir != null) {
        FileSystem fs = FileSystem.get(conf);
        if (fs != null) {
            FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir));

            if (status != null) {
                for (int i = 0; i < status.length; ++i) {
                    if (!status[i].isDir()) {
                        Path path = new Path(hadoopCacheJarDir, status[i].getPath().getName());
                        logger.info("Adding Jar to Distributed Cache Archive File:" + path);
                        DistributedCache.addFileToClassPath(path, conf, fs);
                    }
                }
            } else {
                logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty.");
            }
        } else {
            logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist");
        }
    }

    // Copy properties with the hadoop prefix directly into the job configuration
    for (String key : getProps().getKeySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, getProps().get(key));
        }
    }
    HadoopUtils.setPropsInJob(conf, getProps());

    // put in tokens
    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
        conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION));
    }

    return conf;
}
From source file:azkaban.jobtype.javautils.HadoopUtils.java
License:Apache License
public static JobConf addAllSubPaths(JobConf conf, Path path) throws IOException {
    if (shouldPathBeIgnored(path)) {
        throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", path));
    }

    final FileSystem fs = path.getFileSystem(conf);
    if (fs.exists(path)) {
        // Recurse into subdirectories; add every non-ignored file as an input path
        for (FileStatus status : fs.listStatus(path)) {
            if (!shouldPathBeIgnored(status.getPath())) {
                if (status.isDir()) {
                    addAllSubPaths(conf, status.getPath());
                } else {
                    FileInputFormat.addInputPath(conf, status.getPath());
                }
            }
        }
    }
    return conf;
}
From source file:azkaban.viewer.hdfs.HdfsBrowserServlet.java
License:Apache License
private void displayDirPage(FileSystem fs, String user, HttpServletRequest req, HttpServletResponse resp,
        Session session, Path path) throws IOException {

    Page page = newPage(req, resp, session, "azkaban/viewer/hdfs/velocity/hdfs-browser.vm");
    page.add("allowproxy", allowGroupProxy);
    page.add("viewerPath", viewerPath);
    page.add("viewerName", viewerName);

    List<Path> paths = new ArrayList<Path>();
    List<String> segments = new ArrayList<String>();
    getPathSegments(path, paths, segments);
    page.add("paths", paths);
    page.add("segments", segments);
    page.add("user", user);
    page.add("homedir", getHomeDir(fs));

    try {
        FileStatus[] subdirs = fs.listStatus(path);
        page.add("subdirs", subdirs);
        long size = 0;
        for (int i = 0; i < subdirs.length; ++i) {
            if (subdirs[i].isDir()) {
                continue;
            }
            size += subdirs[i].getLen();
        }
        page.add("dirsize", size);
    } catch (AccessControlException e) {
        page.add("error_message", "Permission denied: " + e.getMessage());
        page.add("no_fs", "true");
    } catch (IOException e) {
        page.add("error_message", "Error: " + e.getMessage());
    }
    page.render();
}
From source file:bb.BranchAndBound.java
License:Apache License
static Job getJob(String input, String output, String dataDir, int iteration) throws Exception {
    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    // Delete empty files from the input directory before setting up the job
    FileStatus[] fileStatus = hdfs.listStatus(new Path(input));
    for (int i = 0; i < fileStatus.length; ++i) {
        if (fileStatus[i].getLen() == 0) {
            hdfs.delete(fileStatus[i].getPath(), true);
        }
    }
    DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf);

    Job ret = new Job(conf, dataDir + "_iteration_" + iteration);
    ret.setJarByClass(BranchAndBound.class);
    ret.setMapperClass(BBMapper1.class);
    ret.setReducerClass(BBReducer.class);
    //ret.setReducerClass(MergeReducer.class);
    FileInputFormat.setInputPaths(ret, new Path(input));
    //if( iteration > 7 )
    FileInputFormat.setMinInputSplitSize(ret, 67108864);
    FileOutputFormat.setOutputPath(ret, new Path(output));
    ret.setOutputKeyClass(NullWritable.class);
    ret.setOutputValueClass(Text.class);
    return ret;
}
From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java
License:Apache License
private void getPgStartIndex(Configuration conf) {
    try {
        Path path = new Path(basePGDir);
        FileSystem fs = path.getFileSystem(new Configuration());

        if (!fs.exists(path)) {
            pgStartIndex = 0;
            return;
        }

        int largestIx = 0;
        for (FileStatus file : fs.listStatus(path)) {
            String tmp = file.getPath().toString();
            if (!tmp.contains("bucket")) {
                continue;
            }
            tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
            int ix = Integer.parseInt(tmp.split("-")[1]);
            largestIx = Math.max(largestIx, ix);
            pgStartIndex += 1;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.java
License:Open Source License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    try {
        // read header from input
        outpFormat = new KeyIgnoringVCFOutputFormat(VCFFormat.VCF);
        String input = HalvadeConf.getInputDir(context.getConfiguration());
        String output = HalvadeConf.getOutDir(context.getConfiguration());
        reportBest = HalvadeConf.getReportAllVariant(context.getConfiguration());
        FileSystem fs = FileSystem.get(new URI(input), context.getConfiguration());
        Path firstVcfFile = null;
        if (fs.getFileStatus(new Path(input)).isDirectory()) {
            // get first .vcf file
            FileStatus[] files = fs.listStatus(new Path(input));
            int i = 0, l = files.length;
            while (i < l && !files[i].getPath().getName().endsWith(".vcf")) {
                i++;
            }
            if (i < l) {
                firstVcfFile = files[i].getPath();
            } else {
                throw new InterruptedException("VCFCombineReducer: No files in input folder.");
            }
        } else {
            throw new InterruptedException("VCFCombineReducer: Input directory is not a directory.");
        }
        Logger.DEBUG("first file: " + firstVcfFile);
        outpFormat.readHeaderFrom(firstVcfFile, fs);
        recordWriter = outpFormat.getRecordWriter(context, new Path(output + "HalvadeCombined.vcf"));
    } catch (URISyntaxException ex) {
        Logger.EXCEPTION(ex);
        throw new InterruptedException("URI for input directory is invalid.");
    }
}
From source file:be.ugent.intec.halvade.HalvadeOptions.java
License:Open Source License
protected double getInputSize(String input, Configuration conf) throws URISyntaxException, IOException {
    double size = 0;
    FileSystem fs = FileSystem.get(new URI(input), conf);
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        // add the size of every file in the directory
        FileStatus[] files = fs.listStatus(new Path(input));
        for (FileStatus file : files) {
            if (!file.isDirectory()) {
                size += file.getLen();
            }
        }
    } else {
        size += fs.getFileStatus(new Path(input)).getLen();
    }
    // convert bytes to gigabytes
    return (size / (1024 * 1024 * 1024));
}