List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
/**
 * Looks up the {@link FileStatus} describing the path {@code f}.
 *
 * @param f the path whose status is requested
 * @return the status object for {@code f}
 * @throws IOException if the status cannot be obtained
 *         (NOTE(review): presumably a FileNotFoundException when {@code f} does not exist —
 *         confirm against the Hadoop FileSystem documentation)
 */
public abstract FileStatus getFileStatus(Path f) throws IOException;
From source file:edu.ncku.ikdd.DataMining.java
/**
 * Drives the iterative count/candidate MapReduce passes and then merges the
 * per-pass count outputs into a single file.
 *
 * <p>Each iteration runs a count job over {@code argv[0]} (output under
 * {@code count_path + candidateLength}), increments the candidate length, and
 * runs a candidate job over the previous count output (output under
 * {@code candidate_path + candidateLength}). The loop stops as soon as the
 * candidate job's {@code part-00000} is empty. Afterwards the count outputs of
 * passes {@code 1 .. candidateLength - 1} are concatenated into
 * {@code argv[1]/part-00000}.
 *
 * @param argv {@code argv[0]} input path, {@code argv[1]} output directory,
 *             {@code argv[2]} minimum support (integer)
 * @throws Exception if a job fails or a DFS operation fails
 */
public static void main(String[] argv) throws Exception {
    int candidateLength = 1;
    FileSystem dfs = FileSystem.get(new Configuration());
    do {
        JobConf countConf = new JobConf(DataMining.class);
        countConf.setOutputKeyClass(Text.class);
        countConf.setOutputValueClass(IntWritable.class);
        countConf.setMapperClass(CountMap.class);
        countConf.setCombinerClass(CountCombine.class);
        countConf.setReducerClass(CountReduce.class);
        countConf.setInputFormat(TextInputFormat.class);
        countConf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(countConf, new Path(argv[0]));
        FileOutputFormat.setOutputPath(countConf, new Path(count_path + String.valueOf(candidateLength)));
        // parseInt avoids the needless Integer boxing of Integer.valueOf.
        countConf.setInt("minSupport", Integer.parseInt(argv[2]));
        countConf.setInt("candidateLength", candidateLength);
        JobClient.runJob(countConf);

        ++candidateLength;

        JobConf candidateConf = new JobConf(DataMining.class);
        candidateConf.setOutputKeyClass(Text.class);
        candidateConf.setOutputValueClass(Text.class);
        candidateConf.setMapperClass(CandidateMap.class);
        candidateConf.setReducerClass(CandidateReduce.class);
        candidateConf.setInputFormat(TextInputFormat.class);
        candidateConf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(candidateConf,
                new Path(count_path + String.valueOf(candidateLength - 1) + "/part-00000"));
        FileOutputFormat.setOutputPath(candidateConf,
                new Path(candidate_path + String.valueOf(candidateLength)));
        candidateConf.setInt("candidateLength", candidateLength);
        JobClient.runJob(candidateConf);
    } while (dfs.getFileStatus(new Path(candidate_path + String.valueOf(candidateLength) + "/part-00000"))
            .getLen() > 0);

    // Merge the count outputs. try-with-resources guarantees that the writer and
    // every reader are closed even when a read or write fails part-way through.
    // The charset is pinned to UTF-8 so the copy does not depend on the platform default.
    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
            dfs.create(new Path(argv[1] + "/part-00000")), java.nio.charset.StandardCharsets.UTF_8))) {
        for (int i = 1; i < candidateLength; ++i) {
            Path countOutput = new Path(count_path + String.valueOf(i) + "/part-00000");
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    dfs.open(countOutput), java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    bw.write(line + "\n");
                }
            }
        }
    }
}
From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java
License:Apache License
public static FileStatus[] listInputFiles(FileSystem fs, Path path) throws IOException { if (!fs.isDirectory(path)) return new FileStatus[] { fs.getFileStatus(path) }; else {/*from w w w.j a v a 2 s . com*/ // Get all files in directory that are not directories or hidden files final FileSystem fsFinal = fs; PathFilter filter = new PathFilter() { public boolean accept(Path p) { try { return !(fsFinal.isDirectory(p) || p.getName().startsWith(".") || p.getName().startsWith("_")); } catch (IOException ex) { throw new RuntimeException("Error filtering files.", ex); } } }; return fs.listStatus(path, filter); } }
From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java
License:Apache License
/**
 * Registers the Asterix configuration file stored on the DFS as a local resource.
 *
 * <p>The file is located at {@code instanceConfPath/ASTERIX_CONF_NAME} below
 * {@code dfsBasePath}; its length and modification time are read from the DFS
 * and recorded on the resource, which is stored in {@code localResources}
 * under {@code ASTERIX_CONF_NAME}.
 *
 * @throws IOException if the file system cannot be reached or the file status cannot be read
 */
private void distributeAsterixConfig() throws IOException {
    FileSystem dfs = FileSystem.get(conf);
    Path configPath = new Path(dfsBasePath, instanceConfPath + File.separator + ASTERIX_CONF_NAME);
    URI configUri = configPath.toUri();
    FileStatus configStatus = dfs.getFileStatus(configPath);
    long configLength = configStatus.getLen();
    long configModified = configStatus.getModificationTime();

    LocalResource configResource = Records.newRecord(LocalResource.class);
    configResource.setType(LocalResourceType.FILE);
    configResource.setVisibility(LocalResourceVisibility.PRIVATE);
    configResource.setResource(ConverterUtils.getYarnUrlFromURI(configUri));
    configResource.setTimestamp(configModified);
    configResource.setSize(configLength);

    localResources.put(ASTERIX_CONF_NAME, configResource);
}
From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java
License:Apache License
/** * Here I am just pointing the Containers to the exisiting HDFS resources given by the Client * filesystem of the nodes.//from w w w. ja v a 2 s. c om * * @throws IOException */ private void localizeDFSResources() throws IOException { //if performing an 'offline' task, skip a lot of resource distribution if (obliterate || backup || restore) { if (appMasterJar == null || ("").equals(appMasterJar)) { //this can happen in a jUnit testing environment. we don't need to set it there. if (!conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { throw new IllegalStateException("AM jar not provided in environment."); } else { return; } } FileSystem fs = FileSystem.get(conf); FileStatus appMasterJarStatus = fs.getFileStatus(appMasterJar); LocalResource obliteratorJar = Records.newRecord(LocalResource.class); obliteratorJar.setType(LocalResourceType.FILE); obliteratorJar.setVisibility(LocalResourceVisibility.PRIVATE); obliteratorJar.setResource(ConverterUtils.getYarnUrlFromPath(appMasterJar)); obliteratorJar.setTimestamp(appMasterJarStatus.getModificationTime()); obliteratorJar.setSize(appMasterJarStatus.getLen()); localResources.put("asterix-yarn.jar", obliteratorJar); LOG.info(localResources.values()); return; } //otherwise, distribute evertything to start up asterix LocalResource asterixZip = Records.newRecord(LocalResource.class); //this un-tar's the asterix distribution asterixZip.setType(LocalResourceType.ARCHIVE); asterixZip.setVisibility(LocalResourceVisibility.PRIVATE); try { asterixZip.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixZipPath))); } catch (URISyntaxException e) { LOG.error("Error locating Asterix zip" + " in env, path=" + asterixZipPath); throw new IOException(e); } asterixZip.setTimestamp(asterixZipTimestamp); asterixZip.setSize(asterixZipLen); localResources.put(ASTERIX_ZIP_NAME, asterixZip); //now let's do the same for the cluster description XML LocalResource asterixConf = Records.newRecord(LocalResource.class); 
asterixConf.setType(LocalResourceType.FILE); asterixConf.setVisibility(LocalResourceVisibility.PRIVATE); try { asterixConf.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixConfPath))); } catch (URISyntaxException e) { LOG.error("Error locating Asterix config" + " in env, path=" + asterixConfPath); throw new IOException(e); } //TODO: I could avoid localizing this everywhere by only calling this block on the metadata node. asterixConf.setTimestamp(asterixConfTimestamp); asterixConf.setSize(asterixConfLen); localResources.put("cluster-config.xml", asterixConf); //now add the libraries if there are any try { FileSystem fs = FileSystem.get(conf); Path p = new Path(dfsBasePath, instanceConfPath + File.separator + "library" + Path.SEPARATOR); if (fs.exists(p)) { FileStatus[] dataverses = fs.listStatus(p); for (FileStatus d : dataverses) { if (!d.isDirectory()) throw new IOException("Library configuration directory structure is incorrect"); FileStatus[] libraries = fs.listStatus(d.getPath()); for (FileStatus l : libraries) { if (l.isDirectory()) throw new IOException("Library configuration directory structure is incorrect"); LocalResource lr = Records.newRecord(LocalResource.class); lr.setResource(ConverterUtils.getYarnUrlFromURI(l.getPath().toUri())); lr.setSize(l.getLen()); lr.setTimestamp(l.getModificationTime()); lr.setType(LocalResourceType.ARCHIVE); lr.setVisibility(LocalResourceVisibility.PRIVATE); localResources.put("library" + Path.SEPARATOR + d.getPath().getName() + Path.SEPARATOR + l.getPath().getName().split("\\.")[0], lr); LOG.info("Found library: " + l.getPath().toString()); LOG.info(l.getPath().getName()); } } } } catch (FileNotFoundException e) { LOG.info("No external libraries present"); //do nothing, it just means there aren't libraries. that is possible and ok // it should be handled by the fs.exists(p) check though. } LOG.info(localResources.values()); }
From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java
License:Apache License
/** * Upload the Asterix cluster description on to the DFS. This will persist the state of the instance. * /*from w ww . ja va 2s . com*/ * @return * @throws YarnException * @throws IOException */ private List<DFSResourceCoordinate> deployConfig() throws YarnException, IOException { FileSystem fs = FileSystem.get(conf); List<DFSResourceCoordinate> resources = new ArrayList<DFSResourceCoordinate>(2); String pathSuffix = CONF_DIR_REL + instanceFolder + CONFIG_DEFAULT_NAME; Path dstConf = new Path(fs.getHomeDirectory(), pathSuffix); FileStatus destStatus; try { destStatus = fs.getFileStatus(dstConf); } catch (IOException e) { throw new YarnException("Asterix instance by that name does not appear to exist in DFS"); } LocalResource asterixConfLoc = Records.newRecord(LocalResource.class); asterixConfLoc.setType(LocalResourceType.FILE); asterixConfLoc.setVisibility(LocalResourceVisibility.PRIVATE); asterixConfLoc.setResource(ConverterUtils.getYarnUrlFromPath(dstConf)); asterixConfLoc.setTimestamp(destStatus.getModificationTime()); DFSResourceCoordinate conf = new DFSResourceCoordinate(); conf.envs.put(dstConf.toUri().toString(), AConstants.CONFLOCATION); conf.envs.put(Long.toString(asterixConfLoc.getSize()), AConstants.CONFLEN); conf.envs.put(Long.toString(asterixConfLoc.getTimestamp()), AConstants.CONFTIMESTAMP); conf.name = CONFIG_DEFAULT_NAME; conf.res = asterixConfLoc; resources.add(conf); return resources; }
From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java
License:Apache License
/**
 * Installs the current Asterix parameters on the DFS; they can later be changed via alter.
 *
 * <p>In {@code INSTALL} mode the configuration must not exist yet and is copied
 * from the local {@code asterixConf} file. In {@code START} mode the
 * configuration must already exist. Other modes only probe the DFS.
 *
 * @throws IllegalStateException if the instance already exists in {@code INSTALL} mode,
 *                               or does not exist in {@code START} mode
 * @throws YarnException declared by the signature; not thrown directly by this method
 * @throws IOException   if a DFS operation fails
 */
private void installConfig() throws YarnException, IOException {
    FileSystem fs = FileSystem.get(conf);
    Path dstConf = new Path(fs.getHomeDirectory(), CONF_DIR_REL + instanceFolder + CONFIG_DEFAULT_NAME);

    // Probe for the configuration first, then apply the per-mode rules.
    FileNotFoundException notFound = null;
    try {
        fs.getFileStatus(dstConf);
    } catch (FileNotFoundException e) {
        notFound = e;
    }

    boolean alreadyInstalled = (notFound == null);
    if (alreadyInstalled) {
        if (mode == Mode.INSTALL) {
            throw new IllegalStateException("Instance with this name already exists.");
        }
    } else if (mode == Mode.START) {
        throw new IllegalStateException("Instance does not exist for this user", notFound);
    }

    if (mode == Mode.INSTALL) {
        Path localConf = new Path(asterixConf);
        fs.copyFromLocalFile(false, true, localConf, dstConf);
    }
}
From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java
License:Apache License
/** * Finds the minimal classes and JARs needed to start the AM only. * @return Resources the AM needs to start on the initial container. * @throws IllegalStateException/* w ww .j a v a2 s . c o m*/ * @throws IOException */ private List<DFSResourceCoordinate> installAmLibs() throws IllegalStateException, IOException { List<DFSResourceCoordinate> resources = new ArrayList<DFSResourceCoordinate>(2); FileSystem fs = FileSystem.get(conf); String fullLibPath = CONF_DIR_REL + instanceFolder + "am_jars" + Path.SEPARATOR; String[] cp = System.getProperty("java.class.path").split(System.getProperty("path.separator")); String asterixJarPattern = "^(asterix).*(jar)$"; //starts with asterix,ends with jar String commonsJarPattern = "^(commons).*(jar)$"; String surefireJarPattern = "^(surefire).*(jar)$"; //for maven tests String jUnitTestPattern = "^(asterix-yarn" + File.separator + "target)$"; LOG.info(File.separator); for (String j : cp) { String[] pathComponents = j.split(Pattern.quote(File.separator)); LOG.info(j); LOG.info(pathComponents[pathComponents.length - 1]); if (pathComponents[pathComponents.length - 1].matches(asterixJarPattern) || pathComponents[pathComponents.length - 1].matches(commonsJarPattern) || pathComponents[pathComponents.length - 1].matches(surefireJarPattern) || pathComponents[pathComponents.length - 1].matches(jUnitTestPattern)) { LOG.info("Loading JAR/classpath: " + j); File f = new File(j); Path dst = new Path(fs.getHomeDirectory(), fullLibPath + f.getName()); if (!fs.exists(dst) || refresh) { fs.copyFromLocalFile(false, true, new Path(f.getAbsolutePath()), dst); } FileStatus dstSt = fs.getFileStatus(dst); LocalResource amLib = Records.newRecord(LocalResource.class); amLib.setType(LocalResourceType.FILE); amLib.setVisibility(LocalResourceVisibility.PRIVATE); amLib.setResource(ConverterUtils.getYarnUrlFromPath(dst)); amLib.setTimestamp(dstSt.getModificationTime()); amLib.setSize(dstSt.getLen()); DFSResourceCoordinate amLibCoord = new 
DFSResourceCoordinate(); amLibCoord.res = amLib; amLibCoord.name = f.getName(); if (f.getName().contains("asterix-yarn") || f.getName().contains("surefire")) { amLibCoord.envs.put(dst.toUri().toString(), AConstants.APPLICATIONMASTERJARLOCATION); amLibCoord.envs.put(Long.toString(dstSt.getLen()), AConstants.APPLICATIONMASTERJARLEN); amLibCoord.envs.put(Long.toString(dstSt.getModificationTime()), AConstants.APPLICATIONMASTERJARTIMESTAMP); } resources.add(amLibCoord); } } if (resources.size() == 0) { throw new IOException("Required JARs are missing. Please check your directory structure"); } return resources; }
From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java
License:Apache License
/** * Uploads binary resources to HDFS for use by the AM * @return/* w w w . j av a 2 s.c o m*/ * @throws IOException * @throws YarnException */ public List<DFSResourceCoordinate> distributeBinaries() throws IOException, YarnException { List<DFSResourceCoordinate> resources = new ArrayList<DFSResourceCoordinate>(2); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); Path src, dst; FileStatus destStatus; String pathSuffix; // adding info so we can add the jar to the App master container path // Add the asterix tarfile to HDFS for easy distribution // Keep it all archived for now so add it as a file... pathSuffix = CONF_DIR_REL + instanceFolder + "asterix-server.zip"; dst = new Path(fs.getHomeDirectory(), pathSuffix); if (refresh) { if (fs.exists(dst)) { fs.delete(dst, false); } } if (!fs.exists(dst)) { src = new Path(asterixZip); LOG.info("Copying Asterix distributable to DFS"); fs.copyFromLocalFile(false, true, src, dst); } destStatus = fs.getFileStatus(dst); LocalResource asterixTarLoc = Records.newRecord(LocalResource.class); asterixTarLoc.setType(LocalResourceType.ARCHIVE); asterixTarLoc.setVisibility(LocalResourceVisibility.PRIVATE); asterixTarLoc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); asterixTarLoc.setTimestamp(destStatus.getModificationTime()); // adding info so we can add the tarball to the App master container path DFSResourceCoordinate tar = new DFSResourceCoordinate(); tar.envs.put(dst.toUri().toString(), AConstants.TARLOCATION); tar.envs.put(Long.toString(asterixTarLoc.getSize()), AConstants.TARLEN); tar.envs.put(Long.toString(asterixTarLoc.getTimestamp()), AConstants.TARTIMESTAMP); tar.res = asterixTarLoc; tar.name = "asterix-server.zip"; resources.add(tar); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { Path log4jSrc = new Path(log4jPropFile); Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props"); 
fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); LocalResource log4jRsrc = Records.newRecord(LocalResource.class); log4jRsrc.setType(LocalResourceType.FILE); log4jRsrc.setVisibility(LocalResourceVisibility.PRIVATE); log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); log4jRsrc.setSize(log4jFileStatus.getLen()); DFSResourceCoordinate l4j = new DFSResourceCoordinate(); tar.res = log4jRsrc; tar.name = "log4j.properties"; resources.add(l4j); } resources.addAll(installAmLibs()); return resources; }
From source file:edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory.java
License:Apache License
/** * Instead of creating the split using the input format, we do it manually * This function returns fileSplits (1 per hdfs file block) irrespective of the number of partitions * and the produced splits only cover intersection between current files in hdfs and files stored internally * in AsterixDB//from www . j av a 2 s . co m * 1. NoOp means appended file * 2. AddOp means new file * 3. UpdateOp means the delta of a file * * @return * @throws IOException */ protected InputSplit[] getSplits(JobConf conf) throws IOException { // Create file system object FileSystem fs = FileSystem.get(conf); ArrayList<FileSplit> fileSplits = new ArrayList<FileSplit>(); ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<ExternalFile>(); // Create files splits for (ExternalFile file : files) { Path filePath = new Path(file.getFileName()); FileStatus fileStatus; try { fileStatus = fs.getFileStatus(filePath); } catch (FileNotFoundException e) { // file was deleted at some point, skip to next file continue; } if (file.getPendingOp() == ExternalFilePendingOp.PENDING_ADD_OP && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) { // Get its information from HDFS name node BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize()); // Create a split per block for (BlockLocation block : fileBlocks) { if (block.getOffset() < file.getSize()) { fileSplits.add(new FileSplit(filePath, block.getOffset(), (block.getLength() + block.getOffset()) < file.getSize() ? 
block.getLength() : (file.getSize() - block.getOffset()), block.getHosts())); orderedExternalFiles.add(file); } } } else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) { long oldSize = 0L; long newSize = file.getSize(); for (int i = 0; i < files.size(); i++) { if (files.get(i).getFileName() == file.getFileName() && files.get(i).getSize() != file.getSize()) { newSize = files.get(i).getSize(); oldSize = file.getSize(); break; } } // Get its information from HDFS name node BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize); // Create a split per block for (BlockLocation block : fileBlocks) { if (block.getOffset() + block.getLength() > oldSize) { if (block.getOffset() < newSize) { // Block interact with delta -> Create a split long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset(); long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L : block.getOffset() + block.getLength() - newSize; long splitLength = block.getLength() - startCut - endCut; fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength, block.getHosts())); orderedExternalFiles.add(file); } } } } } fs.close(); files = orderedExternalFiles; return fileSplits.toArray(new FileSplit[fileSplits.size()]); }
From source file:edu.umd.cloud9.util.ReadSequenceFile.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length < 1) { System.out.println("args: [path] [max-num-of-records-per-file]"); System.exit(-1);//from ww w . j a va 2s. c o m } String f = args[0]; int max = Integer.MAX_VALUE; if (args.length >= 2) { max = Integer.parseInt(args[1]); } Configuration config = new JobConf(); FileSystem fileSys = FileSystem.get(config); Path p = new Path(f); if (fileSys.getFileStatus(p).isDir()) readSequenceFilesInDir(p, max); else readSequenceFile(p, max); }