List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(URI uri, Configuration conf) throws IOException
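Before the collected examples, a minimal sketch of the call itself; the hdfs:// URI and path below are placeholders for illustration, not taken from any example on this page:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws Exception {
        // The scheme and authority of the URI select the FileSystem
        // implementation (e.g. hdfs:// vs file://); the Configuration
        // supplies cluster settings such as fs.defaultFS.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://namenode:8020/user/demo"), conf);
        System.out.println("exists: " + fs.exists(new Path("/user/demo")));
    }
}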
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static void clearTaskFiles(Configuration conf) throws IOException, URISyntaxException {
    String filepath = conf.get(outdir) + tasksDone;
    FileSystem fs = FileSystem.get(new URI(filepath), conf);
    fs.delete(new Path(filepath), true);
}
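A general point about FileSystem.get worth keeping in mind while reading these examples (standard Hadoop behavior, not something the Halvade code above states): get returns a cached instance shared per scheme, authority, and user, so repeated calls are cheap, but closing the returned object affects every other caller. A short sketch of the contrast with FileSystem.newInstance, with a placeholder URI:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

Configuration conf = new Configuration();
// get(): returns a process-wide cached instance keyed by
// (scheme, authority, user); callers normally should not close it.
FileSystem shared = FileSystem.get(new URI("hdfs://namenode:8020/"), conf);
// newInstance(): returns a fresh instance the caller owns and must close.
FileSystem owned = FileSystem.newInstance(new URI("hdfs://namenode:8020/"), conf);
owned.close();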
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static boolean addTaskRunning(Configuration conf, String val) throws IOException, URISyntaxException {
    val = val.substring(0, val.lastIndexOf("_")); // rewrite file if second attempt
    String filepath = conf.get(outdir) + tasksDone + val;
    FileSystem fs = FileSystem.get(new URI(filepath), conf);
    return fs.createNewFile(new Path(filepath));
}
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static int getMapTasksLeft(Configuration conf) throws IOException, URISyntaxException {
    int containers = conf.getInt(totalContainers, 1);
    int tasks = 0;
    String filedir = conf.get(outdir) + tasksDone;
    FileSystem fs = FileSystem.get(new URI(filedir), conf);
    FileStatus[] files = fs.listStatus(new Path(filedir));
    for (FileStatus file : files) {
        if (!file.isDirectory()) {
            tasks++;
        }
    }
    Logger.DEBUG("containers left: " + (Integer.parseInt(conf.get("mapred.map.tasks")) - tasks));
    return Integer.parseInt(conf.get("mapred.map.tasks")) - tasks;
}
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static boolean allTasksCompleted(Configuration conf) throws IOException, URISyntaxException {
    int tasks = 0;
    String filedir = conf.get(outdir) + tasksDone;
    FileSystem fs = FileSystem.get(new URI(filedir), conf);
    FileStatus[] files = fs.listStatus(new Path(filedir));
    for (FileStatus file : files) {
        if (!file.isDirectory()) {
            tasks++;
        }
    }
    Logger.DEBUG("tasks started: " + tasks);
    return tasks >= Integer.parseInt(conf.get("mapred.map.tasks"));
}
From source file:be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License:Open Source License
public static String downloadGFF(TaskInputOutputContext context, String id)
        throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    String gff = HalvadeConf.getGff(context.getConfiguration());
    if (gff == null)
        return null;
    String gffSuffix = null;
    int si = gff.lastIndexOf('.');
    if (si > 0)
        gffSuffix = gff.substring(si);
    else
        throw new InterruptedException("Illegal filename for gff file: " + gff);
    Logger.DEBUG("suffix: " + gffSuffix);
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, refDir, GFF_LOCK);
    String gffFile = null;
    try {
        lock.getLock();
        // the lock file holds a single int marker: DEFAULT_LOCK_VAL means a
        // previous task on this node already downloaded the gff file
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == DEFAULT_LOCK_VAL)
                Logger.DEBUG("gff has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing gff file to local scratch");
                FileSystem fs = FileSystem.get(new URI(gff), conf);
                gffFile = findFile(refDir, gffSuffix, false);
                if (gffFile == null)
                    gffFile = refDir + id;
                attemptDownloadFileFromHDFS(context, fs, gff, gffFile + gffSuffix, RETRIES);
                Logger.INFO("FINISHED downloading the complete reference index to local scratch");
                bytes.clear();
                bytes.putInt(DEFAULT_LOCK_VAL).flip();
                lock.forceWrite(bytes);
            }
        } else {
            // empty lock file: this task is the first on the node, so download now
            Logger.INFO("downloading missing gff file to local scratch");
            Logger.DEBUG("gff file: " + gff);
            FileSystem fs = FileSystem.get(new URI(gff), conf);
            gffFile = findFile(refDir, gffSuffix, false);
            if (gffFile == null)
                gffFile = refDir + id;
            attemptDownloadFileFromHDFS(context, fs, gff, gffFile + gffSuffix, RETRIES);
            Logger.INFO("FINISHED downloading the complete reference index to local scratch");
            bytes.clear();
            bytes.putInt(DEFAULT_LOCK_VAL).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (gffFile == null)
        gffFile = findFile(refDir, gffSuffix, false);
    return gffFile + gffSuffix;
}
From source file:be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License:Open Source License
protected static String downloadAlignerIndex(TaskInputOutputContext context, String id, String refName,
        String refSuffix, String[] refFiles) throws IOException, URISyntaxException {
    Configuration conf = context.getConfiguration();
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, refDir, REF_LOCK);
    String refBase = null;
    try {
        lock.getLock();
        // the lock file holds a single int marker: REF_BOTH means the
        // reference index is already on local scratch
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == REF_BOTH)
                Logger.DEBUG("reference has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing reference index files to local scratch");
                String HDFSRef = HalvadeConf.getRefOnHDFS(conf);
                FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
                refBase = findFile(refDir, refSuffix, false); // refSuffix = HALVADE_BWA_SUFFIX
                boolean foundExisting = (refBase != null);
                if (!foundExisting)
                    refBase = refDir + refName + id; // refName = bwa_ref-
                for (String suffix : refFiles) { // refFiles = BWA_REF_FILES
                    attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                }
                Logger.INFO("FINISHED downloading the complete reference index to local scratch");
                if (!foundExisting) {
                    File f = new File(refBase + refSuffix);
                    f.createNewFile();
                    f = new File(refBase + HALVADE_GATK_SUFFIX);
                    f.createNewFile();
                }
                bytes.clear();
                bytes.putInt(REF_BOTH).flip();
                lock.forceWrite(bytes);
            }
        } else {
            // empty lock file: this task is the first on the node, so download now
            Logger.INFO("downloading missing reference index files to local scratch");
            String HDFSRef = HalvadeConf.getRefOnHDFS(conf);
            FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
            refBase = findFile(refDir, refSuffix, false);
            boolean foundExisting = (refBase != null);
            if (!foundExisting)
                refBase = refDir + refName + id;
            for (String suffix : refFiles) {
                attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
            }
            Logger.INFO("FINISHED downloading the complete reference index to local scratch");
            if (!foundExisting) {
                File f = new File(refBase + refSuffix);
                f.createNewFile();
                f = new File(refBase + HALVADE_GATK_SUFFIX);
                f.createNewFile();
            }
            bytes.clear();
            bytes.putInt(REF_BOTH).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (refBase == null)
        refBase = findFile(refDir, refSuffix, false);
    return refBase + refFiles[0];
}
From source file:be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License:Open Source License
public static String downloadGATKIndex(TaskInputOutputContext context, String id)
        throws IOException, URISyntaxException {
    Configuration conf = context.getConfiguration();
    String tmpDir = HalvadeConf.getScratchTempDir(conf);
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, refDir, REF_LOCK);
    String refBase = null;
    try {
        lock.getLock();
        // the lock file holds a single int marker: REF_BOTH or
        // DEFAULT_LOCK_VAL means the GATK reference is already on local scratch
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == REF_BOTH || val == DEFAULT_LOCK_VAL)
                Logger.DEBUG("reference has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing reference index files to local scratch");
                String HDFSRef = HalvadeConf.getRefOnHDFS(conf);
                FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
                refBase = findFile(refDir, HALVADE_GATK_SUFFIX, false);
                boolean foundExisting = (refBase != null);
                if (!foundExisting)
                    refBase = refDir + "bwa_ref-" + id;
                for (String suffix : GATK_REF_FILES) {
                    attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                }
                Logger.INFO("FINISHED downloading the complete reference index to local scratch");
                if (!foundExisting) {
                    File f = new File(refBase + HALVADE_GATK_SUFFIX);
                    f.createNewFile();
                }
                bytes.clear();
                bytes.putInt(DEFAULT_LOCK_VAL).flip();
                lock.forceWrite(bytes);
            }
        } else {
            // empty lock file: this task is the first on the node, so download now
            Logger.INFO("downloading missing reference index files to local scratch");
            String HDFSRef = HalvadeConf.getRefOnHDFS(conf);
            FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
            refBase = findFile(refDir, HALVADE_GATK_SUFFIX, false);
            boolean foundExisting = (refBase != null);
            if (!foundExisting)
                refBase = refDir + "bwa_ref-" + id;
            for (String suffix : GATK_REF_FILES) {
                attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
            }
            Logger.INFO("FINISHED downloading the complete reference index to local scratch");
            if (!foundExisting) {
                File f = new File(refBase + HALVADE_GATK_SUFFIX);
                f.createNewFile();
            }
            bytes.clear();
            bytes.putInt(DEFAULT_LOCK_VAL).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (refBase == null)
        refBase = findFile(refDir, HALVADE_GATK_SUFFIX, false);
    return refBase + GATK_REF_FILES[0];
}
From source file:be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License:Open Source License
public static String downloadSTARIndex(TaskInputOutputContext context, String id, boolean usePass2Genome)
        throws IOException, URISyntaxException {
    Configuration conf = context.getConfiguration();
    String tmpDir = HalvadeConf.getScratchTempDir(conf);
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, tmpDir, STARG_LOCK);
    String refBase = null;
    try {
        lock.getLock();
        // the lock file holds a single int marker: DEFAULT_LOCK_VAL means the
        // STAR genome is already on local scratch
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == DEFAULT_LOCK_VAL)
                Logger.DEBUG("reference has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing reference index files to local scratch");
                if (usePass2Genome)
                    Logger.DEBUG("using Pass2 genome");
                String HDFSRef = usePass2Genome ? HalvadeConf.getStarDirPass2HDFS(conf)
                        : HalvadeConf.getStarDirOnHDFS(conf);
                Logger.DEBUG("downloading STAR genome from: " + HDFSRef);
                FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
                refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
                boolean foundExisting = (refBase != null);
                if (!foundExisting) {
                    refBase = refDir + id + "-star/";
                    // make dir
                    File makeRefDir = new File(refBase);
                    makeRefDir.mkdir();
                }
                Logger.DEBUG("STAR dir: " + refBase);
                if (!usePass2Genome) {
                    for (String suffix : STAR_REF_FILES) {
                        attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                    }
                    for (String suffix : STAR_REF_OPTIONAL_FILES) {
                        if (fs.exists(new Path(HDFSRef + suffix)))
                            attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                    }
                }
                Logger.INFO("FINISHED downloading the complete reference index to local scratch");
                if (!foundExisting) {
                    File f = new File(refBase + (usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1));
                    f.createNewFile();
                }
                bytes.clear();
                bytes.putInt(DEFAULT_LOCK_VAL).flip();
                lock.forceWrite(bytes);
            }
        } else {
            // empty lock file: this task is the first on the node, so download now
            Logger.INFO("downloading missing reference index files to local scratch");
            if (usePass2Genome)
                Logger.DEBUG("using Pass2 genome");
            String HDFSRef = usePass2Genome ? HalvadeConf.getStarDirPass2HDFS(conf)
                    : HalvadeConf.getStarDirOnHDFS(conf);
            Logger.DEBUG("downloading STAR genome from: " + HDFSRef);
            FileSystem fs = FileSystem.get(new URI(HDFSRef), conf);
            refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
            boolean foundExisting = (refBase != null);
            if (!foundExisting) {
                refBase = refDir + id + "-star/";
                // make dir
                File makeRefDir = new File(refBase);
                makeRefDir.mkdir();
            }
            Logger.DEBUG("STAR dir: " + refBase);
            if (!usePass2Genome) {
                for (String suffix : STAR_REF_FILES) {
                    attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                }
                for (String suffix : STAR_REF_OPTIONAL_FILES) {
                    if (fs.exists(new Path(HDFSRef + suffix)))
                        attemptDownloadFileFromHDFS(context, fs, HDFSRef + suffix, refBase + suffix, RETRIES);
                }
            }
            Logger.INFO("FINISHED downloading the complete reference index to local scratch");
            if (!foundExisting) {
                File f = new File(refBase + (usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1));
                f.createNewFile();
            }
            bytes.clear();
            bytes.putInt(DEFAULT_LOCK_VAL).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (refBase == null)
        refBase = findFile(refDir, usePass2Genome ? HALVADE_STAR_SUFFIX_P2 : HALVADE_STAR_SUFFIX_P1, true);
    return refBase;
}
From source file:be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License:Open Source License
public static String[] downloadSites(TaskInputOutputContext context, String id)
        throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String tmpDir = HalvadeConf.getScratchTempDir(conf);
    String refDir = HalvadeConf.getRefDirOnScratch(conf);
    String HDFSsites[] = HalvadeConf.getKnownSitesOnHDFS(conf);
    String[] localSites = new String[HDFSsites.length];
    if (!refDir.endsWith("/"))
        refDir = refDir + "/";
    HalvadeFileLock lock = new HalvadeFileLock(context, refDir, DBSNP_LOCK);
    String refBase = null;
    try {
        lock.getLock();
        // the lock file holds a single int marker: DEFAULT_LOCK_VAL means the
        // dbSNP files are already on local scratch
        ByteBuffer bytes = ByteBuffer.allocate(4);
        if (lock.read(bytes) > 0) {
            bytes.flip();
            long val = bytes.getInt();
            if (val == DEFAULT_LOCK_VAL)
                Logger.DEBUG("dbSNP has been downloaded to local scratch: " + val);
            else {
                Logger.INFO("downloading missing dbSNP to local scratch");
                refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
                boolean foundExisting = (refBase != null);
                if (!foundExisting) {
                    refBase = refDir + id + "-dbsnp/";
                    // make dir
                    File makeRefDir = new File(refBase);
                    makeRefDir.mkdir();
                }
                Logger.DEBUG("dbSNP dir: " + refBase);
                for (int i = 0; i < HDFSsites.length; i++) {
                    String fullName = HDFSsites[i];
                    String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                    Logger.DEBUG("Downloading " + name);
                    FileSystem fs = FileSystem.get(new URI(fullName), conf);
                    attemptDownloadFileFromHDFS(context, fs, fullName, refBase + name, RETRIES);
                    localSites[i] = refBase + name;
                    // attempt to download .idx file
                    if (!foundExisting && fs.exists(new Path(fullName + ".idx")))
                        attemptDownloadFileFromHDFS(context, fs, fullName + ".idx", refBase + name + ".idx", RETRIES);
                }
                Logger.INFO("finished downloading the new sites to local scratch");
                if (!foundExisting) {
                    File f = new File(refBase + HALVADE_DBSNP_SUFFIX);
                    f.createNewFile();
                }
                bytes.clear();
                bytes.putInt(DEFAULT_LOCK_VAL).flip();
                lock.forceWrite(bytes);
            }
        } else {
            // empty lock file: this task is the first on the node, so download now
            Logger.INFO("downloading missing dbSNP to local scratch");
            refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
            boolean foundExisting = (refBase != null);
            if (!foundExisting) {
                refBase = refDir + id + "-dbsnp/";
                // make dir
                File makeRefDir = new File(refBase);
                makeRefDir.mkdir();
            }
            Logger.DEBUG("dbSNP dir: " + refBase);
            for (int i = 0; i < HDFSsites.length; i++) {
                String fullName = HDFSsites[i];
                String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                Logger.DEBUG("Downloading " + name);
                FileSystem fs = FileSystem.get(new URI(fullName), conf);
                attemptDownloadFileFromHDFS(context, fs, fullName, refBase + name, RETRIES);
                localSites[i] = refBase + name;
                // attempt to download .idx file
                if (!foundExisting && fs.exists(new Path(fullName + ".idx")))
                    attemptDownloadFileFromHDFS(context, fs, fullName + ".idx", refBase + name + ".idx", RETRIES);
            }
            Logger.INFO("finished downloading the new sites to local scratch");
            if (!foundExisting) {
                File f = new File(refBase + HALVADE_DBSNP_SUFFIX);
                f.createNewFile();
            }
            bytes.clear();
            bytes.putInt(DEFAULT_LOCK_VAL).flip();
            lock.forceWrite(bytes);
        }
    } catch (InterruptedException ex) {
        Logger.EXCEPTION(ex);
    } finally {
        lock.releaseLock();
    }
    if (refBase == null) {
        // another task did the download: verify every site file is present
        refBase = findFile(refDir, HALVADE_DBSNP_SUFFIX, true);
        File dir = new File(refBase);
        File[] directoryListing = dir.listFiles();
        if (directoryListing != null) {
            int found = 0;
            for (int i = 0; i < HDFSsites.length; i++) {
                String fullName = HDFSsites[i];
                String name = fullName.substring(fullName.lastIndexOf('/') + 1);
                localSites[i] = refBase + name;
                if ((new File(localSites[i])).exists())
                    found++;
                else
                    Logger.DEBUG(name + " not found in local scratch");
            }
            if (found != HDFSsites.length) {
                throw new IOException(
                        refBase + " has different number of files: " + found + " vs " + localSites.length);
            }
        } else {
            throw new IOException(refBase + " has no files");
        }
    }
    return localSites;
}
From source file:be_uclouvain_ingi2145_lab05.GiraphJobRunner.java
@Override
public int run(String[] strings) throws Exception {
    GiraphConfiguration gconf = new GiraphConfiguration(conf);
    //gconf.setVertexClass(SimpleShortestPathsComputation.class);
    /*gconf.setVertexInputFormatClass(SimpleShortestPathsVertexInputFormat.class);
    gconf.setVertexOutputFormatClass(SimpleShortestPathsVertexOutputFormat.class);*/
    CommandLine cmd = ConfigurationUtils.parseArgs(gconf, strings);
    if (null == cmd) {
        return 0;
    }
    //GiraphYarnClient job = new GiraphYarnClient(gconf, gconf.getClass().getName());
    GiraphJob job = new GiraphJob(gconf, getClass().getName());
    job.getInternalJob().setJarByClass(getClass());
    if (cmd.hasOption("vof") || cmd.hasOption("eof")) {
        if (cmd.hasOption("op")) {
            Path outputPath = new Path(cmd.getOptionValue("op"));
            FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
            // check whether the output path exists; if so, delete it
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true);
            }
            FileOutputFormat.setOutputPath(job.getInternalJob(), outputPath);
        }
    }
    /* if (cmd.hasOption("vif") || cmd.hasOption("eif")) {
        if (cmd.hasOption("vip")) {
            FileInputFormat.addInputPath(job.getInternalJob(), new Path(cmd.getOptionValue("op")));
        }
    } */
    // if a custom option is specified
    if (cmd.hasOption("ca")) {
        String[] args = cmd.getOptionValues("ca");
        LOG.fatal("" + Arrays.toString(args));
        gconf.set("ca", args[0].split("=")[1]);
        LOG.fatal("" + gconf.get("ca"));
        gconf.setWorkerConfiguration(Integer.parseInt(cmd.getOptionValue("w")),
                Integer.parseInt(cmd.getOptionValue("w")), 100.0f);
    }
    /* if (cmd.hasOption("cf")) {
        DistributedCache.addCacheFile(new URI(cmd.getOptionValue("cf")), job.getConfiguration());
    } */
    return job.run(true) ? 0 : -1;
}