List of usage examples for org.apache.hadoop.fs FileSystem close
@Override public void close() throws IOException
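Before the project-specific examples below, here is a minimal sketch of a common close() pattern. It is an illustration only: the path /tmp/example.txt and the class name are hypothetical, and it assumes the Configuration resolves to a reachable default filesystem. Note that FileSystem.get() normally returns a JVM-wide cached instance, so code that intends to call close() itself often obtains a private instance via FileSystem.newInstance().

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // newInstance() bypasses the shared FileSystem cache, so closing this
        // instance cannot affect other code that obtained its FileSystem via get().
        FileSystem fs = FileSystem.newInstance(conf);
        try {
            // Hypothetical path, used only to show a write followed by close().
            try (FSDataOutputStream out = fs.create(new Path("/tmp/example.txt"))) {
                out.writeUTF("hello");
            }
        } finally {
            fs.close(); // releases sockets and other resources held by this instance
        }
    }
}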
From source file:org.bgi.flexlab.gaea.data.structure.header.GaeaVCFHeader.java
License:Open Source License
public boolean writeToHDFS(Path path) {
    ObjectOutputStream ostream = null;
    try {
        FileSystem fs = path.getFileSystem(new Configuration());
        ostream = new ObjectOutputStream(fs.create(path));
        ostream.writeObject(this);
        ostream.close();
        fs.close();
    } catch (IOException e) {
        ostream = null;
        e.printStackTrace();
        return false;
    }
    return true;
}
From source file:org.bgi.flexlab.gaea.data.structure.header.MultipleVCFHeader.java
License:Open Source License
public void mergeHeader(Path inputPath, String output, Job job, boolean distributeCacheHeader) {
    Configuration conf = job.getConfiguration();
    try {
        FileSystem fs = inputPath.getFileSystem(conf);
        fs = inputPath.getFileSystem(conf);
        if (!fs.exists(inputPath)) {
            System.out.println("Input File Path is not exist! Please check input var.");
            System.exit(-1);
        }
        if (fs.isFile(inputPath)) {
            if (validPath(inputPath, fs)) {
                readVcfHeader(inputPath, conf);
            }
        } else {
            FileStatus stats[] = fs.listStatus(inputPath);
            for (FileStatus file : stats) {
                Path filePath = file.getPath();
                mergeHeader(filePath, output, job, distributeCacheHeader);
            }
        }
        fs.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    if (distributeCacheHeader) {
        distributeCacheVcfHeader(output, job, conf);
    } else {
        writeHeaderToHDFS(output, conf);
    }
}
From source file:org.bgi.flexlab.gaea.data.structure.reads.report.FastqQualityControlReporterIO.java
License:Open Source License
public void mergeReport(Path input, Configuration conf, Path outputDir) throws IOException {
    FileSystem fs = input.getFileSystem(conf);
    FileStatus filelist[] = fs.listStatus(input, new StaticPathFilter());
    int ssize = sample == null ? 1 : sample.getSampleNumber();
    FastqQualityControlReport report = new FastqQualityControlReport(ssize, isMulti);
    for (int i = 0; i < filelist.length; i++) {
        if (!filelist[i].isDirectory()) {
            readFromHdfs(filelist[i].getPath(), conf, report);
            fs.delete(filelist[i].getPath(), false);
        }
    }
    fs.close();

    for (int i = 0; i < ssize; i++) {
        String reportFileName;
        String graphFileName;
        if (sample != null && isMulti) {
            String fileName = sample.getFileNameForId(i);
            reportFileName = outputDir + "/" + fileName + ".filter.report.txt";
            graphFileName = outputDir + "/" + fileName + ".graph.data.txt";
        } else {
            reportFileName = outputDir + "/filter.report.txt";
            graphFileName = outputDir + "/graph.data.txt";
        }
        write(reportFileName, conf, report.getReportContext(i));
        write(graphFileName, conf, report.getGraphContext(i));
    }
}
From source file:org.bgi.flexlab.gaea.tools.bamqualtiycontrol.report.BamReport.java
License:Open Source License
public static void getOutput(BamQualityControlOptions options, Configuration conf, Path oPath) throws IOException {
    ReportBuilder reportBuilder = new ReportBuilder();
    ResultReport reportType;
    ReferenceShare genome = new ReferenceShare();
    genome.loadChromosomeList(options.getReferenceSequencePath());
    if ((options.getRegion() != null) || (options.getBedfile() != null))
        reportType = new RegionResultReport(options, conf);
    else
        reportType = new WholeGenomeResultReport(options);
    Map<String, ResultReport> reports = new ConcurrentHashMap<String, ResultReport>();
    FileSystem fs = oPath.getFileSystem(conf);
    FileStatus filelist[] = fs.listStatus(oPath);
    for (int i = 0; i < filelist.length; i++) {
        if (!filelist[i].isDir() && !filelist[i].getPath().toString().startsWith("_")) {
            FSDataInputStream reader = fs.open(filelist[i].getPath());
            LineReader lineReader = new LineReader(reader, conf);
            Text line = new Text();
            while (lineReader.readLine(line) > 0) {
                String lineString = line.toString();
                if (line.getLength() == 0) {
                    continue;
                }
                if (lineString.contains("sample:")) {
                    String sample = line.toString().split(":")[1];
                    if (!reports.containsKey(sample)) {
                        reports.put(sample, reportType);
                        reportBuilder.setReportChoice(reportType);
                        reportBuilder.initReports(sample);
                    } else {
                        reportType = reports.get(sample);
                        reportBuilder.setReportChoice(reportType);
                    }
                }
                reportBuilder.parseReport(lineReader, line, genome);
            }
            lineReader.close();
            reader.close();
        }
    }
    for (String sampleName : reports.keySet()) {
        System.err.println("sample:" + sampleName);
        ResultReport report = reports.get(sampleName);
        report.write(fs, sampleName);
    }
    fs.close();
}
From source file:org.commoncrawl.service.pagerank.slave.PageRankUtils.java
License:Open Source License
public static void distributeRank(final PRValueMap valueMap, final Path outlinksFile, final boolean outlinksIsRemote,
        File localOutputDir, String remoteOutputDir, int thisNodeIdx, int nodeCount, int iterationNumber,
        final ProgressAndCancelCheckCallback progressCallback) throws IOException {

    final Configuration conf = CrawlEnvironment.getHadoopConfig();

    Vector<PRValueOutputStream> outputStreamVector = new Vector<PRValueOutputStream>();

    // allocate a queue ...
    final LinkedBlockingQueue<OutlinkItem> queue = new LinkedBlockingQueue<OutlinkItem>(20000);

    try {
        // start the loader thread ...
        Thread loaderThread = new Thread(new Runnable() {

            final BytesWritable key = new BytesWritable();
            final BytesWritable value = new BytesWritable();

            final DataInputBuffer keyStream = new DataInputBuffer();
            final DataInputBuffer valueStream = new DataInputBuffer();

            @Override
            public void run() {
                LOG.info("Opening Outlinks File at:" + outlinksFile);
                SequenceFile.Reader reader = null;
                try {
                    FileSystem fsForOutlinksFile = null;
                    if (outlinksIsRemote) {
                        fsForOutlinksFile = CrawlEnvironment.getDefaultFileSystem();
                    } else {
                        fsForOutlinksFile = FileSystem.getLocal(conf);
                    }

                    long bytesToReadTotal = fsForOutlinksFile.getLength(outlinksFile);

                    reader = new SequenceFile.Reader(fsForOutlinksFile, outlinksFile, conf);

                    OutlinkItem item = new OutlinkItem();
                    int itemCount = 0;
                    boolean isCancelled = false;

                    while (!isCancelled && reader.next(key, value)) {
                        keyStream.reset(key.get(), 0, key.getLength());
                        valueStream.reset(value.get(), 0, value.getLength());

                        // populate item from data
                        readURLFPFromStream(keyStream, item.targetFingerprint);
                        item.urlCount = readURLFPAndCountFromStream(valueStream, item.sourceFingerprint);

                        try {
                            long blockTimeStart = System.currentTimeMillis();
                            queue.put(item);
                            long blockTimeEnd = System.currentTimeMillis();
                        } catch (InterruptedException e) {
                        }

                        item = new OutlinkItem();

                        if (itemCount++ % 10000 == 0 && progressCallback != null) {
                            float percentComplete = (float) reader.getPosition() / (float) bytesToReadTotal;
                            if (progressCallback.updateProgress(percentComplete)) {
                                LOG.info("Cancel check callback returned true.Cancelling outlink item load");
                                isCancelled = true;
                            }
                        }
                    }

                    item.sourceFingerprint = null;
                    item.targetFingerprint = null;

                    // add empty item
                    try {
                        if (!isCancelled) {
                            queue.put(item);
                        } else {
                            queue.put(new OutlinkItem(new IOException("Operation Cancelled")));
                        }
                    } catch (InterruptedException e) {
                    }
                } catch (IOException e) {
                    // add error item to queue.
                    try {
                        queue.put(new OutlinkItem(e));
                    } catch (InterruptedException e1) {
                    }
                } finally {
                    if (reader != null)
                        try {
                            reader.close();
                        } catch (IOException e) {
                        }
                }
            }
        });

        loaderThread.start();

        // first things first ... initialize output stream vector
        FileSystem fileSystem = buildDistributionOutputStreamVector(true,
                getOutlinksBaseName(thisNodeIdx, iterationNumber), localOutputDir, remoteOutputDir, thisNodeIdx,
                nodeCount, outputStreamVector);

        try {
            // open outlinks file .
            LOG.info("Iterating Items in Outlinks File and Writing Test Value");

            int itemCount = 0;
            int totalOutlinkCount = 0;
            int iterationOutlinkCount = 0;
            long iterationStart = System.currentTimeMillis();
            long timeStart = iterationStart;

            boolean done = false;

            ArrayList<OutlinkItem> items = new ArrayList<OutlinkItem>();

            // start iterating outlinks
            while (!done) {
                //OutlinkItem item = null;
                //try {
                long waitTimeStart = System.currentTimeMillis();
                queue.drainTo(items);
                long waitTimeEnd = System.currentTimeMillis();
                //} catch (InterruptedException e) {
                //}

                for (OutlinkItem item : items) {
                    if (item.error != null) {
                        LOG.info("Loader Thread Returned Error:" + CCStringUtils.stringifyException(item.error));
                        throw item.error;
                    } else if (item.sourceFingerprint == null) {
                        LOG.info("Loader Thread Indicated EOF via emtpy item");
                        done = true;
                    } else {
                        ++itemCount;

                        /*
                        LOG.info("SourceFP-DomainHash:" + item.sourceFingerprint.getDomainHash() + " URLHash:"
                            + item.sourceFingerprint.getUrlHash() + " PartitionIdx:"
                            + ((item.sourceFingerprint.hashCode() & Integer.MAX_VALUE) % CrawlEnvironment.PR_NUMSLAVES));
                        */

                        // now get pr value for fingerprint (random seek in memory here!!!)
                        float prValue = valueMap.getPRValue(item.sourceFingerprint)
                                / (float) Math.max(item.urlCount, 1);

                        // write value out
                        int nodeIndex = (item.targetFingerprint.hashCode() & Integer.MAX_VALUE) % nodeCount;
                        outputStreamVector.get(nodeIndex).writePRValue(item.targetFingerprint,
                                item.sourceFingerprint, prValue);

                        if (itemCount % 10000 == 0) {
                            long timeEnd = System.currentTimeMillis();
                            int milliseconds = (int) (timeEnd - iterationStart);

                            LOG.info("Distribute PR for 10000 Items with:" + iterationOutlinkCount
                                    + " Outlinks Took:" + milliseconds + " Milliseconds" + " QueueCount:"
                                    + queue.size());

                            iterationStart = System.currentTimeMillis();
                            totalOutlinkCount += iterationOutlinkCount;
                            iterationOutlinkCount = 0;
                        }
                    }
                }
                items.clear();
            }

            totalOutlinkCount += iterationOutlinkCount;

            LOG.info("Distribute Finished for a total of:" + itemCount + " Items with:" + totalOutlinkCount
                    + " Outlinks Took:" + (System.currentTimeMillis() - timeStart) + " Milliseconds");

            LOG.info("Waiting for Loader Thread to Die");
            try {
                loaderThread.join();
            } catch (InterruptedException e) {
            }
            LOG.info("Loader Thread Died - Moving on...");
        } finally {
            for (PRValueOutputStream info : outputStreamVector) {
                if (info != null) {
                    info.close(false);
                }
            }

            if (fileSystem != null) {
                fileSystem.close();
            }
        }
    } catch (IOException e) {
        LOG.error("Exception caught while distributing outlinks:" + CCStringUtils.stringifyException(e));
        throw e;
    }
}
From source file:org.deeplearning4j.AnimalModelByHdfsSparkCluster.java
License:Apache License
public static void saveModel(FileSystem fs, Model model) throws Exception {
    String json = null;
    if (model instanceof MultiLayerNetwork) {
        json = ((MultiLayerNetwork) model).getLayerWiseConfigurations().toJson();
    } else if (model instanceof ComputationGraph) {
        json = ((ComputationGraph) model).getConfiguration().toJson();
    }
    byte[] byts = json.getBytes();
    FSDataOutputStream out = fs.create(new Path(modelPath));
    out.write(byts);
    out.hsync();
    fs.close();
}
From source file:org.deeplearning4j.hadoop.datasetiterator.BaseHdfsDataSetIterator.java
License:Apache License
/**
 * List all of the files in the
 * hdfsUriRootDir directory
 * @return the list of paths in the directory
 * @throws Exception if one occurs
 */
public List<Path> filesInDir() throws Exception {
    FileSystem fs = FileSystem.get(conf);
    List<Path> paths = new ArrayList<Path>();
    RemoteIterator<LocatedFileStatus> iter = fs.listFiles(new Path(hdfsUriRootDir), true);
    while (iter.hasNext()) {
        LocatedFileStatus l = iter.next();
        paths.add(l.getPath());
    }
    fs.close();
    return paths;
}
From source file:org.deeplearning4j.hadoop.util.HdfsUtils.java
License:Apache License
public static void close(Configuration conf) throws Exception {
    FileSystem system = systems.get(conf);
    if (system != null) {
        system.close();
        systems.remove(conf);
    }
}
From source file:org.deeplearning4j.utils.CommonUtils.java
License:Apache License
public static void closeHdfsConnect(FileSystem fs) {
    try {
        if (fs != null) {
            fs.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:org.gridgain.grid.ggfs.GridGgfsHadoopFileSystemAbstractSelfTest.java
License:Open Source License
/**
 * Test how IPC cache map works.
 *
 * @throws Exception If failed.
 */
public void testIpcCache() throws Exception {
    GridGgfsHadoopEx hadoop = GridTestUtils.getFieldValue(fs, "rmtClient", "delegateRef", "value", "hadoop");

    if (hadoop instanceof GridGgfsHadoopOutProc) {
        FileSystem fsOther = null;

        try {
            Field field = GridGgfsHadoopIpcIo.class.getDeclaredField("ipcCache");
            field.setAccessible(true);

            Map<String, GridGgfsHadoopIpcIo> cache = (Map<String, GridGgfsHadoopIpcIo>) field.get(null);

            Configuration cfg = configuration(PRIMARY_AUTHORITY, skipEmbed, skipLocShmem);

            // we disable caching in order to obtain new FileSystem instance.
            cfg.setBoolean("fs.ggfs.impl.disable.cache", true);

            // Initial cache size.
            int initSize = cache.size();

            // Ensure that when IO is used by multiple file systems and one of them is closed, IO is not stopped.
            fsOther = FileSystem.get(new URI(PRIMARY_URI), cfg);

            assert fs != fsOther;

            assertEquals(initSize, cache.size());

            fsOther.close();

            assertEquals(initSize, cache.size());

            Field stopField = GridGgfsHadoopIpcIo.class.getDeclaredField("stopping");
            stopField.setAccessible(true);

            GridGgfsHadoopIpcIo io = null;

            for (Map.Entry<String, GridGgfsHadoopIpcIo> ioEntry : cache.entrySet()) {
                if (endpoint.contains(ioEntry.getKey())) {
                    io = ioEntry.getValue();
                    break;
                }
            }

            assert io != null;
            assert !(Boolean) stopField.get(io);

            // Ensure that IO is stopped when nobody else is need it.
            fs.close();

            assertEquals(initSize - 1, cache.size());

            assert (Boolean) stopField.get(io);
        } finally {
            U.closeQuiet(fsOther);
        }
    }
}