List of usage examples for org.apache.hadoop.fs.FileSystem.close()
@Override public void close() throws IOException
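Before the project-specific examples below, here is a minimal, self-contained sketch of the usual acquire-use-close pattern. The HDFS URI and the fs.hdfs.impl.disable.cache setting are illustrative assumptions, not taken from the examples; note that FileSystem.get() may hand back a cached instance shared with other callers, so closing it affects them too unless caching is disabled for that scheme.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Assumption for illustration: disable the per-scheme FileSystem cache so that
        // close() only releases this instance instead of a shared, cached one.
        conf.setBoolean("fs.hdfs.impl.disable.cache", true);

        // Hypothetical NameNode address; replace with your cluster's URI.
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000/"), conf);
        try {
            // Do some work with the file system before closing it.
            System.out.println("/tmp exists: " + fs.exists(new Path("/tmp")));
        } finally {
            // Release sockets, streams and other resources held by this FileSystem.
            fs.close();
        }
    }
}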
From source file:org.gridgain.grid.ggfs.GridGgfsHadoopFileSystemAbstractSelfTest.java
License:Open Source License
/** @throws Exception If failed. */
public void testCloseIfNotInitialized() throws Exception {
    final FileSystem fs = new GridGgfsHadoopFileSystem();

    // Check that close does nothing harmful.
    fs.close();
}
From source file:org.gridgain.grid.ggfs.GridGgfsHadoopFileSystemIpcCacheSelfTest.java
License:Open Source License
/**
 * Test how the IPC cache map works.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("unchecked")
public void testIpcCache() throws Exception {
    Field cacheField = GridGgfsHadoopIpcIo.class.getDeclaredField("ipcCache");
    cacheField.setAccessible(true);

    Field activeCntField = GridGgfsHadoopIpcIo.class.getDeclaredField("activeCnt");
    activeCntField.setAccessible(true);

    Map<String, GridGgfsHadoopIpcIo> cache = (Map<String, GridGgfsHadoopIpcIo>) cacheField.get(null);

    String name = "ggfs:" + getTestGridName(0) + "@";

    Configuration cfg = new Configuration();

    cfg.addResource(U.resolveGridGainUrl(HADOOP_FS_CFG));
    cfg.setBoolean("fs.ggfs.impl.disable.cache", true);
    cfg.setBoolean(String.format(GridGgfsHadoopUtils.PARAM_GGFS_ENDPOINT_NO_EMBED, name), true);

    // Ensure that an existing IO is reused.
    FileSystem fs1 = FileSystem.get(new URI("ggfs://" + name + "/"), cfg);

    assertEquals(1, cache.size());

    GridGgfsHadoopIpcIo io = null;

    System.out.println("CACHE: " + cache);

    for (String key : cache.keySet()) {
        if (key.contains("10500")) {
            io = cache.get(key);

            break;
        }
    }

    assert io != null;

    assertEquals(1, ((AtomicInteger) activeCntField.get(io)).get());

    // Ensure that when the IO is used by multiple file systems and one of them is closed, the IO is not stopped.
    FileSystem fs2 = FileSystem.get(new URI("ggfs://" + name + "/abc"), cfg);

    assertEquals(1, cache.size());
    assertEquals(2, ((AtomicInteger) activeCntField.get(io)).get());

    fs2.close();

    assertEquals(1, cache.size());
    assertEquals(1, ((AtomicInteger) activeCntField.get(io)).get());

    Field stopField = GridGgfsHadoopIpcIo.class.getDeclaredField("stopping");
    stopField.setAccessible(true);

    assert !(Boolean) stopField.get(io);

    // Ensure that the IO is stopped when nobody needs it anymore.
    fs1.close();

    assert cache.isEmpty();
    assert (Boolean) stopField.get(io);
}
From source file:org.gridgain.loadtests.ggfs.GridGgfsPerformanceBenchmark.java
License:Open Source License
/**
 * Starts the benchmark.
 *
 * @param args Program arguments:
 *     [0] - number of threads, default 1.
 *     [1] - operation (read or write), default write.
 *     [2] - file length, default 256MB.
 *     [3] - stream buffer size, default 128KB.
 *     [4] - fs config path.
 *     [5] - fs prefix.
 *     [6] - replication factor, default 3.
 * @throws Exception If failed.
 */
public static void main(String[] args) throws Exception {
    final int threadNum = intArgument(args, 0, 1);
    final int op = intArgument(args, 1, OP_WRITE);
    final long fileLen = longArgument(args, 2, 256 * 1024 * 1024);
    final int bufSize = intArgument(args, 3, 128 * 1024);
    final String cfgPath = argument(args, 4, HADOOP_FS_CFG);
    final String fsPrefix = argument(args, 5, FS_PREFIX);
    final short replication = (short) intArgument(args, 6, 3);

    final Path ggfsHome = new Path(fsPrefix);

    final FileSystem fs = ggfs(ggfsHome, cfgPath);

    final AtomicLong progress = new AtomicLong();
    final AtomicInteger idx = new AtomicInteger();

    System.out.println("Warming up...");

    // warmUp(fs, ggfsHome, op, fileLen);

    System.out.println("Finished warm up.");

    // For the read benchmark, pre-create the input files first.
    if (op == OP_READ) {
        for (int i = 0; i < threadNum; i++)
            benchmarkWrite(fs, new Path(ggfsHome, "in-" + i), fileLen, bufSize, replication, null);
    }

    long total = 0;

    long start = System.currentTimeMillis();

    GridFuture<Long> fut = GridTestUtils.runMultiThreadedAsync(new Runnable() {
        @Override public void run() {
            String fileIdx = op == OP_READ ? String.valueOf(idx.getAndIncrement()) : UUID.randomUUID().toString();

            try {
                for (int i = 0; i < 200; i++) {
                    if (op == OP_WRITE)
                        benchmarkWrite(fs, new Path(ggfsHome, "out-" + fileIdx), fileLen, bufSize, replication,
                            progress);
                    else
                        benchmarkRead(fs, new Path(ggfsHome, "in-" + fileIdx), bufSize, progress);
                }

                System.out.println("Finished " + (op == OP_WRITE ? "writing" : "reading") + " data.");
            }
            catch (Exception e) {
                System.out.println("Failed to process stream: " + e);

                e.printStackTrace();
            }
        }
    }, threadNum, "test-runner");

    while (!fut.isDone()) {
        U.sleep(1000);

        long written = progress.getAndSet(0);

        total += written;

        int mbytesPerSec = (int) (written / (1024 * 1024));

        System.out.println((op == OP_WRITE ? "Write" : "Read") + " rate [threads=" + threadNum + ", bufSize="
            + bufSize + ", MBytes/s=" + mbytesPerSec + ']');
    }

    long now = System.currentTimeMillis();

    System.out.println((op == OP_WRITE ? "Written" : "Read") + " " + total + " bytes in " + (now - start)
        + "ms, avg rate is " + (total * 1000 / ((now - start) * 1024 * 1024)) + "MBytes/s");

    fs.close();
}
From source file:org.hortonworks.dovetail.client.Client.java
License:Apache License
/**
 * Main run function for the client.
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
private boolean run() throws IOException, YarnException {
    FileSystem fs = FileSystem.get(conf);

    Deployer deployer = new Deployer(fs, conf);
    deployer.deployArtifacts();

    Path amPath = deployer.getAppMasterPath();
    List<Path> configPaths = deployer.getConfigPaths();

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    FileStatus amStatus = fs.getFileStatus(amPath);
    LocalResource amJarFileResource = Records.newRecord(LocalResource.class);
    amJarFileResource.setType(LocalResourceType.FILE);
    amJarFileResource.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarFileResource.setResource(ConverterUtils.getYarnUrlFromPath(amPath));
    amJarFileResource.setTimestamp(amStatus.getModificationTime());
    amJarFileResource.setSize(amStatus.getLen());
    localResources.put(conf.get(DovetailConfiguration.DOVETAIL_AM_JAR), amJarFileResource);

    for (Path path : configPaths) {
        FileStatus configFileStatus = fs.getFileStatus(path);
        LocalResource configFileResource = Records.newRecord(LocalResource.class);
        configFileResource.setType(LocalResourceType.FILE);
        configFileResource.setVisibility(LocalResourceVisibility.APPLICATION);
        configFileResource.setResource(ConverterUtils.getYarnUrlFromURI(path.toUri()));
        configFileResource.setTimestamp(configFileStatus.getModificationTime());
        configFileResource.setSize(configFileStatus.getLen());
        localResources.put(path.getName(), configFileResource);
    }

    fs.close();

    amContainer.setLocalResources(localResources);

    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }

    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());
    env.put("JAVA_HOME", System.getProperty("dovetail.java.home"));
    LOG.info("JAVA_HOME=" + System.getenv("JAVA_HOME"));

    amContainer.setEnvironment(env);
    amContainer.setCommands(AppMasterCommand.getCommands(conf));

    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    appContext.setAMContainerSpec(amContainer);

    Priority pri = Records.newRecord(Priority.class);
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    appContext.setQueue(amQueue);

    LOG.info("Submitting the application to ASM");
    yarnClient.submitApplication(appContext);

    return monitorApplication(appId);
}
From source file:org.jacky.hadoop.examples.ExtractData.java
License:Apache License
/**
 * Delete a dir in HDFS. dir may look like /tmp/testdir.
 *
 * @param dir
 * @throws IOException
 */
public static void deleteDir(String dir) throws IOException {
    log.info("delete path");

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(uri, conf);

    /*
     * Note on delete(Path f, boolean recursive): a non-empty directory can only
     * be deleted with recursive set to true, otherwise an exception is thrown.
     * For a file (or an empty directory), recursive can be either true or false.
     */
    fs.delete(new Path(dir), true);

    fs.close();
}
From source file:org.mitre.ccv.mapred.GenerateFeatureVectors.java
License:Open Source License
/**
 * Start a new job with the given configuration and parameters.
 *
 * @param jobConf
 * @param listInput file path containing the list of k-mers to use
 * @param cardinality number of k-mers to use (if the list contains fewer, then that will be used instead)
 * @param input composition vector {@link SequenceFile} such as generated by {@link CalculateCompositionVectors}
 * @param output
 * @param cleanLogs
 * @return zero if no errors
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String listInput, Integer cardinality, String input, String output,
        boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, GenerateFeatureVectors.class);
    conf.setJobName("GenerateFeatureVectors");

    Path listPath = new Path(listInput); // i.e., listInput = win32_200902260829/kmer_120811a7fa1_tmp
    FileSystem fs = listPath.getFileSystem(conf);

    if (listInput != null) {
        // @todo: should check to see if it is there!
        // It doesn't say it, but we need the qualified path with the host name,
        // otherwise URI sticks the host on to it not so nicely.
        Path qPath = fs.makeQualified(listPath);

        // listPath = hdfs://rocks5.local:54310/user/mcolosimo/win32_200902260829/kmer_120811a7fa1_tmp
        LOG.info(String.format("Caching k-mer file %s", qPath.toString()));

        // URI: hdfs://rocks5.local:54310/user/mcolosimo/win32_200902260829/kmer_120811a7fa1_tmp
        URI listURI = new URI(qPath.toString());
        DistributedCache.addCacheFile(listURI, conf);
        conf.set(KMER_LIST, listPath.getName());
        //LOG.info("k-mer URI:" + listURI.toString());
    } else {
        throw new Exception("GenerateFeatureVectors requires a list of k-mers!");
    }

    /* We need this. It is okay if the cardinality is larger than the number of k-mers. */
    if (cardinality == null) {
        LOG.info("Scanning k-mer file to determine cardinality");

        FSDataInputStream ins = fs.open(listPath);
        KmerEntropyPairWritable w = new KmerEntropyPairWritable();

        int c = 0;
        while (ins.available() > 0) {
            w.readFields(ins);
            c++;
        }
        ins.close();
        fs.close();

        LOG.info(String.format("Found %d k-mers in the file", c));

        cardinality = c;
    }

    conf.setInt(VECTOR_CARDINALITY, cardinality);

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(CompositionVectorMap.class);
    conf.setOutputKeyClass(Text.class); // final output key class - sample name
    conf.setOutputValueClass(SparseVectorWritable.class); // final output value class

    // Set up combiner/reducer
    conf.setReducerClass(Features2VectorReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);

    return 0;
}
From source file:org.mitre.mapred.fs.FileUtils.java
License:Open Source License
/**
 * Takes input as a comma-separated list of files and verifies that they exist.
 * It defaults to file:/// if the files specified do not have a scheme, and it
 * returns the paths converted to URIs, defaulting to file:///. So an input of
 * /home/user/file1,/home/user/file2 would return
 * file:///home/user/file1,file:///home/user/file2
 *
 * @see org.apache.hadoop.util.GenericOptionsParser#validateFiles(java.lang.String, org.apache.hadoop.conf.Configuration)
 * @param files
 * @return the paths converted to URIs
 */
public static String validateFiles(String files, Configuration conf) throws IOException {
    if (files == null) {
        return null;
    }

    String[] fileArr = files.split(",");
    String[] finalArr = new String[fileArr.length];

    for (int i = 0; i < fileArr.length; i++) {
        String tmp = fileArr[i];
        String finalPath;
        Path path = new Path(tmp);
        URI pathURI = path.toUri();
        FileSystem localFs = FileSystem.getLocal(conf);

        if (pathURI.getScheme() == null) {
            // Default to the local file system; check if the file exists first.
            if (!localFs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(localFs).toString();
        } else {
            // Check if the file exists in this file system. We need to recreate this
            // filesystem object to copy these files to the file system the jobtracker
            // is running on.
            FileSystem fs = path.getFileSystem(conf);
            if (!fs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(fs).toString();
            try {
                fs.close();
            } catch (IOException e) {
                // Ignore failures while closing; the path has already been validated.
            }
        }
        finalArr[i] = finalPath;
    }
    return StringUtils.arrayToString(finalArr);
}
From source file:org.mrgeo.format.CsvInputFormatTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void testBasics() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testBasics.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        @SuppressWarnings("unused")
        int l = 0;
        StringBuffer buf = new StringBuffer();

        String[] base = { "word1:Hello word2:world number:1 ", "word1:foo word2:bar number:2 ",
                "word1:cat word2:dog number:3 ", "word1:rock word2:paper number:4 ",
                "word1:red word2:blue, number:5 ", "word1:,green, word2:,, number:6 ", };

        int index = 0;
        while (reader.nextKeyValue()) {
            Geometry f = reader.getCurrentValue();
            String row = "";
            for (Map.Entry attr : f.getAllAttributes().entrySet()) {
                row += attr.getKey() + ":" + attr.getValue() + " ";
            }
            Assert.assertEquals("Error in row " + index, base[index++], row);
        }

        // This hash code will tell us if anything changes then it can be manually verified.
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}
From source file:org.mrgeo.format.CsvInputFormatTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void testNullProcessing() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testNullValues.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        @SuppressWarnings("unused")
        int l = 0;
        //StringBuffer buf = new StringBuffer();

        // Test specific rows returned to make sure the values are as expected.
        Assert.assertTrue(reader.nextKeyValue());
        Geometry f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test1", f.getAttribute("string1"));
        Assert.assertEquals(1.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(1.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);

        // Row 2 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test2", f.getAttribute("string1"));
        Assert.assertEquals(2.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"), f.getAttribute("2"));

        // Row 3 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test3", f.getAttribute("string1"));
        Assert.assertEquals(3.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(3.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);

        // Row 4 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test4", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("1"));
        Assert.assertEquals(4.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);

        // Row 5 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test5", f.getAttribute("string1"));
        Assert.assertEquals(5.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(5.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);

        // Row 6 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test6", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals("", f.getAttribute("double1"));

        // Row 7 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test7", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("int1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"),
                f.getAttribute("double1"));

        Assert.assertFalse(reader.nextKeyValue());
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}
From source file:org.mrgeo.format.CsvInputFormatTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void testNullIgnore() throws Exception {
    FileSystem fs = new RawLocalFileSystem();
    try {
        int lineCount = 0;

        // Write columns file which defines the columns title and type
        String cstr = "<?xml version='1.0' encoding='UTF-8'?>\n<AllColumns firstLineHeader='false'>\n";
        cstr += " <Column name='name' type='Nominal'/>\n";
        cstr += " <Column name='x' type='Numeric'/>\n";
        cstr += " <Column name='y' type='Numeric'/>\n";
        cstr += "</AllColumns>\n";
        FileOutputStream fos = new FileOutputStream(output + "/nulXY.csv.columns");
        PrintStream ps = new PrintStream(fos);
        ps.print(cstr);
        ps.close();

        // Write csv test data
        fos = new FileOutputStream(output + "/nullXY.csv");
        ps = new PrintStream(fos);
        // populated rows
        for (int ii = 0; ii < 10; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        // empty rows
        ps.print("ASDF,,1.0\n");
        ps.print("ASDF,1.0,\n");
        ps.print("ASDF,,\n");
        lineCount += 3;
        // populated rows
        for (int ii = 0; ii < 5; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        ps.close();
        System.out.println(output + "nulXY.csv");

        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(output, "nullXY.csv");
        testFile = fs.makeQualified(testFile);
        InputSplit split;
        long l;
        long start;

        TextInputFormat format = new TextInputFormat();
        split = new FileSplit(testFile, 0, lineCount * 1000, null);
        RecordReader<LongWritable, Text> reader2 = format.createRecordReader(split,
                HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        reader2.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        l = 0;
        start = System.currentTimeMillis();
        while (reader2.nextKeyValue()) {
            reader2.getCurrentValue().toString();
            l++;
        }
        Assert.assertEquals(lineCount, l);
        System.out.printf("text line reader with null x,y ignore: %d\n", System.currentTimeMillis() - start);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}