List of usage examples for org.apache.hadoop.fs FileSystem close
@Override public void close() throws IOException
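Before the project examples, here is a minimal sketch (not taken from any of the projects listed below) of the common obtain-use-close pattern around FileSystem.close(). The class name and path are placeholders. One caveat worth hedging: FileSystem.get(conf) normally hands back a cached instance that may be shared with other callers, so closing it also closes it for them unless caching is disabled for the scheme (e.g. via the fs.<scheme>.impl.disable.cache setting).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem.get() usually returns a cached, shared instance; closing it here
        // closes it for any other code holding the same instance. Disabling the cache
        // (an assumption about the deployment) avoids that side effect.
        FileSystem fs = FileSystem.get(conf);
        try {
            // Placeholder path purely for illustration.
            FSDataOutputStream out = fs.create(new Path("/tmp/example.txt"));
            out.writeUTF("hello");
            out.close();
        } finally {
            fs.close(); // release the client and any resources it holds
        }
    }
}

The examples that follow come from open-source projects that call close() directly on a FileSystem instance.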
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void keyGet() throws Exception {
    System.out.println("key get, path " + path + ", size " + size + ", loop " + loop);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path[] paths = new Path[loop];
    for (int i = 0; i < loop; i++) {
        String child = "" + i;
        paths[i] = new Path(path, child);
        System.out.println("path " + paths[i]);
    }
    byte[] outBuf = new byte[size];
    for (Path p : paths) {
        FSDataOutputStream outputStream = fs.create(p);
        outputStream.write(outBuf);
        outputStream.close();
    }
    long start = System.currentTimeMillis();
    ByteBuffer inBuf = ByteBuffer.allocateDirect(size);
    for (int i = 0; i < loop; i++) {
        Path p = paths[i];
        FSDataInputStream inputStream = fs.open(p);
        inBuf.clear();
        while (inBuf.remaining() > 0) {
            inputStream.read(inBuf);
        }
        inputStream.close();
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start));
    double latency = executionTime * 1000.0 / ((double) loop);
    System.out.println("execution time [ms] " + executionTime);
    System.out.println("latency [us] " + latency);
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void browseDir() throws Exception {
    System.out.println("enumerating dir, path " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // benchmark
    System.out.println("starting benchmark...");
    RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
    while (iter.hasNext()) {
        LocatedFileStatus status = iter.next();
        System.out.println(status.getPath());
    }
    fs.close();
}
From source file:com.iflytek.spider.parse.ParseText.java
License:Apache License
public static void main(String argv[]) throws Exception {
    String usage = "ParseText (-local | -dfs <namenode:port>) recno segment";
    if (argv.length < 3) {
        System.out.println("usage:" + usage);
        return;
    }
    Options opts = new Options();
    Configuration conf = SpiderConfiguration.create();
    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
    String[] remainingArgs = parser.getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    try {
        int recno = Integer.parseInt(remainingArgs[0]);
        String segment = remainingArgs[1];
        String filename = new Path(segment, ParseText.DIR_NAME).toString();
        ParseText parseText = new ParseText();
        ArrayFile.Reader parseTexts = new ArrayFile.Reader(fs, filename, conf);
        parseTexts.get(recno, parseText);
        System.out.println("Retrieved " + recno + " from file " + filename);
        System.out.println(parseText);
        parseTexts.close();
    } finally {
        fs.close();
    }
}
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void testCreateListing() {
    try {
        Cluster cluster = ClusterTest.buildLocalCluster();
        FileSystem fs = mock(FileSystem.class);
        createMockForFileSystem(fs, cluster);

        Map<FileStatus, String> results = new TreeMap<FileStatus, java.lang.String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        fs.delete(cluster.getDataDir(), true);
        FileStatus dataDir = new FileStatus(20, false, 3, 23823, 2438232, cluster.getDataDir());
        fs.delete(new Path(cluster.getRootDir() + "/conduit-checkpoint"), true);

        Set<String> streamsToProcess = new HashSet<String>();
        streamsToProcess.add("stream1");
        streamsToProcess.add("stream2");
        TestLocalStreamService service = new TestLocalStreamService(null, cluster, null,
                new FSCheckpointProvider(cluster.getRootDir() + "/conduit-checkpoint"), streamsToProcess);
        service.createListing(fs, dataDir, results, trashSet, checkpointPaths);

        Set<String> tmpResults = new LinkedHashSet<String>();
        // print the results
        for (FileStatus status : results.keySet()) {
            tmpResults.add(status.getPath().toString());
            LOG.debug("Results [" + status.getPath().toString() + "]");
        }

        // print the trash
        Iterator<FileStatus> it = trashSet.iterator();
        Set<String> tmpTrashPaths = new LinkedHashSet<String>();
        while (it.hasNext()) {
            FileStatus trashfile = it.next();
            tmpTrashPaths.add(trashfile.getPath().toString());
            LOG.debug("trash file [" + trashfile.getPath());
        }

        Map<String, String> tmpCheckPointPaths = new TreeMap<String, String>();
        Set<String> streams = checkpointPaths.rowKeySet();
        for (String streamName : streams) {
            Map<String, String> collectorCheckpointValueMap = checkpointPaths.row(streamName);
            for (String collector : collectorCheckpointValueMap.keySet()) {
                String checkpointKey = AbstractService.getCheckPointKey(service.getName(), streamName, collector);
                LOG.debug("Check Pointing Key [" + checkpointKey + "] with value ["
                        + collectorCheckpointValueMap.get(collector) + "]");
                tmpCheckPointPaths.put(checkpointKey, collectorCheckpointValueMap.get(collector));
            }
        }
        validateExpectedOutput(tmpResults, tmpTrashPaths, tmpCheckPointPaths);
        fs.delete(new Path(cluster.getRootDir() + "/conduit-checkpoint"), true);
        fs.delete(cluster.getDataDir(), true);
        fs.close();
    } catch (Exception e) {
        LOG.debug("Error in running testCreateListing", e);
        assert false;
    }
}
From source file:com.inmobi.conduit.purge.DataPurgerServiceTest.java
License:Apache License
private void testPurgerService(String testfilename, int numofhourstoadd, boolean checkifexists,
        boolean checktrashexists) throws Exception {
    ConduitConfigParser configparser = new ConduitConfigParser(testfilename);
    ConduitConfig config = configparser.getConfig();

    for (Cluster cluster : config.getClusters().values()) {
        TestDataPurgerService service = new TestDataPurgerService(config, cluster);

        FileSystem fs = FileSystem.getLocal(new Configuration());
        fs.delete(new Path(cluster.getRootDir()), true);

        Calendar todaysdate = new GregorianCalendar(Calendar.getInstance().getTimeZone());
        todaysdate.add(Calendar.HOUR, numofhourstoadd);

        createTestPurgefiles(fs, cluster, todaysdate, false);

        service.runOnce();

        verifyPurgefiles(fs, cluster, todaysdate, checkifexists, checktrashexists);
        fs.delete(new Path(cluster.getRootDir()), true);
        fs.close();
    }
}
From source file:com.inmobi.conduit.purge.DataPurgerServiceTest.java
License:Apache License
public void testDataPurger() throws Exception {
    AbstractService.clearHCatInMemoryMaps();
    LOG.info("Check data purger does not stop when unable to delete a path");
    ConduitConfigParser configparser = new ConduitConfigParser("test-dps-conduit_X_5.xml");
    ConduitConfig config = configparser.getConfig();

    for (Cluster cluster : config.getClusters().values()) {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        fs.delete(new Path(cluster.getRootDir()), true);

        Calendar date1 = new GregorianCalendar(Calendar.getInstance().getTimeZone());
        date1.add(Calendar.HOUR, -7);
        createTestPurgefiles(fs, cluster, date1, false);

        Calendar date2 = new GregorianCalendar(Calendar.getInstance().getTimeZone());
        date2.add(Calendar.HOUR, -6);
        createTestPurgefiles(fs, cluster, date2, false);

        ArrayList<Path> pathsToProcess = new ArrayList<Path>();
        Path[] paths = getLocalCommitPath(fs, cluster, date2);
        for (Path path : paths) {
            fs.setPermission(path, new FsPermission("000"));
            pathsToProcess.add(path);
        }
        paths = getMergeCommitPath(fs, cluster, date2);
        for (Path path : paths) {
            fs.setPermission(path, new FsPermission("000"));
            pathsToProcess.add(path);
        }

        Calendar date3 = new GregorianCalendar(Calendar.getInstance().getTimeZone());
        date3.add(Calendar.HOUR, -5);
        createTestPurgefiles(fs, cluster, date3, false);

        TestDataPurgerService service = new TestDataPurgerService(config, cluster);

        service.runOnce();

        verifyPurgefiles(fs, cluster, date1, false, false);
        verifyPurgefiles(fs, cluster, date2, true, false);
        verifyPurgefiles(fs, cluster, date3, false, false);

        for (Path p : pathsToProcess) {
            fs.setPermission(p, new FsPermission("755"));
        }
        fs.delete(new Path(cluster.getRootDir()), true);
        fs.close();
    }

    Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("DataPurgerService",
            "purgePaths.count", DataPurgerService.class.getName()).getValue().longValue(), 9);
    Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("DataPurgerService",
            "deleteFailures.count", DataPurgerService.class.getName()).getValue().longValue(), 0);
}
From source file:com.inmobi.conduit.purge.DataPurgerServiceTest.java
License:Apache License
public void testTrashPurging() throws Exception {
    LOG.info("Creating empty data dirs");
    ConduitConfigParser configparser = new ConduitConfigParser("test-dps-conduit_X_6.xml");
    ConduitConfig config = configparser.getConfig();

    for (Cluster cluster : config.getClusters().values()) {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        fs.delete(new Path(cluster.getRootDir()), true);

        Calendar date1 = new GregorianCalendar(Calendar.getInstance().getTimeZone());
        date1.add(Calendar.HOUR, -48);
        createTestPurgefiles(fs, cluster, date1, true);

        Calendar date2 = new GregorianCalendar(Calendar.getInstance().getTimeZone());
        date2.add(Calendar.HOUR, -24);
        createTestPurgefiles(fs, cluster, date2, true);

        Calendar date3 = new GregorianCalendar(Calendar.getInstance().getTimeZone());
        date3.add(Calendar.HOUR, -1);
        createTestPurgefiles(fs, cluster, date3, false);

        TestDataPurgerService service = new TestDataPurgerService(config, cluster);

        service.runOnce();

        verifyPurgefiles(fs, cluster, date1, false, false);
        verifyPurgefiles(fs, cluster, date2, false, false);
        verifyPurgefiles(fs, cluster, date3, true, true);

        fs.delete(new Path(cluster.getRootDir()), true);
        fs.close();
    }

    Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("DataPurgerService",
            "purgePaths.count", DataPurgerService.class.getName()).getValue().longValue(), 6);
}
From source file:com.inmobi.databus.local.LocalStreamServiceTest.java
License:Apache License
private void testCreateListing() {
    try {
        Cluster cluster = ClusterTest.buildLocalCluster();
        FileSystem fs = mock(FileSystem.class);
        createMockForFileSystem(fs, cluster);

        Map<FileStatus, String> results = new TreeMap<FileStatus, java.lang.String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        Map<String, FileStatus> checkpointPaths = new HashMap<String, FileStatus>();
        fs.delete(cluster.getDataDir(), true);
        FileStatus dataDir = new FileStatus(20, false, 3, 23823, 2438232, cluster.getDataDir());
        fs.delete(new Path(cluster.getRootDir() + "/databus-checkpoint"), true);

        TestLocalStreamService service = new TestLocalStreamService(null, cluster,
                new FSCheckpointProvider(cluster.getRootDir() + "/databus-checkpoint"));
        service.createListing(fs, dataDir, results, trashSet, checkpointPaths);

        Set<String> tmpResults = new LinkedHashSet<String>();
        // print the results
        for (FileStatus status : results.keySet()) {
            tmpResults.add(status.getPath().toString());
            LOG.debug("Results [" + status.getPath().toString() + "]");
        }

        // print the trash
        Iterator<FileStatus> it = trashSet.iterator();
        Set<String> tmpTrashPaths = new LinkedHashSet<String>();
        while (it.hasNext()) {
            FileStatus trashfile = it.next();
            tmpTrashPaths.add(trashfile.getPath().toString());
            LOG.debug("trash file [" + trashfile.getPath());
        }

        Map<String, String> tmpCheckPointPaths = new TreeMap<String, String>();
        // print checkPointPaths
        for (String key : checkpointPaths.keySet()) {
            tmpCheckPointPaths.put(key, checkpointPaths.get(key).getPath().getName());
            LOG.debug("CheckPoint key [" + key + "] value ["
                    + checkpointPaths.get(key).getPath().getName() + "]");
        }
        validateExpectedOutput(tmpResults, tmpTrashPaths, tmpCheckPointPaths);
        fs.delete(new Path(cluster.getRootDir() + "/databus-checkpoint"), true);
        fs.delete(cluster.getDataDir(), true);
        fs.close();
    } catch (Exception e) {
        LOG.debug("Error in running testCreateListing", e);
        assert false;
    }
}
From source file:com.inmobi.databus.purge.DataPurgerServiceTest.java
License:Apache License
private void testPurgerService(String testfilename, int numofhourstoadd, boolean checkifexists,
        boolean checktrashexists) throws Exception {
    DatabusConfigParser configparser = new DatabusConfigParser(testfilename);
    DatabusConfig config = configparser.getConfig();

    for (Cluster cluster : config.getClusters().values()) {
        TestDataPurgerService service = new TestDataPurgerService(config, cluster);

        FileSystem fs = FileSystem.getLocal(new Configuration());
        fs.delete(new Path(cluster.getRootDir()), true);

        Calendar todaysdate = new GregorianCalendar(Calendar.getInstance().getTimeZone());
        todaysdate.add(Calendar.HOUR, numofhourstoadd);

        createTestPurgefiles(fs, cluster, todaysdate);

        service.runOnce();

        verifyPurgefiles(fs, cluster, todaysdate, checkifexists, checktrashexists);
        fs.delete(new Path(cluster.getRootDir()), true);
        fs.close();
    }
}
From source file:com.kse.bigdata.main.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    /**********************************************************************************
     ** Merge the source files into one.                                             **
     ** Should change the directories of each file before executing the program.     **
     **********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/
    Configuration conf = new Configuration();

    // Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    // Enable Profiling
    // conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {
        /*
         * Mandatory commands
         */
        // Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        // Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        // Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional commands
         */
        // Extract the number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        // Whether the job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        // Extract the sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        // Whether the job uses mean or median. [Default: mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";

    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {
        /* ******************** Hadoop Launch *********************** */
        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Should think of another way to implement the combiner class;
        // the current implementation is not helpful to the job.
        // job.setCombinerClass(Combiner.class);

        // Set the number of reduce tasks to 1 to keep the 100 nearest neighbors in a sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * When the job finishes, read its output and store it in the results list.
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * When all jobs finish, store their results in the output/result.csv file.
     */
    String finalOutputPath = "output/result.csv";

    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        // CSV file header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");
            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}