List of usage examples for org.apache.hadoop.fs.FileSystem.getWorkingDirectory()
public abstract Path getWorkingDirectory();
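getWorkingDirectory() returns the file system's current working directory, the path against which relative Path values are resolved (for example via new Path(fs.getWorkingDirectory(), name) or Path.makeQualified). Before the project examples below, here is a minimal, self-contained sketch against the local file system; the class name and the relative path used are illustrative only, not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WorkingDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Use the local file system; FileSystem.get(conf) would return the configured default (e.g. HDFS).
        FileSystem fs = FileSystem.getLocal(conf);

        // The working directory is the base against which relative paths are resolved.
        Path workingDir = fs.getWorkingDirectory();
        System.out.println("Working directory: " + workingDir);

        // Qualify a relative path with the file system URI and the working directory.
        Path relative = new Path("data/input.txt");
        Path qualified = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println("Qualified path:    " + qualified);

        // The working directory can be changed for the lifetime of this FileSystem instance.
        fs.setWorkingDirectory(new Path("/tmp"));
        System.out.println("New working dir:   " + fs.getWorkingDirectory());
    }
}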
From source file: com.splout.db.dnode.TestFetcher.java
License: Open Source License

@Test
public void testHdfsFetchingAndThrottling() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);
    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    testConfig.setProperty(FetcherProperties.DOWNLOAD_BUFFER, 4);
    testConfig.setProperty(FetcherProperties.BYTES_PER_SEC_THROTTLE, 8);
    Fetcher fetcher = new Fetcher(testConfig);

    final String str = "This is what happens when you don't know what to write";

    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write(str.getBytes());
    oS.close();

    long startTime = System.currentTimeMillis();
    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
    long endTime = System.currentTimeMillis();

    double bytesPerSec = (str.getBytes().length / (double) (endTime - startTime)) * 1000;
    assertEquals(8, bytesPerSec, 0.5);

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());
    assertEquals(str, Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}
From source file: com.thinkbiganalytics.kylo.catalog.spark.sources.spark.SparkDataSetContext.java
License: Apache License

/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException          if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
    // Get configuration
    final Configuration conf = delegate.getHadoopConfiguration(client);
    final String highWaterMarkName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK),
            SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
    final Job job = Job.getInstance(conf);

    final String highWaterMarkValue = client.getHighWaterMarks().get(highWaterMarkName);
    if (highWaterMarkValue != null) {
        try {
            HighWaterMarkInputFormat.setHighWaterMark(job, Long.parseLong(highWaterMarkValue));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException(
                    "Invalid " + HIGH_WATER_MARK_OPTION + " value: " + highWaterMarkValue, e);
        }
    }

    final String maxFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE),
            SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
    if (maxFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMaxFileAge(job, Long.parseLong(maxFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MAX_AGE_OPTION + " value: " + maxFileAge, e);
        }
    }

    final String minFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE),
            SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
    if (minFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMinFileAge(job, Long.parseLong(minFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MIN_AGE_OPTION + " value: " + minFileAge, e);
        }
    }

    // Convert URIs to Paths
    final Path[] paths = new Path[uris.size()];
    for (int i = 0; i < uris.size(); ++i) {
        final Path path = new Path(uris.get(i));
        final FileSystem fs = path.getFileSystem(conf);
        paths[i] = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    }
    HighWaterMarkInputFormat.setInputPaths(job, paths);

    // Get high water mark paths
    final HighWaterMarkInputFormat inputFormat = new HighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    client.setHighWaterMarks(
            Collections.singletonMap(highWaterMarkName, Long.toString(inputFormat.getLastHighWaterMark())));

    // Return resolved paths
    final List<String> resolvedPaths = new ArrayList<>(files.size());
    if (files.isEmpty()) {
        resolvedPaths.add("file:/dev/null");
    } else {
        for (final FileStatus file : files) {
            resolvedPaths.add(file.getPath().toString());
        }
    }
    return resolvedPaths;
}
From source file: com.thinkbiganalytics.nifi.v2.hdfs.AbstractHadoopProcessor.java
License: Apache License

/**
 * Reset Hadoop Configuration and FileSystem based on the supplied configuration resources.
 *
 * @param configResources for configuration
 * @param dir             the target directory
 * @param context         for context, which gives access to the principal
 * @return An HdfsResources object
 * @throws IOException if unable to access HDFS
 */
HdfsResources resetHDFSResources(String configResources, String dir, ProcessContext context) throws IOException {
    // org.apache.hadoop.conf.Configuration saves its current thread context class loader to use for threads
    // that it creates later to do I/O. We need this class loader to be the NarClassLoader instead of the
    // magical NarThreadContextClassLoader.
    ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());

    try {
        Configuration config = getConfigurationFromResources(configResources);

        // first check for timeout on HDFS connection, because FileSystem has a hard coded 15 minute timeout
        checkHdfsUriForTimeout(config);

        // disable caching of Configuration and FileSystem objects, else we cannot reconfigure the processor
        // without a complete restart
        String disableCacheName = String.format("fs.%s.impl.disable.cache",
                FileSystem.getDefaultUri(config).getScheme());
        config.set(disableCacheName, "true");

        // If kerberos is enabled, create the file system as the kerberos principal
        // -- use RESOURCES_LOCK to guarantee UserGroupInformation is accessed by only a single thread at a time
        FileSystem fs = null;
        UserGroupInformation ugi = null;
        synchronized (RESOURCES_LOCK) {
            if (config.get("hadoop.security.authentication").equalsIgnoreCase("kerberos")) {
                String principal = context.getProperty(kerberosPrincipal).getValue();
                String keyTab = context.getProperty(kerberosKeytab).getValue();
                UserGroupInformation.setConfiguration(config);
                ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTab);
                modifyConfig(context, config);
                fs = getFileSystemAsUser(config, ugi);
                lastKerberosReloginTime = System.currentTimeMillis() / 1000;
            } else {
                config.set("ipc.client.fallback-to-simple-auth-allowed", "true");
                config.set("hadoop.security.authentication", "simple");
                modifyConfig(context, config);
                fs = getFileSystem(config);
            }
        }
        getLog().info(
                "Initialized a new HDFS File System with working dir: {} default block size: {} default replication: {} config: {}",
                new Object[] { fs.getWorkingDirectory(), fs.getDefaultBlockSize(new Path(dir)),
                        fs.getDefaultReplication(new Path(dir)), config.toString() });
        return new HdfsResources(config, fs, ugi);
    } finally {
        Thread.currentThread().setContextClassLoader(savedClassLoader);
    }
}
From source file: com.twitter.elephanttwin.util.HdfsUtils.java
License: Apache License

public static Iterable<Path> getSubdirectories(final boolean recursive, final String baseDirectory,
        final FileSystem hdfs) throws IOException {
    FileStatus[] fileStat;
    Path basePath = new Path(baseDirectory);
    if (!hdfs.exists(basePath)) {
        throw new IOException(
                hdfs.getWorkingDirectory() + baseDirectory + " does not exist, cannot getSubdirectories");
    }
    FileStatus status = hdfs.getFileStatus(basePath);
    if (!status.isDir()) {
        LOG.warning("tried to find subdirectories of " + status.getPath() + ", but it is a file");
        return Lists.newArrayList(status.getPath());
    }
    // get the stat on all files in the source directory
    fileStat = hdfs.listStatus(basePath);
    if (fileStat == null) {
        throw new IOException(
                "FileSystem.listStatus(" + basePath + ") returned null, cannot getSubdirectories");
    }
    // get paths to the files in the source directory
    return Arrays.asList(FileUtil.stat2Paths(fileStat));
}
From source file: com.yata.core.HDFSManager.java
License: Apache License

/**
 * @param hdfsTestDataSourceFile
 * @param hdfsTestDataTargetFile
 * @throws IOException
 *
 * hadoop fs -cp /projects/ddsw/dev/data/backup/dealer_hierarchy/<<DOMAIN_NAME>>/<<FILE_NAME>> /projects/ddsw/dev/data/raw/nas/<<DOMAIN_NAME>>
 */
public void copyHDFSData(String hdfsTestDataSourceFile, String hdfsTestDataTargetFile) throws OozieClientException {

    System.out.println("copyHDFSData@" + className + " : Loading Test Data From :-> " + hdfsTestDataSourceFile
            + " : Into :-> " + hdfsTestDataTargetFile);

    FileSystem hdfs = null;
    Path hdfsTestDataSource = null;
    Path hdfsTestDataTarget = null;

    try {
        hdfs = getHdfsFileSytem();

        System.out.println("copyHDFSData@" + className + " : HDFS :-> " + hdfs);
        System.out.println("copyHDFSData@" + className + " : HDFSHomeDirectory :-> " + hdfs.getHomeDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS-URI :-> " + hdfs.getUri());
        System.out.println(
                "copyHDFSData@" + className + " : HDFSWorkingDirectory :-> " + hdfs.getWorkingDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS : " + hdfs + " : Exists :-> "
                + hdfs.exists(hdfs.getHomeDirectory()));

        hdfsTestDataSource = new Path(hdfs.getUri().getPath() + hdfsTestDataSourceFile);
        hdfsTestDataTarget = new Path(hdfs.getUri().getPath() + hdfsTestDataTargetFile);

        System.out.println("copyHDFSData@" + className + " : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while getting HDFS FileSystem - EXITING...");
    }

    FileUtil hdfsUtil = new FileUtil();

    try {
        hdfsUtil.copy(hdfs, hdfsTestDataSource, hdfs, hdfsTestDataTarget, false, true, hdfs.getConf());

        System.out.println("copyHDFSData@" + className + " : NOW : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Copying HDFS Data - EXITING...");
    }

    /**
     * IMPORTANT
     * If the Source Data file on HDFS is not owned by the Hive/Hadoop User, then use the command below to
     * change the permission for Hive/Hadoop User to move/delete the file once processed...
     */
    try {
        hdfs.setPermission(hdfsTestDataTarget,
                new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.READ_EXECUTE));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Changing HDFS File Permissions - EXITING...");
    }
}
From source file: countTheGivenWords.searchAndCountJob.java

public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        searchAndCountMapper Map = new searchAndCountMapper();
        conf.setMapperClass(Map.getClass());

        searchAndCountReducer Reduce = new searchAndCountReducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);
        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);
        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");

        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
            String line;
            line = br.readLine();
            while (line != null) {
                System.out.println("\t" + line);
                line = br.readLine();
            }
            */
            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {
                System.out.println(LocString);
            }
        }
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}
From source file: etl.cmd.test.XFsTestCase.java
License: Apache License

private Path initFileSystem(FileSystem fs) throws Exception {
    Path path = new Path(fs.getWorkingDirectory(), java.util.UUID.randomUUID().toString());
    Path testDirInFs = fs.makeQualified(path);
    System.out.println(XLog.format("Setting FS testcase work dir[{0}]", testDirInFs));
    if (fs.exists(testDirInFs)) {
        setAllPermissions(fs, testDirInFs);
    }
    fs.delete(testDirInFs, true);
    if (!fs.mkdirs(path)) {
        throw new IOException(XLog.format("Could not create FS testcase dir [{0}]", testDirInFs));
    }
    fs.setOwner(testDirInFs, getTestUser(), getTestGroup());
    fs.setPermission(testDirInFs, FsPermission.valueOf("-rwxrwx--x"));
    return testDirInFs;
}
From source file: ezbakehelpers.hdfs.HDFSHelperTest.java
License: Apache License

@Test
public void testLocalFileSystem() throws Exception {
    EzConfiguration configuration = new EzConfiguration(new ClasspathConfigurationLoader());
    FileSystem fs = HDFSHelper.getFileSystemFromProperties(configuration.getProperties());
    assertTrue(fs.getWorkingDirectory().toString().startsWith("file"));
}
From source file: ezbakehelpers.hdfs.HDFSHelperTest.java
License: Apache License

@Test
public void testHAHdfsFileSystem() throws Exception {
    FileSystem fs = HDFSHelper.getFileSystemFromProperties(getHAProps());
    assertThat(fs.getWorkingDirectory().toString(), startsWith("hdfs://mycluster"));
}
From source file: hadoopProcesses.testJob.java

public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        map Map = new map();
        conf.setMapperClass(Map.getClass());

        reducer Reduce = new reducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);
        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);
        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");

        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
            String line;
            line = br.readLine();
            while (line != null) {
                System.out.println("\t" + line);
                line = br.readLine();
            }
            */
            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {
                System.out.println(LocString);
            }
        }
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}