Example usage for org.apache.hadoop.fs FileSystem getWorkingDirectory

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getWorkingDirectory.

Prototype

public abstract Path getWorkingDirectory();

Document

Get the current working directory for the given FileSystem
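
Before the full examples below, here is a minimal standalone sketch (not taken from any of the source files listed) showing the typical pattern: obtain the working directory from a FileSystem and qualify a relative path against it. The class name and the use of the default Configuration are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WorkingDirectoryExample {
    public static void main(String[] args) throws Exception {
        // Default FileSystem for the configuration on the classpath
        // (the local file system unless fs.defaultFS points at HDFS).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Current working directory, e.g. file:/home/<user> or hdfs://namenode/user/<user>.
        Path cwd = fs.getWorkingDirectory();
        System.out.println("Working directory: " + cwd);

        // Relative paths are resolved against the working directory,
        // as several examples below do with new Path(fs.getWorkingDirectory(), name).
        Path resolved = new Path(cwd, "data.txt");
        System.out.println("Resolved path: " + resolved);
    }
}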

Usage

From source file:com.splout.db.dnode.TestFetcher.java

License:Open Source License

@Test
public void testHdfsFetchingAndThrottling() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);

    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    testConfig.setProperty(FetcherProperties.DOWNLOAD_BUFFER, 4);
    testConfig.setProperty(FetcherProperties.BYTES_PER_SEC_THROTTLE, 8);
    Fetcher fetcher = new Fetcher(testConfig);

    final String str = "This is what happens when you don't know what to write";

    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write(str.getBytes());
    oS.close();

    long startTime = System.currentTimeMillis();
    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
    long endTime = System.currentTimeMillis();

    double bytesPerSec = (str.getBytes().length / (double) (endTime - startTime)) * 1000;
    assertEquals(8, bytesPerSec, 0.5);

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());

    assertEquals(str, Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}

From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.SparkDataSetContext.java

License:Apache License

/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException          if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
    // Get configuration
    final Configuration conf = delegate.getHadoopConfiguration(client);
    final String highWaterMarkName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK),
            SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
    final Job job = Job.getInstance(conf);

    final String highWaterMarkValue = client.getHighWaterMarks().get(highWaterMarkName);
    if (highWaterMarkValue != null) {
        try {
            HighWaterMarkInputFormat.setHighWaterMark(job, Long.parseLong(highWaterMarkValue));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException(
                    "Invalid " + HIGH_WATER_MARK_OPTION + " value: " + highWaterMarkValue, e);
        }
    }

    final String maxFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE),
            SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
    if (maxFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMaxFileAge(job, Long.parseLong(maxFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MAX_AGE_OPTION + " value: " + maxFileAge, e);
        }
    }

    final String minFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE),
            SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
    if (minFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMinFileAge(job, Long.parseLong(minFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MIN_AGE_OPTION + " value: " + minFileAge, e);
        }
    }

    // Convert URIs to Paths
    final Path[] paths = new Path[uris.size()];

    for (int i = 0; i < uris.size(); ++i) {
        final Path path = new Path(uris.get(i));
        final FileSystem fs = path.getFileSystem(conf);
        paths[i] = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    }

    HighWaterMarkInputFormat.setInputPaths(job, paths);

    // Get high water mark paths
    final HighWaterMarkInputFormat inputFormat = new HighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    client.setHighWaterMarks(
            Collections.singletonMap(highWaterMarkName, Long.toString(inputFormat.getLastHighWaterMark())));

    // Return resolved paths
    final List<String> resolvedPaths = new ArrayList<>(files.size());

    if (files.isEmpty()) {
        resolvedPaths.add("file:/dev/null");
    } else {
        for (final FileStatus file : files) {
            resolvedPaths.add(file.getPath().toString());
        }
    }

    return resolvedPaths;
}

From source file:com.thinkbiganalytics.nifi.v2.hdfs.AbstractHadoopProcessor.java

License:Apache License

/**
 * Reset Hadoop Configuration and FileSystem based on the supplied configuration resources.
 *
 * @param configResources for configuration
 * @param dir             the target directory
 * @param context         for context, which gives access to the principal
 * @return An HdfsResources object
 * @throws IOException if unable to access HDFS
 */
HdfsResources resetHDFSResources(String configResources, String dir, ProcessContext context)
        throws IOException {
    // org.apache.hadoop.conf.Configuration saves its current thread context class loader to use for threads that it creates
    // later to do I/O. We need this class loader to be the NarClassLoader instead of the magical
    // NarThreadContextClassLoader.
    ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());

    try {
        Configuration config = getConfigurationFromResources(configResources);

        // first check for timeout on HDFS connection, because FileSystem has a hard coded 15 minute timeout
        checkHdfsUriForTimeout(config);

        // disable caching of Configuration and FileSystem objects, else we cannot reconfigure the processor without a complete
        // restart
        String disableCacheName = String.format("fs.%s.impl.disable.cache",
                FileSystem.getDefaultUri(config).getScheme());
        config.set(disableCacheName, "true");

        // If kerberos is enabled, create the file system as the kerberos principal
        // -- use RESOURCES_LOCK to guarantee UserGroupInformation is accessed by only a single thread at a time
        FileSystem fs = null;
        UserGroupInformation ugi = null;
        synchronized (RESOURCES_LOCK) {
            if (config.get("hadoop.security.authentication").equalsIgnoreCase("kerberos")) {
                String principal = context.getProperty(kerberosPrincipal).getValue();
                String keyTab = context.getProperty(kerberosKeytab).getValue();
                UserGroupInformation.setConfiguration(config);
                ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTab);
                modifyConfig(context, config);
                fs = getFileSystemAsUser(config, ugi);
                lastKerberosReloginTime = System.currentTimeMillis() / 1000;
            } else {
                config.set("ipc.client.fallback-to-simple-auth-allowed", "true");
                config.set("hadoop.security.authentication", "simple");
                modifyConfig(context, config);
                fs = getFileSystem(config);
            }
        }
        getLog().info(
                "Initialized a new HDFS File System with working dir: {} default block size: {} default replication: {} config: {}",
                new Object[] { fs.getWorkingDirectory(), fs.getDefaultBlockSize(new Path(dir)),
                        fs.getDefaultReplication(new Path(dir)), config.toString() });
        return new HdfsResources(config, fs, ugi);
    } finally {
        Thread.currentThread().setContextClassLoader(savedClassLoader);
    }
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

public static Iterable<Path> getSubdirectories(final boolean recursive, final String baseDirectory,
        final FileSystem hdfs) throws IOException {

    FileStatus[] fileStat;
    Path basePath = new Path(baseDirectory);
    if (!hdfs.exists(basePath)) {
        throw new IOException(
                hdfs.getWorkingDirectory() + baseDirectory + " does not exist, cannot getSubdirectories");
    }
    FileStatus status = hdfs.getFileStatus(basePath);
    if (!status.isDir()) {
        LOG.warning("tried to find subdirectories of " + status.getPath() + ", but it is a file");
        return Lists.newArrayList(status.getPath());
    }
    // get the stat on all files in the source directory
    fileStat = hdfs.listStatus(basePath);

    if (fileStat == null) {
        throw new IOException(
                "FileSystem.listStatus(" + basePath + ") returned null, cannot getSubdirectories");
    }

    // get paths to the files in the source directory
    return Arrays.asList(FileUtil.stat2Paths(fileStat));
}

From source file:com.yata.core.HDFSManager.java

License:Apache License

/**
 *
 * @param hdfsTestDataSourceFile
 * @param hdfsTestDataTargetFile
 * @throws IOException
 *
 * hadoop fs -cp /projects/ddsw/dev/data/backup/dealer_hierarchy/<<DOMAIN_NAME>>/<<FILE_NAME>> /projects/ddsw/dev/data/raw/nas/<<DOMAIN_NAME>>
 */
public void copyHDFSData(String hdfsTestDataSourceFile, String hdfsTestDataTargetFile)
        throws OozieClientException {

    System.out.println("copyHDFSData@" + className + " : Loading Test Data From :-> " + hdfsTestDataSourceFile
            + " : Into :-> " + hdfsTestDataTargetFile);

    FileSystem hdfs = null;
    Path hdfsTestDataSource = null;
    Path hdfsTestDataTarget = null;

    try {

        hdfs = getHdfsFileSytem();

        System.out.println("copyHDFSData@" + className + " : HDFS :-> " + hdfs);

        System.out.println("copyHDFSData@" + className + " : HDFSHomeDirectory :-> " + hdfs.getHomeDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS-URI :-> " + hdfs.getUri());
        System.out.println(
                "copyHDFSData@" + className + " : HDFSWorkingDirectory :-> " + hdfs.getWorkingDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS : " + hdfs + " : Exists :-> "
                + hdfs.exists(hdfs.getHomeDirectory()));

        hdfsTestDataSource = new Path(hdfs.getUri().getPath() + hdfsTestDataSourceFile);
        hdfsTestDataTarget = new Path(hdfs.getUri().getPath() + hdfsTestDataTargetFile);

        System.out.println("copyHDFSData@" + className + " : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while getting HDFS FileSystem - EXITING...");
    }

    FileUtil hdfsUtil = new FileUtil();

    try {

        hdfsUtil.copy(hdfs, hdfsTestDataSource, hdfs, hdfsTestDataTarget, false, true, hdfs.getConf());

        System.out.println("copyHDFSData@" + className + " : NOW : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Copying HDFS Data - EXITING...");
    }

    /**
     * IMPORTANT
     * If the Source Data file on HDFS is not owned by the Hive/Hadoop User, then use the command below to
     * change the permission for Hive/Hadoop User to move/delete the file once processed...
     */
    try {

        hdfs.setPermission(hdfsTestDataTarget,
                new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.READ_EXECUTE));
    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218", "copyHDFSData@" + className
                + " : IOException while Changing HDFS File Permissions - EXITING...");
    }

}

From source file:countTheGivenWords.searchAndCountJob.java

public static void start(String[] args) {
    try {

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        searchAndCountMapper Map = new searchAndCountMapper();
        conf.setMapperClass(Map.getClass());

        searchAndCountReducer Reduce = new searchAndCountReducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);

        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");

        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
             BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
             String line;
             line = br.readLine();
             while (line != null) {
             System.out.println("\t" + line);
             line = br.readLine();
             }
             */

            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {

                System.out.println(LocString);
            }
        }

    } catch (Exception Exp) {

        Exp.printStackTrace();
    }
}

From source file:etl.cmd.test.XFsTestCase.java

License:Apache License

private Path initFileSystem(FileSystem fs) throws Exception {
    Path path = new Path(fs.getWorkingDirectory(), java.util.UUID.randomUUID().toString());
    Path testDirInFs = fs.makeQualified(path);
    System.out.println(XLog.format("Setting FS testcase work dir[{0}]", testDirInFs));
    if (fs.exists(testDirInFs)) {
        setAllPermissions(fs, testDirInFs);
    }
    fs.delete(testDirInFs, true);
    if (!fs.mkdirs(path)) {
        throw new IOException(XLog.format("Could not create FS testcase dir [{0}]", testDirInFs));
    }
    fs.setOwner(testDirInFs, getTestUser(), getTestGroup());
    fs.setPermission(testDirInFs, FsPermission.valueOf("-rwxrwx--x"));
    return testDirInFs;
}

From source file:ezbakehelpers.hdfs.HDFSHelperTest.java

License:Apache License

@Test
public void testLocalFileSystem() throws Exception {
    EzConfiguration configuration = new EzConfiguration(new ClasspathConfigurationLoader());
    FileSystem fs = HDFSHelper.getFileSystemFromProperties(configuration.getProperties());
    assertTrue(fs.getWorkingDirectory().toString().startsWith("file"));
}

From source file:ezbakehelpers.hdfs.HDFSHelperTest.java

License:Apache License

@Test
public void testHAHdfsFileSystem() throws Exception {
    FileSystem fs = HDFSHelper.getFileSystemFromProperties(getHAProps());
    assertThat(fs.getWorkingDirectory().toString(), startsWith("hdfs://mycluster"));
}

From source file:hadoopProcesses.testJob.java

public static void start(String[] args) {
    try {

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        map Map = new map();
        conf.setMapperClass(Map.getClass());

        reducer Reduce = new reducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);

        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");

        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
             BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
             String line;
             line = br.readLine();
             while (line != null) {
             System.out.println("\t" + line);
             line = br.readLine();
             }
             */

            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {

                System.out.println(LocString);
            }
        }
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}