List of usage examples for org.apache.hadoop.fs.FileSystem.getWorkingDirectory()
public abstract Path getWorkingDirectory();
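getWorkingDirectory() returns the file system's current working directory, the path against which relative Path values are resolved (for example via new Path(fs.getWorkingDirectory(), name) or Path.makeQualified). Before the project examples below, here is a minimal, self-contained sketch against the local file system; the class name and the relative path used are illustrative only, not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WorkingDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Use the local file system; FileSystem.get(conf) would return the configured default (e.g. HDFS).
        FileSystem fs = FileSystem.getLocal(conf);

        // The working directory is the base against which relative paths are resolved.
        Path workingDir = fs.getWorkingDirectory();
        System.out.println("Working directory: " + workingDir);

        // Qualify a relative path with the file system URI and the working directory.
        Path relative = new Path("data/input.txt");
        Path qualified = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println("Qualified path:    " + qualified);

        // The working directory can be changed for the lifetime of this FileSystem instance.
        fs.setWorkingDirectory(new Path("/tmp"));
        System.out.println("New working dir:   " + fs.getWorkingDirectory());
    }
}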
From source file: com.splout.db.dnode.TestFetcher.java
License: Open Source License

@Test
public void testHdfsFetchingAndThrottling() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);
    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    testConfig.setProperty(FetcherProperties.DOWNLOAD_BUFFER, 4);
    testConfig.setProperty(FetcherProperties.BYTES_PER_SEC_THROTTLE, 8);
    Fetcher fetcher = new Fetcher(testConfig);

    final String str = "This is what happens when you don't know what to write";

    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write(str.getBytes());
    oS.close();

    long startTime = System.currentTimeMillis();
    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
    long endTime = System.currentTimeMillis();

    double bytesPerSec = (str.getBytes().length / (double) (endTime - startTime)) * 1000;
    assertEquals(8, bytesPerSec, 0.5);

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());
    assertEquals(str, Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}
From source file: com.thinkbiganalytics.kylo.catalog.spark.sources.spark.SparkDataSetContext.java
License: Apache License

/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException          if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
    // Get configuration
    final Configuration conf = delegate.getHadoopConfiguration(client);
    final String highWaterMarkName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK),
            SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
    final Job job = Job.getInstance(conf);

    final String highWaterMarkValue = client.getHighWaterMarks().get(highWaterMarkName);
    if (highWaterMarkValue != null) {
        try {
            HighWaterMarkInputFormat.setHighWaterMark(job, Long.parseLong(highWaterMarkValue));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException(
                    "Invalid " + HIGH_WATER_MARK_OPTION + " value: " + highWaterMarkValue, e);
        }
    }

    final String maxFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE),
            SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
    if (maxFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMaxFileAge(job, Long.parseLong(maxFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MAX_AGE_OPTION + " value: " + maxFileAge, e);
        }
    }

    final String minFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE),
            SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
    if (minFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMinFileAge(job, Long.parseLong(minFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MIN_AGE_OPTION + " value: " + minFileAge, e);
        }
    }

    // Convert URIs to Paths
    final Path[] paths = new Path[uris.size()];
    for (int i = 0; i < uris.size(); ++i) {
        final Path path = new Path(uris.get(i));
        final FileSystem fs = path.getFileSystem(conf);
        paths[i] = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    }
    HighWaterMarkInputFormat.setInputPaths(job, paths);

    // Get high water mark paths
    final HighWaterMarkInputFormat inputFormat = new HighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    client.setHighWaterMarks(
            Collections.singletonMap(highWaterMarkName, Long.toString(inputFormat.getLastHighWaterMark())));

    // Return resolved paths
    final List<String> resolvedPaths = new ArrayList<>(files.size());
    if (files.isEmpty()) {
        resolvedPaths.add("file:/dev/null");
    } else {
        for (final FileStatus file : files) {
            resolvedPaths.add(file.getPath().toString());
        }
    }
    return resolvedPaths;
}
From source file: com.thinkbiganalytics.nifi.v2.hdfs.AbstractHadoopProcessor.java
License: Apache License

/**
 * Reset Hadoop Configuration and FileSystem based on the supplied configuration resources.
 *
 * @param configResources for configuration
 * @param dir             the target directory
 * @param context         for context, which gives access to the principal
 * @return An HdfsResources object
 * @throws IOException if unable to access HDFS
 */
HdfsResources resetHDFSResources(String configResources, String dir, ProcessContext context) throws IOException {
    // org.apache.hadoop.conf.Configuration saves its current thread context class loader to use for threads
    // that it creates later to do I/O. We need this class loader to be the NarClassLoader instead of the
    // magical NarThreadContextClassLoader.
    ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());

    try {
        Configuration config = getConfigurationFromResources(configResources);

        // first check for timeout on HDFS connection, because FileSystem has a hard coded 15 minute timeout
        checkHdfsUriForTimeout(config);

        // disable caching of Configuration and FileSystem objects, else we cannot reconfigure the processor
        // without a complete restart
        String disableCacheName = String.format("fs.%s.impl.disable.cache",
                FileSystem.getDefaultUri(config).getScheme());
        config.set(disableCacheName, "true");

        // If kerberos is enabled, create the file system as the kerberos principal
        // -- use RESOURCES_LOCK to guarantee UserGroupInformation is accessed by only a single thread at a time
        FileSystem fs = null;
        UserGroupInformation ugi = null;
        synchronized (RESOURCES_LOCK) {
            if (config.get("hadoop.security.authentication").equalsIgnoreCase("kerberos")) {
                String principal = context.getProperty(kerberosPrincipal).getValue();
                String keyTab = context.getProperty(kerberosKeytab).getValue();
                UserGroupInformation.setConfiguration(config);
                ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTab);
                modifyConfig(context, config);
                fs = getFileSystemAsUser(config, ugi);
                lastKerberosReloginTime = System.currentTimeMillis() / 1000;
            } else {
                config.set("ipc.client.fallback-to-simple-auth-allowed", "true");
                config.set("hadoop.security.authentication", "simple");
                modifyConfig(context, config);
                fs = getFileSystem(config);
            }
        }
        getLog().info(
                "Initialized a new HDFS File System with working dir: {} default block size: {} default replication: {} config: {}",
                new Object[] { fs.getWorkingDirectory(), fs.getDefaultBlockSize(new Path(dir)),
                        fs.getDefaultReplication(new Path(dir)), config.toString() });
        return new HdfsResources(config, fs, ugi);
    } finally {
        Thread.currentThread().setContextClassLoader(savedClassLoader);
    }
}
From source file: com.twitter.elephanttwin.util.HdfsUtils.java
License: Apache License

public static Iterable<Path> getSubdirectories(final boolean recursive, final String baseDirectory,
        final FileSystem hdfs) throws IOException {
    FileStatus[] fileStat;
    Path basePath = new Path(baseDirectory);
    if (!hdfs.exists(basePath)) {
        throw new IOException(
                hdfs.getWorkingDirectory() + baseDirectory + " does not exist, cannot getSubdirectories");
    }
    FileStatus status = hdfs.getFileStatus(basePath);
    if (!status.isDir()) {
        LOG.warning("tried to find subdirectories of " + status.getPath() + ", but it is a file");
        return Lists.newArrayList(status.getPath());
    }
    // get the stat on all files in the source directory
    fileStat = hdfs.listStatus(basePath);
    if (fileStat == null) {
        throw new IOException(
                "FileSystem.listStatus(" + basePath + ") returned null, cannot getSubdirectories");
    }
    // get paths to the files in the source directory
    return Arrays.asList(FileUtil.stat2Paths(fileStat));
}
From source file: com.yata.core.HDFSManager.java
License: Apache License

/**
 * @param hdfsTestDataSourceFile
 * @param hdfsTestDataTargetFile
 * @throws IOException
 *
 * hadoop fs -cp /projects/ddsw/dev/data/backup/dealer_hierarchy/<<DOMAIN_NAME>>/<<FILE_NAME>> /projects/ddsw/dev/data/raw/nas/<<DOMAIN_NAME>>
 */
public void copyHDFSData(String hdfsTestDataSourceFile, String hdfsTestDataTargetFile) throws OozieClientException {

    System.out.println("copyHDFSData@" + className + " : Loading Test Data From :-> " + hdfsTestDataSourceFile
            + " : Into :-> " + hdfsTestDataTargetFile);

    FileSystem hdfs = null;
    Path hdfsTestDataSource = null;
    Path hdfsTestDataTarget = null;

    try {
        hdfs = getHdfsFileSytem();

        System.out.println("copyHDFSData@" + className + " : HDFS :-> " + hdfs);
        System.out.println("copyHDFSData@" + className + " : HDFSHomeDirectory :-> " + hdfs.getHomeDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS-URI :-> " + hdfs.getUri());
        System.out.println(
                "copyHDFSData@" + className + " : HDFSWorkingDirectory :-> " + hdfs.getWorkingDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS : " + hdfs + " : Exists :-> "
                + hdfs.exists(hdfs.getHomeDirectory()));

        hdfsTestDataSource = new Path(hdfs.getUri().getPath() + hdfsTestDataSourceFile);
        hdfsTestDataTarget = new Path(hdfs.getUri().getPath() + hdfsTestDataTargetFile);

        System.out.println("copyHDFSData@" + className + " : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while getting HDFS FileSystem - EXITING...");
    }

    FileUtil hdfsUtil = new FileUtil();

    try {
        hdfsUtil.copy(hdfs, hdfsTestDataSource, hdfs, hdfsTestDataTarget, false, true, hdfs.getConf());

        System.out.println("copyHDFSData@" + className + " : NOW : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Copying HDFS Data - EXITING...");
    }

    /**
     * IMPORTANT
     * If the Source Data file on HDFS is not owned by the Hive/Hadoop User, then use the command below to
     * change the permission for Hive/Hadoop User to move/delete the file once processed...
     */
    try {
        hdfs.setPermission(hdfsTestDataTarget,
                new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.READ_EXECUTE));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Changing HDFS File Permissions - EXITING...");
    }
}
From source file: countTheGivenWords.searchAndCountJob.java

public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        searchAndCountMapper Map = new searchAndCountMapper();
        conf.setMapperClass(Map.getClass());

        searchAndCountReducer Reduce = new searchAndCountReducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);
        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);
        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");

        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
            String line;
            line = br.readLine();
            while (line != null) {
                System.out.println("\t" + line);
                line = br.readLine();
            }
            */
            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {
                System.out.println(LocString);
            }
        }
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}
From source file: etl.cmd.test.XFsTestCase.java
License: Apache License

private Path initFileSystem(FileSystem fs) throws Exception {
    Path path = new Path(fs.getWorkingDirectory(), java.util.UUID.randomUUID().toString());
    Path testDirInFs = fs.makeQualified(path);
    System.out.println(XLog.format("Setting FS testcase work dir[{0}]", testDirInFs));
    if (fs.exists(testDirInFs)) {
        setAllPermissions(fs, testDirInFs);
    }
    fs.delete(testDirInFs, true);
    if (!fs.mkdirs(path)) {
        throw new IOException(XLog.format("Could not create FS testcase dir [{0}]", testDirInFs));
    }
    fs.setOwner(testDirInFs, getTestUser(), getTestGroup());
    fs.setPermission(testDirInFs, FsPermission.valueOf("-rwxrwx--x"));
    return testDirInFs;
}
From source file: ezbakehelpers.hdfs.HDFSHelperTest.java
License: Apache License

@Test
public void testLocalFileSystem() throws Exception {
    EzConfiguration configuration = new EzConfiguration(new ClasspathConfigurationLoader());
    FileSystem fs = HDFSHelper.getFileSystemFromProperties(configuration.getProperties());
    assertTrue(fs.getWorkingDirectory().toString().startsWith("file"));
}
From source file: ezbakehelpers.hdfs.HDFSHelperTest.java
License: Apache License

@Test
public void testHAHdfsFileSystem() throws Exception {
    FileSystem fs = HDFSHelper.getFileSystemFromProperties(getHAProps());
    assertThat(fs.getWorkingDirectory().toString(), startsWith("hdfs://mycluster"));
}
From source file: hadoopProcesses.testJob.java

public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        map Map = new map();
        conf.setMapperClass(Map.getClass());

        reducer Reduce = new reducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);
        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);
        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");

        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
            String line;
            line = br.readLine();
            while (line != null) {
                System.out.println("\t" + line);
                line = br.readLine();
            }
            */
            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {
                System.out.println(LocString);
            }
        }
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}