Example usage for org.apache.hadoop.fs FileSystem getWorkingDirectory

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#getWorkingDirectory from open-source projects.

Prototype

public abstract Path getWorkingDirectory();

Document

Get the current working directory for the given FileSystem.
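
As a quick orientation before the project examples under Usage, here is a minimal, self-contained sketch of the call (class name and paths are illustrative, assuming only hadoop-common on the classpath). It also shows the companion pattern that recurs in the examples below: qualifying a relative Path against the working directory.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WorkingDirectoryExample {
    public static void main(String[] args) throws Exception {
        // Local filesystem; for HDFS use FileSystem.get(conf) with an hdfs:// default FS.
        FileSystem fs = FileSystem.getLocal(new Configuration());

        // The current working directory, returned as a fully qualified Path.
        Path cwd = fs.getWorkingDirectory();
        System.out.println("working dir = " + cwd);

        // Relative paths resolve against the working directory.
        Path qualified = new Path("data/input.txt").makeQualified(fs.getUri(), cwd);
        System.out.println("qualified   = " + qualified);

        // The working directory can be changed for this FileSystem instance.
        fs.setWorkingDirectory(new Path(cwd, "subdir"));
        System.out.println("new working dir = " + fs.getWorkingDirectory());
    }
}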

Usage

From source file: org.apache.slider.providers.agent.TestAppDefinitionPersister.java

License: Apache License

/**
 * @BeforeClass public static void initialize() {
 *     BasicConfigurator.resetConfiguration();
 *     BasicConfigurator.configure();
 * }
 */

@Test
public void testAppDefinitionPersister() throws Exception {
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.getLocal(configuration);
    log.info("fs working dir is {}", fs.getWorkingDirectory().toString());
    SliderFileSystem sliderFileSystem = new SliderFileSystem(fs, configuration);

    AppDefinitionPersister adp = new AppDefinitionPersister(sliderFileSystem);
    String clustername = "c1";
    ActionCreateArgs buildInfo = new ActionCreateArgs();
    buildInfo.appMetaInfo = null;
    buildInfo.appDef = null;
    buildInfo.addonDelegate = new AddonArgsDelegate();

    // nothing to do
    adp.processSuppliedDefinitions(clustername, buildInfo, null);
    adp.persistPackages();
    List<AppDefinitionPersister.AppDefinition> appDefinitions = adp.getAppDefinitions();
    Assert.assertEquals(0, appDefinitions.size());

    ConfTree ct = new ConfTree();
    ConfTreeOperations appConf = new ConfTreeOperations(ct);
    final File tempDir = Files.createTempDir();
    final File metainfo = new File(tempDir, "metainfo.json");

    // unreadable metainfo
    buildInfo.appMetaInfo = metainfo;

    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage()
                .contains("Path specified with " + "--metainfo either cannot be read or is not a file"));
    }

    try (PrintWriter writer = new PrintWriter(metainfo.getAbsolutePath(), "UTF-8")) {
        writer.println("{");
        writer.println("}");
    }
    buildInfo.appDef = metainfo;

    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage().contains("Both --metainfo and --appdef cannot be specified"));
    }

    // both --metainfojson and --appdef cannot be specified
    buildInfo.appMetaInfo = null;
    buildInfo.appMetaInfoJson = "{}";
    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage().contains("Both --metainfojson and --appdef cannot be specified"));
    }

    buildInfo.appDef = null;

    buildInfo.appMetaInfoJson = "";
    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage().contains("Empty string specified with --metainfojson"));
    }
    buildInfo.appMetaInfo = metainfo;

    // both --metainfo and --metainfojson cannot be specified
    buildInfo.appMetaInfoJson = "{}";
    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage().contains("Both --metainfo and --metainfojson cannot be specified"));
    }
    buildInfo.appMetaInfoJson = null;

    appConf.getGlobalOptions().set(AgentKeys.APP_DEF, metainfo.getAbsolutePath());

    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage()
                .contains("application.def cannot " + "not be set if --metainfo is specified in the cmd line"));
    }

    appConf.getGlobalOptions().remove(AgentKeys.APP_DEF);

    adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    appDefinitions = adp.getAppDefinitions();
    Assert.assertEquals(1, appDefinitions.size());
    Assert.assertTrue(appConf.getGlobalOptions().get(AgentKeys.APP_DEF).contains("appdef/appPkg.zip"));
    log.info(appDefinitions.get(0).toString());
    Assert.assertTrue(appDefinitions.get(0).appDefPkgOrFolder.toString().endsWith("default"));
    Assert.assertTrue(appDefinitions.get(0).targetFolderInFs.toString().contains("cluster/c1/appdef"));
    Assert.assertEquals("appPkg.zip", appDefinitions.get(0).pkgName);

    buildInfo.appDef = tempDir;
    buildInfo.appMetaInfo = null;

    appConf.getGlobalOptions().set(AgentKeys.APP_DEF, metainfo.getAbsolutePath());

    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage().contains("application.def must not be set if --appdef is provided"));
    }

    adp.getAppDefinitions().clear();
    appConf.getGlobalOptions().remove(AgentKeys.APP_DEF);
    adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    appDefinitions = adp.getAppDefinitions();
    Assert.assertEquals(1, appDefinitions.size());
    Assert.assertTrue(appConf.getGlobalOptions().get(AgentKeys.APP_DEF).contains("appdef/appPkg.zip"));
    log.info(appDefinitions.get(0).toString());
    Assert.assertTrue(appDefinitions.get(0).appDefPkgOrFolder.toString().endsWith(tempDir.toString()));
    Assert.assertTrue(appDefinitions.get(0).targetFolderInFs.toString().contains("cluster/c1/appdef"));
    Assert.assertEquals("appPkg.zip", appDefinitions.get(0).pkgName);

    adp.getAppDefinitions().clear();
    buildInfo.appDef = null;
    buildInfo.appMetaInfo = null;
    appConf.getGlobalOptions().remove(AgentKeys.APP_DEF);

    ArrayList<String> list = new ArrayList<String>() {
        {
            add("addon1");
            add("");
            add("addon2");
            add(metainfo.getAbsolutePath());
        }
    };

    buildInfo.addonDelegate.addonTuples = list;
    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage()
                .contains("addon package can only be specified if main app package is specified"));
    }

    buildInfo.appMetaInfo = metainfo;

    try {
        adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    } catch (BadConfigException bce) {
        log.info(bce.getMessage());
        Assert.assertTrue(bce.getMessage().contains("Invalid path for addon package addon1"));
    }

    appConf.getGlobalOptions().remove(AgentKeys.APP_DEF);

    list = new ArrayList<String>() {
        {
            add("addon1");
            add(tempDir.getAbsolutePath());
            add("addon2");
            add(metainfo.getAbsolutePath());
        }
    };

    buildInfo.addonDelegate.addonTuples = list;
    adp.getAppDefinitions().clear();

    adp.processSuppliedDefinitions(clustername, buildInfo, appConf);
    appDefinitions = adp.getAppDefinitions();

    Assert.assertEquals(3, appDefinitions.size());
    Assert.assertTrue(appConf.getGlobalOptions().get(AgentKeys.APP_DEF).contains("appdef/appPkg.zip"));
    Assert.assertTrue(appConf.getGlobalOptions().get("application.addon.addon1")
            .contains("addons/addon1/addon_addon1.zip"));
    Assert.assertTrue(appConf.getGlobalOptions().get("application.addon.addon2")
            .contains("addons/addon2/addon_addon2.zip"));
    log.info(appConf.getGlobalOptions().get("application.addons"));
    Assert.assertTrue(appConf.getGlobalOptions().get("application.addons")
            .contains("application.addon.addon2,application.addon.addon1")
            || appConf.getGlobalOptions().get("application.addons")
                    .contains("application.addon.addon1,application.addon.addon2"));
    int seen = 0;
    for (AppDefinitionPersister.AppDefinition adp_ad : appDefinitions) {
        if (adp_ad.pkgName.equals("appPkg.zip")) {
            log.info(adp_ad.toString());
            Assert.assertTrue(adp_ad.appDefPkgOrFolder.toString().endsWith("default"));
            Assert.assertTrue(adp_ad.targetFolderInFs.toString().contains("cluster/c1/appdef"));
            seen++;
        }
        if (adp_ad.pkgName.equals("addon_addon1.zip")) {
            log.info(adp_ad.toString());
            Assert.assertTrue(adp_ad.appDefPkgOrFolder.toString().endsWith(tempDir.toString()));
            Assert.assertTrue(adp_ad.targetFolderInFs.toString().contains("addons/addon1"));
            seen++;
        }
        if (adp_ad.pkgName.equals("addon_addon2.zip")) {
            log.info(adp_ad.toString());
            Assert.assertTrue(adp_ad.appDefPkgOrFolder.toString().endsWith("metainfo.json"));
            Assert.assertTrue(adp_ad.targetFolderInFs.toString().contains("addons/addon2"));
            seen++;
        }
    }
    Assert.assertEquals(3, seen);
}

From source file: org.apache.slider.test.ContractTestUtils.java

License: Apache License

/**
 * Cleanup at the end of a test run
 * @param action action triggering the operation (for use in logging)
 * @param fileSystem filesystem to work with. May be null
 * @param cleanupPath path to delete as a string
 */
public static void cleanup(String action, FileSystem fileSystem, String cleanupPath) {
    if (fileSystem == null) {
        return;
    }
    Path path = new Path(cleanupPath).makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
    cleanup(action, fileSystem, path);
}
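
A hypothetical teardown call for the helper above (the action label and path string are illustrative, assuming the ContractTestUtils class shown here is on the classpath). A relative cleanup path resolves under the filesystem's working directory via makeQualified, and a null filesystem is silently ignored:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.slider.test.ContractTestUtils;

public class CleanupUsage {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        // "target/test-dir" is relative, so it is qualified against fs.getWorkingDirectory().
        ContractTestUtils.cleanup("after-test", fs, "target/test-dir");
        // A null filesystem is a no-op, so callers need not null-check in teardown.
        ContractTestUtils.cleanup("after-test", null, "target/test-dir");
    }
}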

From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java

License: Apache License

@Test
public void testPathParts() throws Exception { // see PathParts
    FileSystem fs = dfsCluster.getFileSystem();
    int dfsClusterPort = fs.getWorkingDirectory().toUri().getPort();
    assertTrue(dfsClusterPort > 0);
    JobConf jobConf = getJobConf();
    Configuration simpleConf = new Configuration();

    for (Configuration conf : Arrays.asList(jobConf, simpleConf)) {
        for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
            for (String up : Arrays.asList("", "../")) {
                String down = up.length() == 0 ? "foo/" : "";
                String uploadURL = "hdfs://localhost:12345/user/foo/" + up + "bar.txt" + queryAndFragment;
                PathParts parts = new PathParts(uploadURL, conf);
                assertEquals(uploadURL, parts.getUploadURL());
                assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
                assertEquals("bar.txt", parts.getName());
                assertEquals("hdfs", parts.getScheme());
                assertEquals("localhost", parts.getHost());
                assertEquals(12345, parts.getPort());
                assertEquals("hdfs://localhost:12345/user/" + down + "bar.txt", parts.getId());
                assertEquals(parts.getId(), parts.getDownloadURL());
                assertFileNotFound(parts);

                uploadURL = "hdfs://localhost/user/foo/" + up + "bar.txt" + queryAndFragment;
                parts = new PathParts(uploadURL, conf);
                assertEquals(uploadURL, parts.getUploadURL());
                assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
                assertEquals("bar.txt", parts.getName());
                assertEquals("hdfs", parts.getScheme());
                assertEquals("localhost", parts.getHost());
                assertEquals(8020, parts.getPort());
                assertEquals("hdfs://localhost:8020/user/" + down + "bar.txt", parts.getId());
                assertEquals(parts.getId(), parts.getDownloadURL());
                assertFileNotFound(parts);
            }
        }
    }

    for (Configuration conf : Arrays.asList(jobConf)) {
        for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
            for (String up : Arrays.asList("", "../")) {
                // verify using absolute path
                String down = up.length() == 0 ? "foo/" : "";
                String uploadURL = "/user/foo/" + up + "bar.txt" + queryAndFragment;
                PathParts parts = new PathParts(uploadURL, conf);
                assertEquals(uploadURL, parts.getUploadURL());
                assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
                assertEquals("bar.txt", parts.getName());
                assertEquals("hdfs", parts.getScheme());
                assertTrue(
                        "localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost()));
                assertEquals(dfsClusterPort, parts.getPort());
                assertTrue(
                        parts.getId().equals("hdfs://localhost:" + dfsClusterPort + "/user/" + down + "bar.txt")
                                || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort
                                        + "/user/" + down + "bar.txt"));
                assertFileNotFound(parts);

                // verify relative path is interpreted to be relative to user's home dir and resolved to an absolute path
                uploadURL = "xuser/foo/" + up + "bar.txt" + queryAndFragment;
                parts = new PathParts(uploadURL, conf);
                assertEquals(uploadURL, parts.getUploadURL());
                String homeDir = "/user/" + System.getProperty("user.name");
                assertEquals(homeDir + "/xuser/" + down + "bar.txt", parts.getURIPath());
                assertEquals("bar.txt", parts.getName());
                assertEquals("hdfs", parts.getScheme());
                assertTrue(
                        "localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost()));
                assertEquals(dfsClusterPort, parts.getPort());
                assertTrue(parts.getId()
                        .equals("hdfs://localhost:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt")
                        || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + homeDir
                                + "/xuser/" + down + "bar.txt"));
                assertFileNotFound(parts);
            }
        }
    }

    try {
        new PathParts("/user/foo/bar.txt", simpleConf);
        fail("host/port resolution requires minimr conf, not a simple conf");
    } catch (IllegalArgumentException e) {
        // expected
    }
}
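
The port extraction at the top of this test works because getWorkingDirectory() returns a fully qualified Path whose URI carries the filesystem's scheme and authority. A small sketch of the same probe (hypothetical class name; note that getPort() is -1 when the filesystem URI has no explicit port, as with the local filesystem):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class WorkingDirUriProbe {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        URI wd = fs.getWorkingDirectory().toUri();
        System.out.println("scheme = " + wd.getScheme()); // e.g. "hdfs" or "file"
        System.out.println("host   = " + wd.getHost());   // null for the local filesystem
        System.out.println("port   = " + wd.getPort());   // -1 if the URI has no explicit port
    }
}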

From source file: org.apache.tez.mapreduce.examples.FilterLinesByWord.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Credentials credentials = new Credentials();

    boolean generateSplitsInClient = false;

    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }

    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }

    TezConfiguration tezConf = new TezConfiguration(conf);

    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
    if (jarPath == null) {
        throw new TezUncheckedException(
                "Could not find any jar containing " + FilterLinesByWord.class.getName() + " in the classpath");
    }

    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);

    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
            remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources,
            credentials);
    tezSession.start(); // the session must be started before DAGs can be submitted

    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor
            .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload))
            .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Don't use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false)
                .build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
            ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
            1);
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf)
            .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING
                    || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                    || dagStatus.getState() == DAGStatus.State.FAILED
                    || dagStatus.getState() == DAGStatus.State.KILLED
                    || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // continue;
            }
        }

        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // continue;
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.fatal("Failed to get application progress. Exiting");
                return -1;
            }
        }

        dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }

    ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}

From source file: org.apache.tez.mapreduce.examples.FilterLinesByWordOneToOne.java

License: Apache License

@Override
public int run(String[] otherArgs) throws Exception {
    boolean generateSplitsInClient = false;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }

    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }

    TezConfiguration tezConf = new TezConfiguration(conf);

    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    String jarPath = ClassUtil.findContainingJar(FilterLinesByWordOneToOne.class);
    if (jarPath == null) {
        throw new TezUncheckedException("Could not find any jar containing"
                + FilterLinesByWordOneToOne.class.getName() + " in the classpath");
    }

    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);

    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
            remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, null);
    tezSession.start(); // the session must be started before DAGs can be submitted

    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);

    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor
            .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload))
            .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Don't use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false)
                .build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
            ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
            dsd.getNumberOfShards());
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    stage2Vertex.addDataSink("MROutput",
            DataSinkDescriptor.create(
                    OutputDescriptor.create(MROutput.class.getName())
                            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
                    OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf)
            .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultOneToOneEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING
                    || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                    || dagStatus.getState() == DAGStatus.State.FAILED
                    || dagStatus.getState() == DAGStatus.State.KILLED
                    || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // continue;
            }
        }

        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // continue;
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.fatal("Failed to get application progress. Exiting");
                return -1;
            }
        }
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }

    ExampleDriver.printDAGStatus(dagClient, vNames);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
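
Both Tez examples above use the same staging-directory idiom: a uniquely named directory under the filesystem's working directory holds job artifacts and is removed in a finally block. Distilled as a sketch (class name is illustrative):

import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class StagingDirPattern {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // A UUID keeps concurrent jobs sharing one working directory from colliding.
        Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
        fs.mkdirs(stagingDir);
        try {
            // ... copy job artifacts (jars, split metadata) into stagingDir ...
        } finally {
            fs.delete(stagingDir, true); // recursive cleanup, even on failure
        }
    }
}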

From source file: org.apache.tez.mapreduce.output.TestMROutput.java

License: Apache License

public static LogicalIOProcessorRuntimeTask createLogicalTask(Configuration conf, TezUmbilical umbilical,
        String dagName, String vertexName) throws Exception {
    ProcessorDescriptor procDesc = ProcessorDescriptor.create(TestProcessor.class.getName());
    List<InputSpec> inputSpecs = Lists.newLinkedList();
    List<OutputSpec> outputSpecs = Lists.newLinkedList();
    outputSpecs.add(new OutputSpec("Null",
            MROutput.createConfigBuilder(conf, TestOutputFormat.class).build().getOutputDescriptor(), 1));

    TaskSpec taskSpec = new TaskSpec(TezTestUtils.getMockTaskAttemptId(0, 0, 0, 0), dagName, vertexName, -1,
            procDesc, inputSpecs, outputSpecs, null);

    FileSystem fs = FileSystem.getLocal(conf);
    Path workDir = new Path(new Path(System.getProperty("test.build.data", "/tmp")), "TestMapOutput")
            .makeQualified(fs.getUri(), fs.getWorkingDirectory());

    LogicalIOProcessorRuntimeTask task = new LogicalIOProcessorRuntimeTask(taskSpec, 0, conf,
            new String[] { workDir.toString() }, umbilical, null, new HashMap<String, String>(),
            HashMultimap.<String, String>create(), null, "", new ExecutionContextImpl("localhost"),
            Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim());
    return task;
}

From source file: org.openflamingo.util.FileSystemUtils.java

License: Apache License

/**
 * Verifies that the given path can be used as a directory, creating it if it
 * does not already exist.
 *
 * @param path directory path to test and create
 * @throws org.openflamingo.core.exception.FileSystemException if the path exists but is not
 *         a directory, or if the directory cannot be created
 */
public static void testCreateDir(Path path) {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        //            FileSystem fs = path.getFileSystem(conf);

        System.out.println("--------New------------------");
        System.out.println("--------fs--------" + fs);
        System.out.println("--------fs getUri--------" + fs.getUri());
        System.out.println("--------fs getWorkingDirectory--------" + fs.getWorkingDirectory());
        System.out.println("--------fs getHomeDirectory--------" + fs.getHomeDirectory());

        System.out.println("--------path--------" + path);
        System.out.println("--------fs.exists(path)--------" + fs.exists(path));
        System.out.println("--------fs.mkdirs(path--------" + fs.mkdirs(path));

        if (fs.exists(path) && !fs.getFileStatus(path).isDir()) {
            throw new FileSystemException(ExceptionUtils.getMessage("'{}' is not directory.", path));
        }

        if (!fs.exists(path)) {
            if (!fs.mkdirs(path)) {
                throw new FileSystemException(ExceptionUtils.getMessage("Cannot create '{}'", path));
            }
        }
    } catch (Exception ex) {
        String message = MessageFormatter.format("Cannot create '{}'", path.toString()).getMessage();
        throw new FileSystemException(message, ex);
    }
}

From source file: org.trustedanalytics.ingestion.kafka2hdfs.hdfs.ToHdfsStreamConsumer.java

License: Apache License

public ToHdfsStreamConsumer(FileSystem fs, String filePath) {
    this.fs = fs;
    this.filePath = new Path(fs.getWorkingDirectory(), filePath);
    out = null;
}

From source file: root.benchmark.ReutersVectorizationJob.java

License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public int run(String[] args) throws Exception {

    constructParameterList();

    if (parseArguments(args) == null) {
        return -1;
    }

    initializeConfigurationParameters();

    printJobHeader();

    Configuration conf = getConf();

    URI workingURI = new URI(conf.get("fs.default.name"));
    URI inputURI = new URI(inputDirectory);

    FileSystem workingFS = FileSystem.get(workingURI, conf);
    FileSystem inputFS = FileSystem.get(inputURI, conf);

    String workingDirectory = workingFS.getWorkingDirectory() + "/";

    outputDirectory = workingDirectory + outputDirectory;

    Path inputDirectoryPath = new Path(inputDirectory);
    Path outputDirectoryPath = new Path(outputDirectory);

    if (!inputFS.exists(inputDirectoryPath)) {
        throw new Exception("Input directory not found.");
    }
    if (workingFS.delete(outputDirectoryPath, true)) {
        System.out.println("Output directory cleaned.");
    }

    sequenceFilesDirectory = outputDirectory + sequenceFilesDirectory;
    vectorDirectory = outputDirectory + vectorDirectory;
    filenameDictionaryDirectory = outputDirectory + filenameDictionaryDirectory;
    renamedInputdirectory = outputDirectory + renamedInputdirectory;

    // 1: Renames files 1-N
    System.out.println();
    System.out.println("--------------");
    System.out.println("Renaming Files");
    System.out.println("--------------");
    System.out.println("\tInput: " + inputDirectory);
    System.out.println("\tOutput: " + renamedInputdirectory);
    System.out.println();
    String[] arguments_renameFiles = { "-i", inputDirectory, "-o", renamedInputdirectory, "-f",
            filenameDictionaryDirectory };
    ToolRunner.run(new RenameFilesJob(), arguments_renameFiles);

    // 2: Converts text to sequence file
    System.out.println();
    System.out.println("--------------------------------------");
    System.out.println("Creating Sequence Files From Directory");
    System.out.println("--------------------------------------");
    System.out.println("\tInput: " + renamedInputdirectory);
    System.out.println("\tOutput: " + sequenceFilesDirectory);
    System.out.println();
    String[] arguments_SequenceFilesFromDirectory = { "-i", renamedInputdirectory, "-o",
            sequenceFilesDirectory };
    ToolRunner.run(new SequenceFilesFromDirectory(), arguments_SequenceFilesFromDirectory);

    // 3: Creates vectors of text
    System.out.println();
    System.out.println("----------------");
    System.out.println("Creating Vectors");
    System.out.println("----------------");
    System.out.println("\tInput: " + sequenceFilesDirectory);
    System.out.println("\tOutput: " + vectorDirectory);
    System.out.println();
    String[] arguments_SparseVectorsFromSequenceFiles = { "-i", sequenceFilesDirectory, "-o", vectorDirectory,
            "-x", exclusionThreshold, "-md", minimumDocumentFrequency };
    ToolRunner.run(new SparseVectorsFromSequenceFiles(), arguments_SparseVectorsFromSequenceFiles);

    return 0;

}
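
A note on the path handling above: the job builds locations by string concatenation on getWorkingDirectory(), which works because Path.toString() has no trailing slash. The two-argument Path constructor expresses the same resolution without manual separator handling, as in this sketch (names are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WorkingDirJoin {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path output = new Path(fs.getWorkingDirectory(), "output"); // <workingDir>/output
        Path vectors = new Path(output, "vectors");                 // <workingDir>/output/vectors
        System.out.println(vectors);
    }
}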

From source file: root.hap.HierarchicalAffinityPropagationJob.java

License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public int run(String[] args) throws Exception {

    constructParameterList();

    if (parseArguments(args) == null) {
        return -1;
    }

    initializeConfigurationParameters();

    printJobHeader();

    Configuration conf = getConf();

    URI workingURI = new URI(conf.get("fs.default.name"));
    URI inputURI = new URI(inputDirectory);

    FileSystem workingFS = FileSystem.get(workingURI, conf);
    FileSystem inputFS = FileSystem.get(inputURI, conf);

    if (workingDirectory == null) {
        workingDirectory = workingFS.getWorkingDirectory().toString();
    }

    // Check to see if input directory exists
    Path input = new Path(inputDirectory);
    if (!inputFS.exists(input)) {
        System.err.println("Input Directory does not exist.");
        System.exit(2);
    }

    Path simMatPath = new Path(inputDirectory + simMatFileName);
    if (!inputFS.exists(simMatPath)) {
        System.err.println("Similarity Matrix File does not exist.");
        System.exit(2);
    }

    int iterations = Integer.valueOf(numIterations);
    for (int i = 0; i < iterations; i++) {

        RDargs[1] = workingDirectory + RD_File + i;
        RDargs[3] = workingDirectory + AD_File + i;
        RDargs[11] = i + "";

        // If this is the first run, read from initial input.
        if (i == 0) {
            RDargs[1] = inputDirectory;
        }

        System.out.println();
        System.out.println("----------------------");
        System.out.println("Updating Responsibilty");
        System.out.println("----------------------");
        System.out.println("\tInput: " + RDargs[1]);
        System.out.println("\tOutput: " + RDargs[3]);
        System.out.println("\tIteration: " + (i + 1) + " of " + numIterations);
        System.out.println();
        ToolRunner.run(conf, HRD, RDargs);

        // Delete input directory to remove wasted space.
        // Preserve the initial input, though.
        if (i > 0) {
            workingFS.delete(new Path(RDargs[1]), true);
        }

        ADargs[1] = workingDirectory + AD_File + i;
        ADargs[3] = workingDirectory + RD_File + (i + 1);
        ADargs[11] = i + "";

        if (i == iterations - 1) {
            ADargs[3] = workingDirectory + CD_File;
        }

        System.out.println();
        System.out.println("---------------------");
        System.out.println("Updating Availability");
        System.out.println("---------------------");
        System.out.println("\tInput: " + ADargs[1]);
        System.out.println("\tOutput: " + ADargs[3]);
        System.out.println("\tIteration: " + (i + 1) + " of " + numIterations);
        System.out.println();
        ToolRunner.run(conf, HAD, ADargs);

        workingFS.delete(new Path(ADargs[1]), true);

    }

    CDargs[1] = workingDirectory + CD_File;

    System.out.println();
    System.out.println("---------------------");
    System.out.println("Extracting Clusters");
    System.out.println("---------------------");
    System.out.println("\tInput: " + CDargs[1]);
    System.out.println("\tOutput: " + CDargs[3]);
    System.out.println();
    ToolRunner.run(conf, HCD, CDargs);

    workingFS.delete(new Path(CDargs[1]), true);

    workingFS.close();
    inputFS.close();
    return 0;
}