Example usage for org.apache.hadoop.conf Configuration setStrings

Introduction

This page shows example usage of org.apache.hadoop.conf.Configuration.setStrings.

Prototype

public void setStrings(String name, String... values) 

Document

Set the array of string values for the name property as comma-delimited values.
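
A minimal, self-contained sketch of the round trip is shown below (the property name "my.example.hosts" and the class name are illustrative only): setStrings joins the given values into a single comma-delimited string, and getStrings splits that string back into an array.

import org.apache.hadoop.conf.Configuration;

public class SetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store three values under one (illustrative) property name;
        // Hadoop joins them into a single comma-delimited string.
        conf.setStrings("my.example.hosts", "host1", "host2", "host3");

        // The raw stored value is the comma-delimited form.
        System.out.println(conf.get("my.example.hosts")); // host1,host2,host3

        // getStrings() splits the stored value back into a String[].
        for (String host : conf.getStrings("my.example.hosts")) {
            System.out.println(host);
        }
    }
}

Because the values are stored as one comma-delimited string, individual values that themselves contain commas will not survive the round trip intact.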

Usage

From source file:org.apache.mahout.avro.text.mapreduce.AvroDocumentProcessor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length != 2) {
        System.err.println("Usage: AvroDocumentProcessor <in> <out>");
        return 0;
    }

    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    AvroComparator.setSchema(AvroDocument._SCHEMA); //TODO: must be done in mapper, reducer configure method.

    conf.setClass("mapred.output.key.comparator.class", AvroComparator.class, RawComparator.class);

    Job job = new Job(conf, "document processor");
    job.setJarByClass(AvroDocumentProcessor.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(AvroDocument.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(AvroInputFormat.class);
    job.setOutputFormatClass(AvroOutputFormat.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    AvroInputFormat.setAvroInputClass(job, AvroDocument.class);
    AvroOutputFormat.setAvroOutputClass(job, AvroDocument.class);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.apache.mahout.avro.text.mapreduce.AvroDocumentsWordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 0;
    }

    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    Job job = new Job(conf, "word count");
    job.setJarByClass(AvroDocumentsWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(AvroInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    AvroInputFormat.setAvroInputClass(job, AvroDocument.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.apache.mahout.classifier.svm.mapreduce.MapReduceUtil.java

License:Apache License

public static void checkParameters(Configuration oldConf, Class<? extends Mapper> innerMapper,
        Class<? extends Mapper> innerCombiner, Class<? extends Reducer> innerReducer)
        throws IOException, InterruptedException, NoSuchMethodException, IllegalAccessException,
        IllegalArgumentException, InvocationTargetException, ClassNotFoundException, InstantiationException {
    Configuration conf = copyConfiguration(oldConf); // get a copy of it

    // important, for reducer use
    conf.setStrings("io.serializations", "org.apache.hadoop.io.serializer.WritableSerialization");

}

From source file:org.apache.mnemonic.hadoop.MneConfigHelper.java

License:Apache License

public static void setEntityFactoryProxies(Configuration conf, String prefix, Class<?>[] proxies) {
    List<String> vals = new ArrayList<>();
    for (Class<?> itm : proxies) {
        vals.add(itm.getName());
    }
    conf.setStrings(getConfigName(prefix, ENTITY_FACTORY_PROXIES), vals.toArray(new String[0]));
}

From source file:org.apache.oozie.action.hadoop.LauncherHelper.java

License:Apache License

public static void setupLauncherInfo(Configuration launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherAMUtils.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }
}

From source file:org.apache.oozie.action.hadoop.LauncherMapperHelper.java

License:Apache License

public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.setMapperClass(LauncherMapper.class);
    launcherConf.setSpeculativeExecution(false);
    launcherConf.setNumMapTasks(1);
    launcherConf.setNumReduceTasks(0);

    launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<String>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }

    FileSystem fs = Services.get().get(HadoopAccessorService.class)
            .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
    fs.mkdirs(actionDir);

    OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
    try {
        actionConf.writeXml(os);
    } finally {
        IOUtils.closeSafely(os);
    }

    launcherConf.setInputFormat(OozieLauncherInputFormat.class);
    launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}

From source file:org.apache.oozie.service.TestActionCheckerService.java

License:Apache License

/**
 * Tests the delayed check functionality of the Action Check Service
 * Runnable. </p> Starts an action which behaves like an Async Action
 * (Action and Job state set to Running). Verifies the action status to be
 * RUNNING. </p> Updates the last check time to now, and attempts to run the
 * ActionCheckRunnable with the delay configured to 20 seconds.
 *
 * @throws Exception
 */
public void testActionCheckerServiceDelay() throws Exception {
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, getTestCaseFileUri("workflow.xml"));
    conf.setStrings(WorkflowAppService.HADOOP_USER, getTestUser());
    conf.setStrings(OozieClient.GROUP_NAME, getTestGroup());

    conf.set(OozieClient.LOG_TOKEN, "t");

    conf.set("external-status", "ok");
    conf.set("signal-value", "based_on_action_status");
    conf.set("running-mode", "async");

    final String jobId = engine.submitJob(conf, true);
    sleep(200);

    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.RUNNING);
        }
    });

    sleep(100);

    JPAService jpaService = Services.get().get(JPAService.class);
    assertNotNull(jpaService);
    WorkflowActionsGetForJobJPAExecutor actionsGetExecutor = new WorkflowActionsGetForJobJPAExecutor(jobId);
    List<WorkflowActionBean> actions = jpaService.execute(actionsGetExecutor);
    WorkflowActionBean action = null;
    for (WorkflowActionBean bean : actions) {
        if (bean.getType().equals("test")) {
            action = bean;
            break;
        }
    }
    assertNotNull(action);
    assertEquals(WorkflowActionBean.Status.RUNNING, action.getStatus());

    action.setLastCheckTime(new Date());
    WorkflowActionQueryExecutor.getInstance()
            .executeUpdate(WorkflowActionQuery.UPDATE_ACTION_FOR_LAST_CHECKED_TIME, action);

    int actionCheckDelay = 20;

    Runnable actionCheckRunnable = new ActionCheckRunnable(actionCheckDelay);
    actionCheckRunnable.run();

    sleep(3000);

    List<WorkflowActionBean> actions2 = jpaService.execute(actionsGetExecutor);
    WorkflowActionBean action2 = null;
    for (WorkflowActionBean bean : actions2) {
        if (bean.getType().equals("test")) {
            action2 = bean;
            break;
        }
    }
    assertNotNull(action2);
    assertEquals(WorkflowActionBean.Status.RUNNING, action2.getStatus());
    assertEquals(WorkflowJob.Status.RUNNING, engine.getJob(jobId).getStatus());
}

From source file:org.apache.oozie.service.TestLiteWorkflowAppService.java

License:Apache License

public void checkSubworkflowLibHelper(String inherit, String inheritWF, int unique, String[] parentLibs,
        String[] childLibs, String[] expectedLibs) throws Exception {
    Services services = new Services();
    try {
        services.getConf().set("oozie.subworkflow.classpath.inheritance", inherit);
        services.init();
        Reader reader = IOUtils.getResourceAsReader("wf-schema-valid.xml", -1);
        String childWFDir = createTestCaseSubDir("child-wf-" + unique);
        File childWFFile = new File(childWFDir, "workflow.xml");
        Writer writer = new FileWriter(childWFFile);
        IOUtils.copyCharStream(reader, writer);

        WorkflowAppService wps = Services.get().get(WorkflowAppService.class);
        Configuration jobConf = new XConfiguration();
        jobConf.set(OozieClient.APP_PATH, childWFFile.toURI().toString());
        jobConf.set(OozieClient.USER_NAME, getTestUser());
        if (inheritWF != null) {
            jobConf.set("oozie.wf.subworkflow.classpath.inheritance", inheritWF);
        }

        String childLibDir = createTestCaseSubDir("child-wf-" + unique, "lib");
        for (String childLib : childLibs) {
            writer = new FileWriter(new File(childLibDir, childLib));
            writer.write("bla bla");
            writer.close();
        }
        String parentLibDir = createTestCaseSubDir("parent-wf-" + unique, "lib");
        String[] parentLibsFullPaths = new String[parentLibs.length];
        for (int i = 0; i < parentLibs.length; i++) {
            parentLibsFullPaths[i] = new File(parentLibDir, parentLibs[i]).toString();
            writer = new FileWriter(parentLibsFullPaths[i]);
            writer.write("bla bla");
            writer.close();
        }
        // Set the parent libs
        jobConf.setStrings(WorkflowAppService.APP_LIB_PATH_LIST, parentLibsFullPaths);

        Configuration protoConf = wps.createProtoActionConf(jobConf, true);
        assertEquals(getTestUser(), protoConf.get(OozieClient.USER_NAME));

        String[] foundLibs = protoConf.getStrings(WorkflowAppService.APP_LIB_PATH_LIST);
        if (expectedLibs.length > 0) {
            assertEquals(expectedLibs.length, foundLibs.length);
            for (int i = 0; i < foundLibs.length; i++) {
                Path p = new Path(foundLibs[i]);
                foundLibs[i] = p.getName();
            }
            Arrays.sort(expectedLibs);
            Arrays.sort(foundLibs);
            assertEquals(Arrays.toString(expectedLibs), Arrays.toString(foundLibs));
        } else {
            assertEquals(null, foundLibs);
        }
    } finally {
        services.destroy();
    }
}

From source file:org.apache.oozie.service.TestPurgeService.java

License:Apache License

/**
 * Tests the {@link org.apache.oozie.service.PurgeService}.
 * </p>
 * Creates and runs a new workflow job to completion.
 * Attempts to purge jobs older than a day. Verifies the presence of the job in the system.
 * </p>
 * Sets the end date for the same job to make it qualify for the purge criteria.
 * Calls the purge service, and ensure the job does not exist in the system.
 */
public void testPurgeServiceForWorkflow() throws Exception {
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, getTestCaseFileUri("workflow.xml"));
    conf.setStrings(OozieClient.USER_NAME, getTestUser());
    conf.setStrings(OozieClient.GROUP_NAME, getTestGroup());

    conf.set(OozieClient.LOG_TOKEN, "t");

    conf.set("external-status", "ok");
    conf.set("signal-value", "based_on_action_status");
    final String jobId = engine.submitJob(conf, true);

    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUCCEEDED);
        }
    });
    assertEquals(WorkflowJob.Status.SUCCEEDED, engine.getJob(jobId).getStatus());
    new PurgeXCommand(1, 1, 1, 10000).call();
    sleep(1000);

    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobGetJPAExecutor wfJobGetCmd = new WorkflowJobGetJPAExecutor(jobId);
    WorkflowJobBean wfBean = jpaService.execute(wfJobGetCmd);
    Date endDate = new Date(System.currentTimeMillis() - 2 * 24 * 60 * 60 * 1000);
    wfBean.setEndTime(endDate);
    wfBean.setLastModifiedTime(new Date());
    WorkflowJobQueryExecutor.getInstance()
            .executeUpdate(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_END, wfBean);

    Runnable purgeRunnable = new PurgeRunnable(1, 1, 1, 100);
    purgeRunnable.run();

    waitFor(10000, new Predicate() {
        public boolean evaluate() throws Exception {
            try {
                engine.getJob(jobId).getStatus();
            } catch (Exception ex) {
                return true;
            }
            return false;
        }
    });

    try {
        engine.getJob(jobId).getStatus();
        fail("Job should be purged. Should fail.");
    } catch (Exception ex) {
        assertEquals(ex.getClass(), DagEngineException.class);
        DagEngineException dex = (DagEngineException) ex;
        assertEquals(ErrorCode.E0604, dex.getErrorCode());
    }

}

From source file:org.apache.phoenix.end2end.HttpParamImpersonationQueryServerIT.java

License:Apache License

/**
 * Setup and start kerberos, hbase
 */
@BeforeClass
public static void setUp() throws Exception {
    final Configuration conf = UTIL.getConfiguration();
    // Ensure the dirs we need are created/empty
    ensureIsEmptyDirectory(TEMP_DIR);
    ensureIsEmptyDirectory(KEYTAB_DIR);
    KEYTAB = new File(KEYTAB_DIR, "test.keytab");
    // Start a MiniKDC
    KDC = UTIL.setupMiniKdc(KEYTAB);
    // Create a service principal and spnego principal in one keytab
    // NB. Due to some apparent limitations between HDFS and HBase in the same JVM, trying to
    //     use separate identies for HBase and HDFS results in a GSS initiate error. The quick
    //     solution is to just use a single "service" principal instead of "hbase" and "hdfs"
    //     (or "dn" and "nn") per usual.
    KDC.createPrincipal(KEYTAB, SPNEGO_PRINCIPAL, SERVICE_PRINCIPAL);
    // Start ZK by hand
    UTIL.startMiniZKCluster();

    // Create a number of unprivileged users
    createUsers(2);

    // Set configuration for HBase
    HBaseKerberosUtils.setPrincipalForTesting(SERVICE_PRINCIPAL + "@" + KDC.getRealm());
    HBaseKerberosUtils.setSecuredConfiguration(conf);
    setHdfsSecuredConfiguration(conf);
    UserGroupInformation.setConfiguration(conf);
    conf.setInt(HConstants.MASTER_PORT, 0);
    conf.setInt(HConstants.MASTER_INFO_PORT, 0);
    conf.setInt(HConstants.REGIONSERVER_PORT, 0);
    conf.setInt(HConstants.REGIONSERVER_INFO_PORT, 0);
    conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, AccessController.class.getName());
    conf.setStrings(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, AccessController.class.getName());
    conf.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, AccessController.class.getName(),
            TokenProvider.class.getName());

    // Secure Phoenix setup
    conf.set("phoenix.queryserver.kerberos.principal", SPNEGO_PRINCIPAL);
    conf.set("phoenix.queryserver.keytab.file", KEYTAB.getAbsolutePath());
    conf.setBoolean(QueryServices.QUERY_SERVER_DISABLE_KERBEROS_LOGIN, true);
    conf.setInt(QueryServices.QUERY_SERVER_HTTP_PORT_ATTRIB, 0);
    // Required so that PQS can impersonate the end-users to HBase
    conf.set("hadoop.proxyuser.HTTP.groups", "*");
    conf.set("hadoop.proxyuser.HTTP.hosts", "*");
    // user1 is allowed to impersonate others, user2 is not
    conf.set("hadoop.proxyuser.user1.groups", "*");
    conf.set("hadoop.proxyuser.user1.hosts", "*");
    conf.setBoolean(QueryServices.QUERY_SERVER_WITH_REMOTEUSEREXTRACTOR_ATTRIB, true);

    // Clear the cached singletons so we can inject our own.
    InstanceResolver.clearSingletons();
    // Make sure the ConnectionInfo doesn't try to pull a default Configuration
    InstanceResolver.getSingleton(ConfigurationFactory.class, new ConfigurationFactory() {
        @Override
        public Configuration getConfiguration() {
            return conf;
        }

        @Override
        public Configuration getConfiguration(Configuration confToClone) {
            Configuration copy = new Configuration(conf);
            copy.addResource(confToClone);
            return copy;
        }
    });
    updateDefaultRealm();

    // Start HDFS
    UTIL.startMiniDFSCluster(1);
    // Use LocalHBaseCluster to avoid HBaseTestingUtility from doing something wrong
    // NB. I'm not actually sure what HTU does incorrect, but this was pulled from some test
    //     classes in HBase itself. I couldn't get HTU to work myself (2017/07/06)
    Path rootdir = UTIL.getDataTestDirOnTestFS(HttpParamImpersonationQueryServerIT.class.getSimpleName());
    FSUtils.setRootDir(conf, rootdir);
    HBASE_CLUSTER = new LocalHBaseCluster(conf, 1);
    HBASE_CLUSTER.startup();

    // Then fork a thread with PQS in it.
    startQueryServer();
}