Usage examples for org.apache.hadoop.conf.Configuration#setStrings
public void setStrings(String name, String... values)
Set the array of string values for the name property as comma-delimited values.
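Before the project-specific examples below, a minimal, self-contained sketch of the round trip: setStrings stores the given values under one key as a single comma-delimited string, and getStrings splits them back into an array. The property key and the serializer class names here are illustrative only and are not taken from any of the source files listed afterwards.

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;

public class SetStringsSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Join several values into one comma-delimited property value.
        conf.setStrings("io.serializations",
                "org.apache.hadoop.io.serializer.WritableSerialization",
                "org.apache.hadoop.io.serializer.JavaSerialization");

        // The raw property is a single comma-separated string...
        System.out.println(conf.get("io.serializations"));

        // ...and getStrings() splits it back into an array.
        System.out.println(Arrays.toString(conf.getStrings("io.serializations")));
    }
}

Running this should print the joined comma-delimited value on the first line and the reconstructed two-element array on the second.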
From source file:org.apache.mahout.avro.text.mapreduce.AvroDocumentProcessor.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 0;
    }
    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    AvroComparator.setSchema(AvroDocument._SCHEMA); // TODO: must be done in mapper, reducer configure method.
    conf.setClass("mapred.output.key.comparator.class", AvroComparator.class, RawComparator.class);

    Job job = new Job(conf, "document processor");
    job.setJarByClass(AvroDocumentProcessor.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(AvroDocument.class);
    job.setOutputValueClass(NullWritable.class);
    job.setInputFormatClass(AvroInputFormat.class);
    job.setOutputFormatClass(AvroOutputFormat.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    AvroInputFormat.setAvroInputClass(job, AvroDocument.class);
    AvroOutputFormat.setAvroOutputClass(job, AvroDocument.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.mahout.avro.text.mapreduce.AvroDocumentsWordCount.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 0;
    }
    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    Job job = new Job(conf, "word count");
    job.setJarByClass(AvroDocumentsWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(AvroInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    AvroInputFormat.setAvroInputClass(job, AvroDocument.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:org.apache.mahout.classifier.svm.mapreduce.MapReduceUtil.java
License:Apache License
public static void checkParameters(Configuration oldConf, Class<? extends Mapper> innerMapper,
        Class<? extends Mapper> innerCombiner, Class<? extends Reducer> innerReducer)
        throws IOException, InterruptedException, NoSuchMethodException, IllegalAccessException,
        IllegalArgumentException, InvocationTargetException, ClassNotFoundException, InstantiationException {
    Configuration conf = copyConfiguration(oldConf); // get a copy of it
    // important, for reducer use
    conf.setStrings("io.serializations", "org.apache.hadoop.io.serializer.WritableSerialization");
}
From source file:org.apache.mnemonic.hadoop.MneConfigHelper.java
License:Apache License
public static void setEntityFactoryProxies(Configuration conf, String prefix, Class<?>[] proxies) {
    List<String> vals = new ArrayList<>();
    for (Class<?> itm : proxies) {
        vals.add(itm.getName());
    }
    conf.setStrings(getConfigName(prefix, ENTITY_FACTORY_PROXIES), vals.toArray(new String[0]));
}
From source file:org.apache.oozie.action.hadoop.LauncherHelper.java
License:Apache License
public static void setupLauncherInfo(Configuration launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherAMUtils.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherAMUtils.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherAMUtils.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherAMUtils.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }
}
From source file:org.apache.oozie.action.hadoop.LauncherMapperHelper.java
License:Apache License
public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.setMapperClass(LauncherMapper.class);
    launcherConf.setSpeculativeExecution(false);
    launcherConf.setNumMapTasks(1);
    launcherConf.setNumReduceTasks(0);

    launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<String>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }

    FileSystem fs = Services.get().get(HadoopAccessorService.class)
            .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
    fs.mkdirs(actionDir);

    OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
    try {
        actionConf.writeXml(os);
    } finally {
        IOUtils.closeSafely(os);
    }

    launcherConf.setInputFormat(OozieLauncherInputFormat.class);
    launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}
From source file:org.apache.oozie.service.TestActionCheckerService.java
License:Apache License
/**
 * Tests the delayed check functionality of the Action Check Service Runnable.
 * </p>
 * Starts an action which behaves like an Async Action (Action and Job state set to Running).
 * Verifies the action status to be RUNNING.
 * </p>
 * Updates the last check time to now, and attempts to run the ActionCheckRunnable with the
 * delay configured to 20 seconds.
 *
 * @throws Exception
 */
public void testActionCheckerServiceDelay() throws Exception {
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, getTestCaseFileUri("workflow.xml"));
    conf.setStrings(WorkflowAppService.HADOOP_USER, getTestUser());
    conf.setStrings(OozieClient.GROUP_NAME, getTestGroup());
    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("external-status", "ok");
    conf.set("signal-value", "based_on_action_status");
    conf.set("running-mode", "async");

    final String jobId = engine.submitJob(conf, true);
    sleep(200);

    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.RUNNING);
        }
    });
    sleep(100);

    JPAService jpaService = Services.get().get(JPAService.class);
    assertNotNull(jpaService);
    WorkflowActionsGetForJobJPAExecutor actionsGetExecutor = new WorkflowActionsGetForJobJPAExecutor(jobId);
    List<WorkflowActionBean> actions = jpaService.execute(actionsGetExecutor);
    WorkflowActionBean action = null;
    for (WorkflowActionBean bean : actions) {
        if (bean.getType().equals("test")) {
            action = bean;
            break;
        }
    }
    assertNotNull(action);
    assertEquals(WorkflowActionBean.Status.RUNNING, action.getStatus());

    action.setLastCheckTime(new Date());
    WorkflowActionQueryExecutor.getInstance()
            .executeUpdate(WorkflowActionQuery.UPDATE_ACTION_FOR_LAST_CHECKED_TIME, action);

    int actionCheckDelay = 20;
    Runnable actionCheckRunnable = new ActionCheckRunnable(actionCheckDelay);
    actionCheckRunnable.run();
    sleep(3000);

    List<WorkflowActionBean> actions2 = jpaService.execute(actionsGetExecutor);
    WorkflowActionBean action2 = null;
    for (WorkflowActionBean bean : actions2) {
        if (bean.getType().equals("test")) {
            action2 = bean;
            break;
        }
    }
    assertNotNull(action);
    assertEquals(WorkflowActionBean.Status.RUNNING, action2.getStatus());
    assertEquals(WorkflowJob.Status.RUNNING, engine.getJob(jobId).getStatus());
}
From source file:org.apache.oozie.service.TestLiteWorkflowAppService.java
License:Apache License
public void checkSubworkflowLibHelper(String inherit, String inheritWF, int unique, String[] parentLibs,
        String[] childLibs, String[] expectedLibs) throws Exception {
    Services services = new Services();
    try {
        services.getConf().set("oozie.subworkflow.classpath.inheritance", inherit);
        services.init();

        Reader reader = IOUtils.getResourceAsReader("wf-schema-valid.xml", -1);
        String childWFDir = createTestCaseSubDir("child-wf-" + unique);
        File childWFFile = new File(childWFDir, "workflow.xml");
        Writer writer = new FileWriter(childWFFile);
        IOUtils.copyCharStream(reader, writer);

        WorkflowAppService wps = Services.get().get(WorkflowAppService.class);

        Configuration jobConf = new XConfiguration();
        jobConf.set(OozieClient.APP_PATH, childWFFile.toURI().toString());
        jobConf.set(OozieClient.USER_NAME, getTestUser());
        if (inheritWF != null) {
            jobConf.set("oozie.wf.subworkflow.classpath.inheritance", inheritWF);
        }

        String childLibDir = createTestCaseSubDir("child-wf-" + unique, "lib");
        for (String childLib : childLibs) {
            writer = new FileWriter(new File(childLibDir, childLib));
            writer.write("bla bla");
            writer.close();
        }

        String parentLibDir = createTestCaseSubDir("parent-wf-" + unique, "lib");
        String[] parentLibsFullPaths = new String[parentLibs.length];
        for (int i = 0; i < parentLibs.length; i++) {
            parentLibsFullPaths[i] = new File(parentLibDir, parentLibs[i]).toString();
            writer = new FileWriter(parentLibsFullPaths[i]);
            writer.write("bla bla");
            writer.close();
        }

        // Set the parent libs
        jobConf.setStrings(WorkflowAppService.APP_LIB_PATH_LIST, parentLibsFullPaths);

        Configuration protoConf = wps.createProtoActionConf(jobConf, true);
        assertEquals(getTestUser(), protoConf.get(OozieClient.USER_NAME));

        String[] foundLibs = protoConf.getStrings(WorkflowAppService.APP_LIB_PATH_LIST);
        if (expectedLibs.length > 0) {
            assertEquals(expectedLibs.length, foundLibs.length);
            for (int i = 0; i < foundLibs.length; i++) {
                Path p = new Path(foundLibs[i]);
                foundLibs[i] = p.getName();
            }
            Arrays.sort(expectedLibs);
            Arrays.sort(foundLibs);
            assertEquals(Arrays.toString(expectedLibs), Arrays.toString(foundLibs));
        } else {
            assertEquals(null, foundLibs);
        }
    } finally {
        services.destroy();
    }
}
From source file:org.apache.oozie.service.TestPurgeService.java
License:Apache License
/**
 * Tests the {@link org.apache.oozie.service.PurgeService}.
 * </p>
 * Creates and runs a new workflow job to completion.
 * Attempts to purge jobs older than a day. Verifies the presence of the job in the system.
 * </p>
 * Sets the end date for the same job to make it qualify for the purge criteria.
 * Calls the purge service, and ensure the job does not exist in the system.
 */
public void testPurgeServiceForWorkflow() throws Exception {
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine("u");
    Configuration conf = new XConfiguration();
    conf.set(OozieClient.APP_PATH, getTestCaseFileUri("workflow.xml"));
    conf.setStrings(OozieClient.USER_NAME, getTestUser());
    conf.setStrings(OozieClient.GROUP_NAME, getTestGroup());
    conf.set(OozieClient.LOG_TOKEN, "t");
    conf.set("external-status", "ok");
    conf.set("signal-value", "based_on_action_status");

    final String jobId = engine.submitJob(conf, true);

    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            return (engine.getJob(jobId).getStatus() == WorkflowJob.Status.SUCCEEDED);
        }
    });
    assertEquals(WorkflowJob.Status.SUCCEEDED, engine.getJob(jobId).getStatus());

    new PurgeXCommand(1, 1, 1, 10000).call();
    sleep(1000);

    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobGetJPAExecutor wfJobGetCmd = new WorkflowJobGetJPAExecutor(jobId);
    WorkflowJobBean wfBean = jpaService.execute(wfJobGetCmd);

    Date endDate = new Date(System.currentTimeMillis() - 2 * 24 * 60 * 60 * 1000);
    wfBean.setEndTime(endDate);
    wfBean.setLastModifiedTime(new Date());
    WorkflowJobQueryExecutor.getInstance()
            .executeUpdate(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_END, wfBean);

    Runnable purgeRunnable = new PurgeRunnable(1, 1, 1, 100);
    purgeRunnable.run();

    waitFor(10000, new Predicate() {
        public boolean evaluate() throws Exception {
            try {
                engine.getJob(jobId).getStatus();
            } catch (Exception ex) {
                return true;
            }
            return false;
        }
    });

    try {
        engine.getJob(jobId).getStatus();
        fail("Job should be purged. Should fail.");
    } catch (Exception ex) {
        assertEquals(ex.getClass(), DagEngineException.class);
        DagEngineException dex = (DagEngineException) ex;
        assertEquals(ErrorCode.E0604, dex.getErrorCode());
    }
}
From source file:org.apache.phoenix.end2end.HttpParamImpersonationQueryServerIT.java
License:Apache License
/**
 * Setup and start kerberos, hbase
 */
@BeforeClass
public static void setUp() throws Exception {
    final Configuration conf = UTIL.getConfiguration();
    // Ensure the dirs we need are created/empty
    ensureIsEmptyDirectory(TEMP_DIR);
    ensureIsEmptyDirectory(KEYTAB_DIR);
    KEYTAB = new File(KEYTAB_DIR, "test.keytab");
    // Start a MiniKDC
    KDC = UTIL.setupMiniKdc(KEYTAB);
    // Create a service principal and spnego principal in one keytab
    // NB. Due to some apparent limitations between HDFS and HBase in the same JVM, trying to
    // use separate identies for HBase and HDFS results in a GSS initiate error. The quick
    // solution is to just use a single "service" principal instead of "hbase" and "hdfs"
    // (or "dn" and "nn") per usual.
    KDC.createPrincipal(KEYTAB, SPNEGO_PRINCIPAL, SERVICE_PRINCIPAL);
    // Start ZK by hand
    UTIL.startMiniZKCluster();

    // Create a number of unprivileged users
    createUsers(2);

    // Set configuration for HBase
    HBaseKerberosUtils.setPrincipalForTesting(SERVICE_PRINCIPAL + "@" + KDC.getRealm());
    HBaseKerberosUtils.setSecuredConfiguration(conf);
    setHdfsSecuredConfiguration(conf);
    UserGroupInformation.setConfiguration(conf);
    conf.setInt(HConstants.MASTER_PORT, 0);
    conf.setInt(HConstants.MASTER_INFO_PORT, 0);
    conf.setInt(HConstants.REGIONSERVER_PORT, 0);
    conf.setInt(HConstants.REGIONSERVER_INFO_PORT, 0);
    conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, AccessController.class.getName());
    conf.setStrings(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, AccessController.class.getName());
    conf.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, AccessController.class.getName(),
            TokenProvider.class.getName());

    // Secure Phoenix setup
    conf.set("phoenix.queryserver.kerberos.principal", SPNEGO_PRINCIPAL);
    conf.set("phoenix.queryserver.keytab.file", KEYTAB.getAbsolutePath());
    conf.setBoolean(QueryServices.QUERY_SERVER_DISABLE_KERBEROS_LOGIN, true);
    conf.setInt(QueryServices.QUERY_SERVER_HTTP_PORT_ATTRIB, 0);
    // Required so that PQS can impersonate the end-users to HBase
    conf.set("hadoop.proxyuser.HTTP.groups", "*");
    conf.set("hadoop.proxyuser.HTTP.hosts", "*");
    // user1 is allowed to impersonate others, user2 is not
    conf.set("hadoop.proxyuser.user1.groups", "*");
    conf.set("hadoop.proxyuser.user1.hosts", "*");
    conf.setBoolean(QueryServices.QUERY_SERVER_WITH_REMOTEUSEREXTRACTOR_ATTRIB, true);

    // Clear the cached singletons so we can inject our own.
    InstanceResolver.clearSingletons();
    // Make sure the ConnectionInfo doesn't try to pull a default Configuration
    InstanceResolver.getSingleton(ConfigurationFactory.class, new ConfigurationFactory() {
        @Override
        public Configuration getConfiguration() {
            return conf;
        }

        @Override
        public Configuration getConfiguration(Configuration confToClone) {
            Configuration copy = new Configuration(conf);
            copy.addResource(confToClone);
            return copy;
        }
    });
    updateDefaultRealm();

    // Start HDFS
    UTIL.startMiniDFSCluster(1);
    // Use LocalHBaseCluster to avoid HBaseTestingUtility from doing something wrong
    // NB. I'm not actually sure what HTU does incorrect, but this was pulled from some test
    // classes in HBase itself. I couldn't get HTU to work myself (2017/07/06)
    Path rootdir = UTIL.getDataTestDirOnTestFS(HttpParamImpersonationQueryServerIT.class.getSimpleName());
    FSUtils.setRootDir(conf, rootdir);
    HBASE_CLUSTER = new LocalHBaseCluster(conf, 1);
    HBASE_CLUSTER.startup();
    // Then fork a thread with PQS in it.
    startQueryServer();
}