List of usage examples for org.apache.hadoop.conf Configuration setBoolean
public void setBoolean(String name, boolean value)

Sets the value of the name property to a boolean.
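Before the project-specific examples below, here is a minimal, self-contained sketch of how setBoolean pairs with getBoolean on a plain Configuration. The property name "example.feature.enabled" is only an illustrative placeholder, not a real Hadoop key.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // "example.feature.enabled" is a made-up property name used only for illustration
        conf.setBoolean("example.feature.enabled", true);
        // getBoolean returns the stored value, or the supplied default if the property is unset
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}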
From source file:cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java
License:Apache License
public static void setMockInstance(JobContext job, String instanceName) {
    Configuration conf = job.getConfiguration();
    conf.setBoolean(INSTANCE_HAS_BEEN_SET, true);
    conf.setBoolean(MOCK, true);
    conf.set(INSTANCE_NAME, instanceName);
}
From source file:clustering.inverted_index.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir [decimal_number] [pruning_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }
    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */
    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Mapper.class);
    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */
    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, normDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Mapper.class);
    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.similarity.ISimDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simpre_dir output_dir [compression_or_not] [reduce_task_number]\n",
                getClass().getSimpleName());
        System.exit(1);
    }
    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    Job job = Job.getInstance(conf, "isim job");
    job.setJarByClass(ISimDriver.class);

    if (args.length > 2 && args[2].equals("0")) {
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    } else {
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(args[0]));

        conf.setBoolean("mapreduce.map.output.compress", true);
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.GzipCodec");

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

    if (args.length > 3) {
        conf.setInt("reduce.num", Integer.valueOf(args[3]));
    } else {
        conf.setInt("reduce.num", 5);
    }

    job.setMapperClass(ISimMapper.class);
    job.setMapOutputKeyClass(IntIntTupleWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setCombinerClass(ISimCombiner.class);
    job.setPartitionerClass(HashPartitioner.class);

    job.setNumReduceTasks(conf.getInt("reduce.num", 1));

    job.setReducerClass(ISimReducer.class);
    job.setOutputKeyClass(IntIntTupleWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("inverted similarity job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}
From source file:cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java
License:Apache License
/** Create and setup a job */
@SuppressWarnings("deprecation")
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:co.cask.cdap.data2.transaction.distributed.TransactionServiceClientTest.java
License:Apache License
@BeforeClass
public static void beforeClass() throws Exception {
    HBaseTestingUtility hBaseTestingUtility = new HBaseTestingUtility();
    hBaseTestingUtility.startMiniDFSCluster(1);
    Configuration hConf = hBaseTestingUtility.getConfiguration();
    hConf.setBoolean("fs.hdfs.impl.disable.cache", true);

    zkServer = InMemoryZKServer.builder().build();
    zkServer.startAndWait();

    cConf = CConfiguration.create();
    // tests should use the current user for HDFS
    cConf.set(Constants.CFG_HDFS_USER, System.getProperty("user.name"));
    cConf.set(Constants.Zookeeper.QUORUM, zkServer.getConnectionStr());
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, tmpFolder.newFolder().getAbsolutePath());

    // we want persisting for this test
    cConf.setBoolean(TxConstants.Manager.CFG_DO_PERSIST, true);

    server = TransactionServiceTest.createTxService(zkServer.getConnectionStr(), Networks.getRandomPort(),
            hConf, tmpFolder.newFolder());
    server.startAndWait();

    injector = Guice.createInjector(new ConfigModule(cConf, hConf), new ZKClientModule(),
            new LocationRuntimeModule().getInMemoryModules(),
            new DiscoveryRuntimeModule().getDistributedModules(), new TransactionMetricsModule(),
            new DataFabricModules().getDistributedModules(),
            Modules.override(new DataSetsModules().getDistributedModules()).with(new AbstractModule() {
                @Override
                protected void configure() {
                    bind(MetadataStore.class).to(NoOpMetadataStore.class);
                }
            }));

    zkClient = injector.getInstance(ZKClientService.class);
    zkClient.startAndWait();

    txStateStorage = injector.getInstance(TransactionStateStorage.class);
    txStateStorage.startAndWait();
}
From source file:co.cask.cdap.data2.transaction.distributed.TransactionServiceTest.java
License:Apache License
@Test(timeout = 60000)
public void testHA() throws Exception {
    HBaseTestingUtility hBaseTestingUtility = new HBaseTestingUtility();
    hBaseTestingUtility.startMiniDFSCluster(1);
    Configuration hConf = hBaseTestingUtility.getConfiguration();
    hConf.setBoolean("fs.hdfs.impl.disable.cache", true);

    InMemoryZKServer zkServer = InMemoryZKServer.builder().build();
    zkServer.startAndWait();

    // NOTE: we play with blocking/nonblocking a lot below
    // as until we integrate with "leader election" stuff, service blocks on start if it is not a leader
    // TODO: fix this by integration with generic leader election stuff
    try {
        CConfiguration cConf = CConfiguration.create();
        // tests should use the current user for HDFS
        cConf.set(Constants.CFG_HDFS_USER, System.getProperty("user.name"));
        cConf.set(Constants.Zookeeper.QUORUM, zkServer.getConnectionStr());
        cConf.set(Constants.CFG_LOCAL_DATA_DIR, tmpFolder.newFolder().getAbsolutePath());

        Injector injector = Guice.createInjector(new ConfigModule(cConf), new ZKClientModule(),
                new LocationRuntimeModule().getInMemoryModules(),
                new DiscoveryRuntimeModule().getDistributedModules(), new TransactionMetricsModule(),
                new DataFabricModules().getDistributedModules(),
                Modules.override(new DataSetsModules().getDistributedModules()).with(new AbstractModule() {
                    @Override
                    protected void configure() {
                        bind(MetadataStore.class).to(NoOpMetadataStore.class);
                    }
                }));

        ZKClientService zkClient = injector.getInstance(ZKClientService.class);
        zkClient.startAndWait();

        final Table table = createTable("myTable");
        try {
            // tx service client
            // NOTE: we can init it earlier than we start services, it should pick them up when they are available
            TransactionSystemClient txClient = injector.getInstance(TransactionSystemClient.class);
            TransactionExecutor txExecutor = new DefaultTransactionExecutor(txClient,
                    ImmutableList.of((TransactionAware) table));

            // starting tx service, tx client can pick it up
            TransactionService first = createTxService(zkServer.getConnectionStr(), Networks.getRandomPort(),
                    hConf, tmpFolder.newFolder());
            first.startAndWait();
            Assert.assertNotNull(txClient.startShort());
            verifyGetAndPut(table, txExecutor, null, "val1");

            // starting another tx service should not hurt
            TransactionService second = createTxService(zkServer.getConnectionStr(), Networks.getRandomPort(),
                    hConf, tmpFolder.newFolder());
            // NOTE: we don't have to wait for start as client should pick it up anyways, but we do wait to ensure
            // the case with two active is handled well
            second.startAndWait();
            // wait a bit for the effect
            TimeUnit.SECONDS.sleep(1);

            Assert.assertNotNull(txClient.startShort());
            verifyGetAndPut(table, txExecutor, "val1", "val2");

            // shutting down the first one is fine: we have another one to pick up the leader role
            first.stopAndWait();

            Assert.assertNotNull(txClient.startShort());
            verifyGetAndPut(table, txExecutor, "val2", "val3");

            // doing same trick again to failover to the third one
            TransactionService third = createTxService(zkServer.getConnectionStr(), Networks.getRandomPort(),
                    hConf, tmpFolder.newFolder());
            // NOTE: we don't have to wait for start as client should pick it up anyways
            third.start();

            // stopping second one
            second.stopAndWait();

            Assert.assertNotNull(txClient.startShort());
            verifyGetAndPut(table, txExecutor, "val3", "val4");

            // releasing resources
            third.stop();
        } finally {
            dropTable("myTable", cConf);
            zkClient.stopAndWait();
        }
    } finally {
        zkServer.stop();
    }
}
From source file:co.cask.cdap.explore.service.ExploreServiceUtils.java
License:Apache License
/**
 * Change hive-site.xml file, and return a temp copy of it to which are added
 * necessary options.
 */
private static File updateHiveConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
        conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
        LOG.error("File {} is malformed.", confFile, e);
        throw Throwables.propagate(e);
    }

    // we prefer jars at container's root directory before job.jar,
    // we edit the YARN_APPLICATION_CLASSPATH in yarn-site.xml using
    // co.cask.cdap.explore.service.ExploreServiceUtils.updateYarnConfFile and
    // setting the MAPREDUCE_JOB_CLASSLOADER and MAPREDUCE_JOB_USER_CLASSPATH_FIRST to false will put
    // YARN_APPLICATION_CLASSPATH before job.jar for container's classpath.
    conf.setBoolean(Job.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false);

    String sparkHome = System.getenv(Constants.SPARK_HOME);
    if (sparkHome != null) {
        LOG.debug("Setting spark.home in hive conf to {}", sparkHome);
        conf.set("spark.home", sparkHome);
    }

    File newHiveConfFile = new File(tempDir, "hive-site.xml");

    try (FileOutputStream os = new FileOutputStream(newHiveConfFile)) {
        conf.writeXml(os);
    } catch (IOException e) {
        LOG.error("Problem creating temporary hive-site.xml conf file at {}", newHiveConfFile, e);
        throw Throwables.propagate(e);
    }
    return newHiveConfFile;
}
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
/**
 * Creates a MapReduce {@link Job} instance.
 *
 * @param hadoopTmpDir directory for the "hadoop.tmp.dir" configuration
 */
private Job createJob(File hadoopTmpDir) throws IOException {
    Job job = Job.getInstance(new Configuration(hConf));
    Configuration jobConf = job.getConfiguration();

    if (MapReduceTaskContextProvider.isLocal(jobConf)) {
        // Set the MR framework local directories inside the given tmp directory.
        // Setting "hadoop.tmp.dir" here has no effect due to Explore Service need to set "hadoop.tmp.dir"
        // as system property for Hive to work in local mode. The variable substitution of hadoop conf
        // gives system property the highest precedence.
        jobConf.set("mapreduce.cluster.local.dir", new File(hadoopTmpDir, "local").getAbsolutePath());
        jobConf.set("mapreduce.jobtracker.system.dir", new File(hadoopTmpDir, "system").getAbsolutePath());
        jobConf.set("mapreduce.jobtracker.staging.root.dir", new File(hadoopTmpDir, "staging").getAbsolutePath());
        jobConf.set("mapreduce.cluster.temp.dir", new File(hadoopTmpDir, "temp").getAbsolutePath());
    }

    if (UserGroupInformation.isSecurityEnabled()) {
        // If runs in secure cluster, this program runner is running in a yarn container, hence not able
        // to get authenticated with the history.
        jobConf.unset("mapreduce.jobhistory.address");
        jobConf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        LOG.info("Running in secure mode; adding all user credentials: {}", credentials.getAllTokens());
        job.getCredentials().addAll(credentials);
    }
    return job;
}
From source file:co.cask.cdap.operations.yarn.YarnRMHAOperationalStatsTest.java
License:Apache License
@Override
protected MiniYARNCluster createYarnCluster() throws IOException, InterruptedException, YarnException {
    Configuration hConf = new Configuration();
    hConf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
    String hostname = MiniYARNCluster.getHostname();
    for (String confKey : YarnConfiguration.RM_SERVICES_ADDRESS_CONF_KEYS) {
        hConf.set(HAUtil.addSuffix(confKey, "rm0"), hostname + ":" + Networks.getRandomPort());
        hConf.set(HAUtil.addSuffix(confKey, "rm1"), hostname + ":" + Networks.getRandomPort());
    }
    MiniYARNCluster yarnCluster = new MiniYARNCluster(getClass().getName(), 2, 2, 2, 2);
    yarnCluster.init(hConf);
    yarnCluster.start();
    yarnCluster.getResourceManager(0).getRMContext().getRMAdminService().transitionToActive(
            new HAServiceProtocol.StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER));
    return yarnCluster;
}
From source file:co.cask.cdap.security.impersonation.UGIProviderTest.java
License:Apache License
@BeforeClass
public static void init() throws Exception {
    cConf = CConfiguration.create();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, TEMP_FOLDER.newFolder().getAbsolutePath());

    // Start KDC
    miniKdc = new MiniKdc(MiniKdc.createConf(), TEMP_FOLDER.newFolder());
    miniKdc.start();
    System.setProperty("java.security.krb5.conf", miniKdc.getKrb5conf().getAbsolutePath());

    // Generate keytab
    keytabFile = TEMP_FOLDER.newFile();
    miniKdc.createPrincipal(keytabFile, "hdfs", "alice", "bob");

    // Start mini DFS cluster
    Configuration hConf = new Configuration();
    hConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEMP_FOLDER.newFolder().getAbsolutePath());
    hConf.setBoolean("ipc.client.fallback-to-simple-auth-allowed", true);
    miniDFSCluster = new MiniDFSCluster.Builder(hConf).numDataNodes(1).build();
    miniDFSCluster.waitClusterUp();
    locationFactory = new FileContextLocationFactory(miniDFSCluster.getFileSystem().getConf());

    hConf = new Configuration();
    hConf.set("hadoop.security.authentication", "kerberos");
    UserGroupInformation.setConfiguration(hConf);
}