List of usage examples for org.apache.hadoop.conf.Configuration.setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
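Before the source-file examples, here is a minimal, self-contained sketch of the call together with its getBoolean counterpart. It is not taken from the examples below, and the property name "example.feature.enabled" is invented purely for illustration.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a boolean under a property name ("example.feature.enabled" is a made-up key).
        conf.setBoolean("example.feature.enabled", true);
        // getBoolean returns the stored value, or the supplied default if the property is unset.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled); // prints "example.feature.enabled = true"
    }
}

The related setBooleanIfUnset(name, value) writes the value only when the property has not already been set.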
From source file:com.wandisco.s3hdfs.rewrite.filter.TestBase.java
License:Apache License
/**
 * @throws java.lang.Exception
 */
@Before
public void setUp() throws Exception {
    Configuration conf = new HdfsConfiguration(new S3HdfsConfiguration());
    conf.setInt(S3_PROXY_PORT_KEY, PROXY_PORT);
    conf.setBoolean(DFS_WEBHDFS_ENABLED_KEY, true);
    conf.setInt(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100);
    conf.setLong(DFS_BLOCK_SIZE_KEY, 1024);
    conf.setLong(DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 512);
    // ^ has to be a multiple of 512
    FsPermission.setUMask(conf, FsPermission.createImmutable((short) 0));
    // ^ eliminate the UMask in HDFS to remove perm denied exceptions in s3Dir
    hostName = conf.get(S3_SERVICE_HOSTNAME_KEY);
    System.out.println("S3HDFS ServiceHostName: " + hostName);
    s3Directory = conf.get(S3_DIRECTORY_KEY);
    cluster = new MiniDFSCluster.Builder(conf).nameNodeHttpPort(HTTP_PORT).numDataNodes(3).build();
    cluster.waitActive();
    hdfs = cluster.getFileSystem();
    // initialize s3 directory
    Path s3Path = new Path(s3Directory);
    assertTrue(hdfs.mkdirs(s3Path));
    testUtil = new S3HdfsTestUtil(hdfs, s3Directory);
    s3Service = testUtil.configureS3Service(hostName, PROXY_PORT);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java
License:Apache License
public static void setInterpolateAcrossRangeKeyValues(Configuration conf, boolean interpolate) {
    conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, interpolate);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java
License:Apache License
@Test
public void testSetInterpolateAcrossRangeKeyValues() {
    Configuration conf = createMock(Configuration.class);
    conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, true);
    replay(conf);
    DynamoDBQueryInputFormat.setInterpolateAcrossRangeKeyValues(conf, true);
    verify(conf);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java
License:Apache License
@Test
public void testSetRangeKeyValues() {
    Configuration conf = createMock(Configuration.class);
    final String[] VALUES = new String[] { "TEST1", "TEST2" };
    Types type = Types.STRING;
    List<AttributeValue> attrs = new ArrayList<AttributeValue>();
    for (String value : VALUES) {
        attrs.add(new AttributeValue().withS(value));
    }
    conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, false);
    conf.setInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, type.ordinal());
    conf.setStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY, VALUES);
    replay(conf);
    DynamoDBQueryInputFormat.setRangeKeyValues(conf, type, attrs);
    verify(conf);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java
License:Apache License
@Test
public void testSetRangeKeyCondition() {
    Configuration conf = createMock(Configuration.class);
    final String[] VALUES = new String[] { "TEST1", "TEST2" };
    Types type = Types.STRING;
    ComparisonOperator operator = ComparisonOperator.BETWEEN;
    List<AttributeValue> attrs = new ArrayList<AttributeValue>();
    for (String value : VALUES) {
        attrs.add(new AttributeValue().withS(value));
    }
    conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, false);
    conf.setInt(DynamoDBConfiguration.RANGE_KEY_OPERATOR_PROPERTY, ComparisonOperator.BETWEEN.ordinal());
    conf.setInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, type.ordinal());
    conf.setStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY, VALUES);
    replay(conf);
    DynamoDBQueryInputFormat.setRangeKeyCondition(conf, type, operator, attrs);
    verify(conf);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java
License:Apache License
@Test
public void testSetRangeKeyInterpolateMinValue() {
    Configuration conf = createMock(Configuration.class);
    final String VALUE = "TEST";
    Types type = Types.STRING;
    AttributeValue attr = new AttributeValue().withS(VALUE);
    conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, true);
    conf.setInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, type.ordinal());
    conf.set(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MIN_VALUE_PROPERTY, VALUE);
    replay(conf);
    DynamoDBQueryInputFormat.setRangeKeyInterpolateMinValue(conf, type, attr);
    verify(conf);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java
License:Apache License
@Test
public void testSetRangeKeyInterpolateMaxValue() {
    Configuration conf = createMock(Configuration.class);
    final String VALUE = "TEST";
    Types type = Types.STRING;
    AttributeValue attr = new AttributeValue().withS(VALUE);
    conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, true);
    conf.setInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, type.ordinal());
    conf.set(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MAX_VALUE_PROPERTY, VALUE);
    replay(conf);
    DynamoDBQueryInputFormat.setRangeKeyInterpolateMaxValue(conf, type, attr);
    verify(conf);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java
License:Apache License
@Test
public void testSetRangeKeyInterpolateRange() {
    Configuration conf = createMock(Configuration.class);
    final String MIN_VALUE = "TEST1";
    final String MAX_VALUE = "TEST2";
    Types type = Types.STRING;
    AttributeValue min_attr = new AttributeValue().withS(MIN_VALUE);
    AttributeValue max_attr = new AttributeValue().withS(MAX_VALUE);
    conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, true);
    conf.setInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, type.ordinal());
    conf.set(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MIN_VALUE_PROPERTY, MIN_VALUE);
    conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, true);
    conf.setInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, type.ordinal());
    conf.set(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MAX_VALUE_PROPERTY, MAX_VALUE);
    replay(conf);
    DynamoDBQueryInputFormat.setRangeKeyInterpolateRange(conf, type, min_attr, max_attr);
    verify(conf);
}
From source file:com.xiaoxiaomo.mr.utils.kafka.HadoopJob.java
License:Apache License
public int run(String[] args) throws Exception {
    CommandLineParser parser = new PosixParser();
    Options options = buildOptions();
    CommandLine cmd = parser.parse(options, args);
    if (cmd.hasOption("h") || cmd.getArgs().length == 0) {
        printHelpAndExit(options);
    }
    String hdfsPath = cmd.getArgs()[0];

    Configuration conf = getConf();
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);

    if (cmd.hasOption("topics")) {
        LOG.info("Using topics: " + cmd.getOptionValue("topics"));
        KafkaInputFormat.configureKafkaTopics(conf, cmd.getOptionValue("topics"));
    } else {
        printHelpAndExit(options);
    }

    KafkaInputFormat.configureZkConnection(conf, cmd.getOptionValue("zk-connect", "localhost:2181"));
    if (cmd.hasOption("consumer-group")) {
        CheckpointManager.configureUseZooKeeper(conf, cmd.getOptionValue("consumer-group", "dev-hadoop-loader"));
    }
    if (cmd.getOptionValue("autooffset-reset") != null) {
        KafkaInputFormat.configureAutoOffsetReset(conf, cmd.getOptionValue("autooffset-reset"));
    }

    JobConf jobConf = new JobConf(conf);
    if (cmd.hasOption("remote")) {
        String ip = cmd.getOptionValue("remote");
        LOG.info("Default file system: hdfs://" + ip + ":8020/");
        jobConf.set("fs.defaultFS", "hdfs://" + ip + ":8020/");
        LOG.info("Remote jobtracker: " + ip + ":8021");
        jobConf.set("mapred.job.tracker", ip + ":8021");
    }

    Path jarTarget = new Path(
            getClass().getProtectionDomain().getCodeSource().getLocation() + "../kafka-hadoop-loader.jar");
    if (new File(jarTarget.toUri()).exists()) {
        // running from IDE / as maven
        jobConf.setJar(jarTarget.toUri().getPath());
        LOG.info("Using target jar: " + jarTarget.toString());
    } else {
        // running from jar remotely or locally
        jobConf.setJarByClass(getClass());
        LOG.info("Using parent jar: " + jobConf.getJar());
    }

    Job job = Job.getInstance(jobConf, "kafka.hadoop.loader");
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setMapperClass(HadoopJobMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    MultiOutputFormat.setOutputPath(job, new Path(hdfsPath));
    MultiOutputFormat.setCompressOutput(job, cmd.getOptionValue("compress-output", "on").equals("on"));

    LOG.info("Output hdfs location: {}", hdfsPath);
    LOG.info("Output hdfs compression: {}", MultiOutputFormat.getCompressOutput(job));

    return job.waitForCompletion(true) ? 0 : -1;
}
From source file:com.yahoo.glimmer.indexing.preprocessor.PrepTool.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    SimpleJSAP jsap = new SimpleJSAP(PrepTool.class.getName(), "RDF tuples pre-processor for Glimmer",
            new Parameter[] {
                    new Switch(NO_CONTEXTS_ARG, 'C', NO_CONTEXTS_ARG, "Don't process the contexts for each tuple."),
                    new FlaggedOption(ONTOLOGY_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'O', ONTOLOGY_ARG),
                    new FlaggedOption(REDUCER_COUNT_ARG, JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'r', REDUCER_COUNT_ARG),
                    new UnflaggedOption(INPUT_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED, "HDFS location for the input data."),
                    new UnflaggedOption(OUTPUT_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED, "HDFS location for the out data."), });

    JSAPResult jsapResult = jsap.parse(args);
    if (!jsapResult.success()) {
        System.err.print(jsap.getUsage());
        System.exit(1);
    }

    Configuration config = getConf();

    boolean withContexts = !jsapResult.getBoolean(NO_CONTEXTS_ARG, false);
    config.setBoolean(TuplesToResourcesMapper.INCLUDE_CONTEXTS_KEY, withContexts);

    // The ontology if any...
    String ontologyFilename = jsapResult.getString(ONTOLOGY_ARG);
    if (ontologyFilename != null) {
        // Load the ontology
        InputStream ontologyInputStream = new FileInputStream(ontologyFilename);
        OWLOntology ontology = OntologyLoader.load(ontologyInputStream);
        System.out.println(
                "Loaded ontology from " + ontologyFilename + " with " + ontology.getAxiomCount() + " axioms.");

        ArrayList<String> ontologyClasses = new ArrayList<String>();
        for (OWLClass owlClass : ontology.getClassesInSignature()) {
            ontologyClasses.add(owlClass.getIRI().toString());
        }
        System.out.println("Adding " + ontologyClasses.size() + " classes from ontology.");
        config.setStrings(TuplesToResourcesMapper.EXTRA_RESOURCES, ontologyClasses.toArray(new String[0]));
    } else {
        System.out.println("No ontology filename set in conf. No ontology has been loaded.");
    }

    Job job = Job.getInstance(config);
    job.setJarByClass(PrepTool.class);
    job.setJobName(PrepTool.class.getName() + "-part1-" + System.currentTimeMillis());
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TuplesToResourcesMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    int reducerCount = jsapResult.getInt(REDUCER_COUNT_ARG, DEFAULT_REDUCER_COUNT);
    job.setNumReduceTasks(reducerCount);
    if (reducerCount == 1) {
        // We assign 'global' ids in the reducer. For this to work, there
        // can be only one. But using just one reducer, we run out of local disk space during the
        // pre-reduce merge with big data sets like WCC.
        job.setReducerClass(ResourcesReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Object.class);
        job.setOutputFormatClass(ResourceRecordWriter.OutputFormat.class);
    } else {
        /*
         * TODO: Take the functionality of the reducer and move it to run on
         * the gateway. We then use n identity reducers, the output of which
         * will be read and merged as streams on the gateway.
         */
    }

    FileInputFormat.setInputPaths(job, new Path(jsapResult.getString(INPUT_ARG)));
    Path outputDir = new Path(jsapResult.getString(OUTPUT_ARG));
    FileOutputFormat.setOutputPath(job, outputDir);

    if (!job.waitForCompletion(true)) {
        System.err.println("Failed to process tuples from " + jsapResult.getString(INPUT_ARG));
        return 1;
    }

    // IF THERE WAS ONLY ONE REDUCER WE NOW HAVE
    // One file per reducer containing lists of urls(recourses) for
    // subjects, predicates, objects and contexts.
    // One file per reducer that contains all resources. subjects +
    // predicates + objects + contexts.
    // One file per reducer that contains the subjects + all <predicate>
    // <object>|"Literal" <context> on that subject.
    // IF THERE WAS MORE THAN ONE REDUCER WE NOW HAVE N FILES THAT NEED TO BE MERGED ON THE GATEWAY. TODO.
    return 0;
}