List of usage examples for org.apache.hadoop.conf.Configuration getStrings

public String[] getStrings(String name)

Gets the comma-delimited values of the name property as an array of Strings. If no such property is specified, null is returned.
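Before the project examples below, here is a minimal sketch of the round trip, assuming a made-up property key my.list.property used only for illustration: setStrings stores a comma-delimited list, and getStrings splits it back into an array, returning null when the property is absent.

import org.apache.hadoop.conf.Configuration;

public class GetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // "my.list.property" is a hypothetical key, not a standard Hadoop property
        conf.setStrings("my.list.property", "alpha", "beta", "gamma");

        // getStrings splits the comma-delimited value back into an array
        String[] values = conf.getStrings("my.list.property");
        for (String v : values) {
            System.out.println(v);
        }

        // an unset property yields null rather than an empty array
        String[] missing = conf.getStrings("no.such.property");
        System.out.println(missing == null); // prints "true"
    }
}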
From source file:com.osohm.nutch.parse.html.filter.DOMContentUtils.java
License:Apache License
public void setConf(Configuration conf) {
    // forceTags is used to override configurable tag ignoring, later on
    Collection<String> forceTags = new ArrayList<String>(1);
    this.conf = conf;
    linkParams.clear();
    linkParams.put("a", new LinkParams("a", "href", 1));
    linkParams.put("area", new LinkParams("area", "href", 0));
    if (conf.getBoolean("parser.html.form.use_action", true)) {
        linkParams.put("form", new LinkParams("form", "action", 1));
        if (conf.get("parser.html.form.use_action") != null)
            forceTags.add("form");
    }
    linkParams.put("frame", new LinkParams("frame", "src", 0));
    linkParams.put("iframe", new LinkParams("iframe", "src", 0));
    linkParams.put("script", new LinkParams("script", "src", 0));
    linkParams.put("link", new LinkParams("link", "href", 0));
    linkParams.put("img", new LinkParams("img", "src", 0));

    // remove unwanted link tags from the linkParams map
    String[] ignoreTags = conf.getStrings("parser.html.outlinks.ignore_tags");
    for (int i = 0; ignoreTags != null && i < ignoreTags.length; i++) {
        if (!forceTags.contains(ignoreTags[i]))
            linkParams.remove(ignoreTags[i]);
    }
}
From source file:com.produban.openbus.persistence.HDFSUtils.java
License:Apache License
public static FileSystem getFS(String path, Configuration conf) {
    try {
        FileSystem ret = new Path(path).getFileSystem(conf);
        if (ret instanceof LocalFileSystem) {
            LOG.info("Using local filesystem and disabling checksums");
            ret = new RawLocalFileSystem();
            try {
                ((RawLocalFileSystem) ret).initialize(new URI(URI_CONFIG), new Configuration());
            } catch (URISyntaxException e) {
                throw new RuntimeException(e);
            }
        } else {
            LOG.info("No local filesystem " + conf.getStrings("fs.defaultFS"));
        }
        return ret;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java
License:Apache License
public static Collection<AttributeValue> getRangeKeyValues(Configuration conf) {
    List<AttributeValue> values = new ArrayList<AttributeValue>();
    Types type = getRangeKeyType(conf);
    String[] encodedValues = conf.getStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY);

    // if range key values have not been configured return
    if (null == encodedValues)
        return values;

    // decode values
    for (String encodedValue : encodedValues) {
        values.add(AttributeValueIOUtils.valueOf(type, encodedValue));
    }

    return values;
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java
License:Apache License
@Test
public void testGetRangeKeyValues() {
    Configuration conf = createMock(Configuration.class);
    final String[] VALUES = new String[] { "TEST1", "TEST2" };
    Types type = Types.STRING;

    List<AttributeValue> attrs = new ArrayList<AttributeValue>();
    for (String value : VALUES) {
        attrs.add(new AttributeValue().withS(value));
    }

    expect(conf.getInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, Types.STRING.ordinal()))
            .andReturn(type.ordinal());
    expect(conf.getStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY)).andReturn(VALUES);
    replay(conf);

    Collection<AttributeValue> results = DynamoDBQueryInputFormat.getRangeKeyValues(conf);

    int i = 0;
    for (AttributeValue result : results) {
        assertEquals(VALUES[i++], result.getS());
    }

    verify(conf);
}
From source file:com.yahoo.glimmer.indexing.preprocessor.TuplesToResourcesMapper.java
License:Open Source License
protected void setup(Mapper<LongWritable, Text, Text, Object>.Context context)
        throws java.io.IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    boolean includeContexts = conf.getBoolean(INCLUDE_CONTEXTS_KEY, true);
    setIncludeContexts(includeContexts);

    TupleFilter filter = TupleFilterSerializer.deserialize(conf);
    if (filter != null) {
        LOG.info("Using TupleFilter:\n" + filter.toString());
        setFilter(filter);
    } else {
        LOG.info("No TupleFilter given. Processing all tuples.");
    }

    extraResources = conf.getStrings(EXTRA_RESOURCES);
}
From source file:com.yahoo.glimmer.indexing.RDFDocumentFactory.java
License:Open Source License
public static String[] getFieldsFromConf(Configuration conf) {
    String[] fields = conf.getStrings(CONF_FIELDNAMES_KEY);
    if (fields == null) {
        throw new IllegalStateException("Fields not set set in the config.");
    }
    return fields;
}
From source file:diamondmapreduce.DiamondMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // get query and database name from mapreduce driver
    Configuration conf = context.getConfiguration();
    String query = conf.get(DiamondMapReduce.QUERY);
    String dataBase = conf.get(DiamondMapReduce.DATABASE);
    String[] args = conf.getStrings("DIAMOND-arguments");

    // write key-value pair to local tmp
    WriteKeyValueToTemp.write(key.toString(), value.toString());

    // use runtime to execute alignment, intermediate binary files are stored in local tmp
    DiamondAlignment.align(this.diamond, this.localDB, key.toString(), args, conf);

    // view the binary files to tabular output file, view output will be streammized into HDFS
    // DiamondView.view(this.diamond, key.toString(), conf);

    // delete all intermediate files
    DeleteIntermediateFiles.deleteFiles(key.toString());

    context.write(new Text("key"), new Text(key.toString()));
}
From source file:edu.indiana.d2i.htrc.io.mem.MemCachedRecordWriter.java
License:Apache License
public MemCachedRecordWriter(Configuration conf) {
    // read configuration
    MAX_EXPIRE = conf.getInt(HTRCConstants.MEMCACHED_MAX_EXPIRE, -1);
    int numClients = conf.getInt(HTRCConstants.MEMCACHED_CLIENT_NUM, -1);
    String[] hostArray = conf.getStrings(HTRCConstants.MEMCACHED_HOSTS);
    List<String> hosts = Arrays.asList(hostArray);
    Class<?> writableClass = conf.getClass("mapred.output.value.class", Writable.class);
    String namespace = conf.get(MemKMeansConfig.KEY_NS);
    if (namespace != null)
        NameSpace = namespace;

    client = ThreadedMemcachedClient.getThreadedMemcachedClient(numClients, hosts);
    transcoder = new HadoopWritableTranscoder<V>(conf, writableClass);
}
From source file:edu.indiana.d2i.htrc.io.mem.ThreadedMemcachedClient.java
License:Apache License
public static ThreadedMemcachedClient getThreadedMemcachedClient(Configuration conf) {
    int numClients = conf.getInt(HTRCConstants.MEMCACHED_CLIENT_NUM, 1);
    String[] hostArray = conf.getStrings(HTRCConstants.MEMCACHED_HOSTS);
    List<String> hosts = Arrays.asList(hostArray);
    return getThreadedMemcachedClient(numClients, hosts);
}
From source file:fi.tkk.ics.hadoop.bam.cli.Utils.java
License:Open Source License
/** Computes the merger of the SAM headers in the files listed in
 * HEADERMERGER_INPUTS_PROPERTY. The sort order of the result is set
 * according to the last call to setHeaderMergerSortOrder, or otherwise
 * to "unsorted".
 *
 * The result is cached locally to prevent it from being recomputed too
 * often.
 */
public static SamFileHeaderMerger getSAMHeaderMerger(Configuration conf) throws IOException {
    // TODO: it would be preferable to cache this beforehand instead of
    // having every task read the header block of every input file. But that
    // would be trickier, given that SamFileHeaderMerger isn't trivially
    // serializable.

    // Save it in a static field, though, in case that helps anything.
    if (headerMerger != null)
        return headerMerger;

    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();

    for (final String in : conf.getStrings(HEADERMERGER_INPUTS_PROPERTY)) {
        final Path p = new Path(in);

        final SAMFileReader r = new SAMFileReader(p.getFileSystem(conf).open(p));
        headers.add(r.getFileHeader());
        r.close();
    }

    final String orderStr = conf.get(HEADERMERGER_SORTORDER_PROP);
    final SAMFileHeader.SortOrder order = orderStr == null ? SAMFileHeader.SortOrder.unsorted
            : SAMFileHeader.SortOrder.valueOf(orderStr);

    return headerMerger = new SamFileHeaderMerger(order, headers, true);
}