Example usage for org.apache.hadoop.conf Configuration getStrings

Introduction

On this page you can find example usage of org.apache.hadoop.conf Configuration getStrings.

Prototype

public String[] getStrings(String name) 

Document

Get the comma delimited values of the name property as an array of Strings. If no such property is specified then null is returned.
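
A minimal, self-contained sketch of the call (not taken from any of the projects below; the property names example.hosts and example.not.set and the class name GetStringsSketch are made up for illustration):

import org.apache.hadoop.conf.Configuration;

public class GetStringsSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // set a comma delimited value, then read it back as a String array
        conf.set("example.hosts", "host1.example.com,host2.example.com,host3.example.com");
        String[] hosts = conf.getStrings("example.hosts");
        for (String host : hosts) {
            System.out.println(host);
        }

        // getStrings returns null for an unset property, hence the null
        // checks in several of the examples below
        String[] missing = conf.getStrings("example.not.set");
        System.out.println(missing == null ? "property not set" : String.join(",", missing));
    }
}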

Usage

From source file:com.osohm.nutch.parse.html.filter.DOMContentUtils.java

License:Apache License

public void setConf(Configuration conf) {
    // forceTags is used to override configurable tag ignoring, later on
    Collection<String> forceTags = new ArrayList<String>(1);

    this.conf = conf;
    linkParams.clear();
    linkParams.put("a", new LinkParams("a", "href", 1));
    linkParams.put("area", new LinkParams("area", "href", 0));

    if (conf.getBoolean("parser.html.form.use_action", true)) {
        linkParams.put("form", new LinkParams("form", "action", 1));
        if (conf.get("parser.html.form.use_action") != null)
            forceTags.add("form");
    }

    linkParams.put("frame", new LinkParams("frame", "src", 0));
    linkParams.put("iframe", new LinkParams("iframe", "src", 0));
    linkParams.put("script", new LinkParams("script", "src", 0));
    linkParams.put("link", new LinkParams("link", "href", 0));
    linkParams.put("img", new LinkParams("img", "src", 0));

    // remove unwanted link tags from the linkParams map
    String[] ignoreTags = conf.getStrings("parser.html.outlinks.ignore_tags");
    for (int i = 0; ignoreTags != null && i < ignoreTags.length; i++) {
        if (!forceTags.contains(ignoreTags[i]))
            linkParams.remove(ignoreTags[i]);
    }
}

From source file:com.produban.openbus.persistence.HDFSUtils.java

License:Apache License

public static FileSystem getFS(String path, Configuration conf) {
    try {
        FileSystem ret = new Path(path).getFileSystem(conf);

        if (ret instanceof LocalFileSystem) {
            LOG.info("Using local filesystem and disabling checksums");
            ret = new RawLocalFileSystem();

            try {
                ((RawLocalFileSystem) ret).initialize(new URI(URI_CONFIG), new Configuration());
            } catch (URISyntaxException e) {
                throw new RuntimeException(e);
            }
        } else {
            LOG.info("No local filesystem " + conf.getStrings("fs.defaultFS"));
        }

        return ret;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java

License:Apache License

public static Collection<AttributeValue> getRangeKeyValues(Configuration conf) {
    List<AttributeValue> values = new ArrayList<AttributeValue>();
    Types type = getRangeKeyType(conf);
    String[] encodedValues = conf.getStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY);

    // if range key values have not been configured return
    if (null == encodedValues)
        return values;

    // decode values
    for (String encodedValue : encodedValues) {
        values.add(AttributeValueIOUtils.valueOf(type, encodedValue));
    }

    return values;
}

From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java

License:Apache License

@Test
public void testGetRangeKeyValues() {
    Configuration conf = createMock(Configuration.class);
    final String[] VALUES = new String[] { "TEST1", "TEST2" };
    Types type = Types.STRING;

    List<AttributeValue> attrs = new ArrayList<AttributeValue>();
    for (String value : VALUES) {
        attrs.add(new AttributeValue().withS(value));
    }

    expect(conf.getInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, Types.STRING.ordinal()))
            .andReturn(type.ordinal());
    expect(conf.getStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY)).andReturn(VALUES);

    replay(conf);

    Collection<AttributeValue> results = DynamoDBQueryInputFormat.getRangeKeyValues(conf);
    int i = 0;
    for (AttributeValue result : results) {
        assertEquals(VALUES[i++], result.getS());
    }

    verify(conf);
}

From source file:com.yahoo.glimmer.indexing.preprocessor.TuplesToResourcesMapper.java

License:Open Source License

protected void setup(Mapper<LongWritable, Text, Text, Object>.Context context)
        throws java.io.IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    boolean includeContexts = conf.getBoolean(INCLUDE_CONTEXTS_KEY, true);
    setIncludeContexts(includeContexts);

    TupleFilter filter = TupleFilterSerializer.deserialize(conf);
    if (filter != null) {
        LOG.info("Using TupleFilter:\n" + filter.toString());
        setFilter(filter);
    } else {
        LOG.info("No TupleFilter given. Processing all tuples.");
    }

    extraResources = conf.getStrings(EXTRA_RESOURCES);
}

From source file:com.yahoo.glimmer.indexing.RDFDocumentFactory.java

License:Open Source License

public static String[] getFieldsFromConf(Configuration conf) {
    String[] fields = conf.getStrings(CONF_FIELDNAMES_KEY);
    if (fields == null) {
        throw new IllegalStateException("Fields not set in the config.");
    }
    return fields;
}

From source file:diamondmapreduce.DiamondMapper.java

License:Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    //get query and database name from mapreduce driver
    Configuration conf = context.getConfiguration();
    String query = conf.get(DiamondMapReduce.QUERY);
    String dataBase = conf.get(DiamondMapReduce.DATABASE);
    String[] args = conf.getStrings("DIAMOND-arguments");

    //write key-value pair to local tmp
    WriteKeyValueToTemp.write(key.toString(), value.toString());

    //use runtime to execute alignment, intermediate binary files are stored in local tmp
    DiamondAlignment.align(this.diamond, this.localDB, key.toString(), args, conf);

    //view the binary files as a tabular output file; the view output will be streamed into HDFS
    //        DiamondView.view(this.diamond, key.toString(), conf);

    //delete all intermediate files
    DeleteIntermediateFiles.deleteFiles(key.toString());

    context.write(new Text("key"), new Text(key.toString()));

}

From source file:edu.indiana.d2i.htrc.io.mem.MemCachedRecordWriter.java

License:Apache License

public MemCachedRecordWriter(Configuration conf) {
    // read configuration
    MAX_EXPIRE = conf.getInt(HTRCConstants.MEMCACHED_MAX_EXPIRE, -1);
    int numClients = conf.getInt(HTRCConstants.MEMCACHED_CLIENT_NUM, -1);
    String[] hostArray = conf.getStrings(HTRCConstants.MEMCACHED_HOSTS);
    List<String> hosts = Arrays.asList(hostArray);
    Class<?> writableClass = conf.getClass("mapred.output.value.class", Writable.class);

    String namespace = conf.get(MemKMeansConfig.KEY_NS);
    if (namespace != null)
        NameSpace = namespace;

    client = ThreadedMemcachedClient.getThreadedMemcachedClient(numClients, hosts);
    transcoder = new HadoopWritableTranscoder<V>(conf, writableClass);
}

From source file:edu.indiana.d2i.htrc.io.mem.ThreadedMemcachedClient.java

License:Apache License

public static ThreadedMemcachedClient getThreadedMemcachedClient(Configuration conf) {
    int numClients = conf.getInt(HTRCConstants.MEMCACHED_CLIENT_NUM, 1);
    String[] hostArray = conf.getStrings(HTRCConstants.MEMCACHED_HOSTS);
    List<String> hosts = Arrays.asList(hostArray);
    return getThreadedMemcachedClient(numClients, hosts);
}

From source file:fi.tkk.ics.hadoop.bam.cli.Utils.java

License:Open Source License

/** Computes the merger of the SAM headers in the files listed in
 * HEADERMERGER_INPUTS_PROPERTY. The sort order of the result is set
 * according to the last call to setHeaderMergerSortOrder, or otherwise
 * to "unsorted"./*from  ww  w  .j  ava  2 s .c om*/
 *
 * The result is cached locally to prevent it from being recomputed too
 * often.
 */
public static SamFileHeaderMerger getSAMHeaderMerger(Configuration conf) throws IOException {
    // TODO: it would be preferable to cache this beforehand instead of
    // having every task read the header block of every input file. But that
    // would be trickier, given that SamFileHeaderMerger isn't trivially
    // serializable.

    // Save it in a static field, though, in case that helps anything.
    if (headerMerger != null)
        return headerMerger;

    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();

    for (final String in : conf.getStrings(HEADERMERGER_INPUTS_PROPERTY)) {
        final Path p = new Path(in);

        final SAMFileReader r = new SAMFileReader(p.getFileSystem(conf).open(p));
        headers.add(r.getFileHeader());
        r.close();
    }

    final String orderStr = conf.get(HEADERMERGER_SORTORDER_PROP);
    final SAMFileHeader.SortOrder order = orderStr == null ? SAMFileHeader.SortOrder.unsorted
            : SAMFileHeader.SortOrder.valueOf(orderStr);

    return headerMerger = new SamFileHeaderMerger(order, headers, true);
}