List of usage examples for org.apache.hadoop.conf.Configuration getStrings

public String[] getStrings(String name)

Gets the comma-delimited values of the name property as an array of Strings. If no such property is specified, null is returned.
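Before the project examples below, here is a minimal sketch of the round trip, assuming a made-up property key my.list.property used only for illustration: setStrings stores a comma-delimited list, and getStrings splits it back into an array, returning null when the property is absent.

import org.apache.hadoop.conf.Configuration;

public class GetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // "my.list.property" is a hypothetical key, not a standard Hadoop property
        conf.setStrings("my.list.property", "alpha", "beta", "gamma");

        // getStrings splits the comma-delimited value back into an array
        String[] values = conf.getStrings("my.list.property");
        for (String v : values) {
            System.out.println(v);
        }

        // an unset property yields null rather than an empty array
        String[] missing = conf.getStrings("no.such.property");
        System.out.println(missing == null); // prints "true"
    }
}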
From source file:com.osohm.nutch.parse.html.filter.DOMContentUtils.java
License:Apache License
public void setConf(Configuration conf) {
    // forceTags is used to override configurable tag ignoring, later on
    Collection<String> forceTags = new ArrayList<String>(1);
    this.conf = conf;
    linkParams.clear();
    linkParams.put("a", new LinkParams("a", "href", 1));
    linkParams.put("area", new LinkParams("area", "href", 0));
    if (conf.getBoolean("parser.html.form.use_action", true)) {
        linkParams.put("form", new LinkParams("form", "action", 1));
        if (conf.get("parser.html.form.use_action") != null)
            forceTags.add("form");
    }
    linkParams.put("frame", new LinkParams("frame", "src", 0));
    linkParams.put("iframe", new LinkParams("iframe", "src", 0));
    linkParams.put("script", new LinkParams("script", "src", 0));
    linkParams.put("link", new LinkParams("link", "href", 0));
    linkParams.put("img", new LinkParams("img", "src", 0));

    // remove unwanted link tags from the linkParams map
    String[] ignoreTags = conf.getStrings("parser.html.outlinks.ignore_tags");
    for (int i = 0; ignoreTags != null && i < ignoreTags.length; i++) {
        if (!forceTags.contains(ignoreTags[i]))
            linkParams.remove(ignoreTags[i]);
    }
}
From source file:com.produban.openbus.persistence.HDFSUtils.java
License:Apache License
public static FileSystem getFS(String path, Configuration conf) {
    try {
        FileSystem ret = new Path(path).getFileSystem(conf);
        if (ret instanceof LocalFileSystem) {
            LOG.info("Using local filesystem and disabling checksums");
            ret = new RawLocalFileSystem();
            try {
                ((RawLocalFileSystem) ret).initialize(new URI(URI_CONFIG), new Configuration());
            } catch (URISyntaxException e) {
                throw new RuntimeException(e);
            }
        } else {
            LOG.info("No local filesystem " + conf.getStrings("fs.defaultFS"));
        }
        return ret;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java
License:Apache License
public static Collection<AttributeValue> getRangeKeyValues(Configuration conf) {
    List<AttributeValue> values = new ArrayList<AttributeValue>();
    Types type = getRangeKeyType(conf);
    String[] encodedValues = conf.getStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY);

    // if range key values have not been configured return
    if (null == encodedValues)
        return values;

    // decode values
    for (String encodedValue : encodedValues) {
        values.add(AttributeValueIOUtils.valueOf(type, encodedValue));
    }

    return values;
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormatTest.java
License:Apache License
@Test
public void testGetRangeKeyValues() {
    Configuration conf = createMock(Configuration.class);
    final String[] VALUES = new String[] { "TEST1", "TEST2" };
    Types type = Types.STRING;

    List<AttributeValue> attrs = new ArrayList<AttributeValue>();
    for (String value : VALUES) {
        attrs.add(new AttributeValue().withS(value));
    }

    expect(conf.getInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, Types.STRING.ordinal()))
            .andReturn(type.ordinal());
    expect(conf.getStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY)).andReturn(VALUES);
    replay(conf);

    Collection<AttributeValue> results = DynamoDBQueryInputFormat.getRangeKeyValues(conf);

    int i = 0;
    for (AttributeValue result : results) {
        assertEquals(VALUES[i++], result.getS());
    }

    verify(conf);
}
From source file:com.yahoo.glimmer.indexing.preprocessor.TuplesToResourcesMapper.java
License:Open Source License
protected void setup(Mapper<LongWritable, Text, Text, Object>.Context context)
        throws java.io.IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    boolean includeContexts = conf.getBoolean(INCLUDE_CONTEXTS_KEY, true);
    setIncludeContexts(includeContexts);

    TupleFilter filter = TupleFilterSerializer.deserialize(conf);
    if (filter != null) {
        LOG.info("Using TupleFilter:\n" + filter.toString());
        setFilter(filter);
    } else {
        LOG.info("No TupleFilter given. Processing all tuples.");
    }

    extraResources = conf.getStrings(EXTRA_RESOURCES);
}
From source file:com.yahoo.glimmer.indexing.RDFDocumentFactory.java
License:Open Source License
public static String[] getFieldsFromConf(Configuration conf) {
    String[] fields = conf.getStrings(CONF_FIELDNAMES_KEY);
    if (fields == null) {
        throw new IllegalStateException("Fields not set set in the config.");
    }
    return fields;
}
From source file:diamondmapreduce.DiamondMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // get query and database name from mapreduce driver
    Configuration conf = context.getConfiguration();
    String query = conf.get(DiamondMapReduce.QUERY);
    String dataBase = conf.get(DiamondMapReduce.DATABASE);
    String[] args = conf.getStrings("DIAMOND-arguments");

    // write key-value pair to local tmp
    WriteKeyValueToTemp.write(key.toString(), value.toString());

    // use runtime to execute alignment, intermediate binary files are stored in local tmp
    DiamondAlignment.align(this.diamond, this.localDB, key.toString(), args, conf);

    // view the binary files to tabular output file, view output will be streammized into HDFS
    // DiamondView.view(this.diamond, key.toString(), conf);

    // delete all intermediate files
    DeleteIntermediateFiles.deleteFiles(key.toString());

    context.write(new Text("key"), new Text(key.toString()));
}
From source file:edu.indiana.d2i.htrc.io.mem.MemCachedRecordWriter.java
License:Apache License
public MemCachedRecordWriter(Configuration conf) {
    // read configuration
    MAX_EXPIRE = conf.getInt(HTRCConstants.MEMCACHED_MAX_EXPIRE, -1);
    int numClients = conf.getInt(HTRCConstants.MEMCACHED_CLIENT_NUM, -1);
    String[] hostArray = conf.getStrings(HTRCConstants.MEMCACHED_HOSTS);
    List<String> hosts = Arrays.asList(hostArray);
    Class<?> writableClass = conf.getClass("mapred.output.value.class", Writable.class);
    String namespace = conf.get(MemKMeansConfig.KEY_NS);
    if (namespace != null)
        NameSpace = namespace;

    client = ThreadedMemcachedClient.getThreadedMemcachedClient(numClients, hosts);
    transcoder = new HadoopWritableTranscoder<V>(conf, writableClass);
}
From source file:edu.indiana.d2i.htrc.io.mem.ThreadedMemcachedClient.java
License:Apache License
public static ThreadedMemcachedClient getThreadedMemcachedClient(Configuration conf) {
    int numClients = conf.getInt(HTRCConstants.MEMCACHED_CLIENT_NUM, 1);
    String[] hostArray = conf.getStrings(HTRCConstants.MEMCACHED_HOSTS);
    List<String> hosts = Arrays.asList(hostArray);
    return getThreadedMemcachedClient(numClients, hosts);
}
From source file:fi.tkk.ics.hadoop.bam.cli.Utils.java
License:Open Source License
/** Computes the merger of the SAM headers in the files listed in
 * HEADERMERGER_INPUTS_PROPERTY. The sort order of the result is set
 * according to the last call to setHeaderMergerSortOrder, or otherwise
 * to "unsorted".
 *
 * The result is cached locally to prevent it from being recomputed too
 * often.
 */
public static SamFileHeaderMerger getSAMHeaderMerger(Configuration conf) throws IOException {
    // TODO: it would be preferable to cache this beforehand instead of
    // having every task read the header block of every input file. But that
    // would be trickier, given that SamFileHeaderMerger isn't trivially
    // serializable.

    // Save it in a static field, though, in case that helps anything.
    if (headerMerger != null)
        return headerMerger;

    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();

    for (final String in : conf.getStrings(HEADERMERGER_INPUTS_PROPERTY)) {
        final Path p = new Path(in);

        final SAMFileReader r = new SAMFileReader(p.getFileSystem(conf).open(p));
        headers.add(r.getFileHeader());
        r.close();
    }

    final String orderStr = conf.get(HEADERMERGER_SORTORDER_PROP);
    final SAMFileHeader.SortOrder order = orderStr == null ? SAMFileHeader.SortOrder.unsorted
            : SAMFileHeader.SortOrder.valueOf(orderStr);

    return headerMerger = new SamFileHeaderMerger(order, headers, true);
}