Example usage for org.apache.hadoop.conf Configuration getStrings

List of usage examples for org.apache.hadoop.conf Configuration getStrings

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration getStrings.

Prototype

public String[] getStrings(String name) 

Source Link

Document

Gets the comma-delimited values of the name property as an array of Strings. Returns null if no such property is specified.

Usage

From source file:org.apache.nutch.analysis.lang.HTMLLanguageParser.java

License:Apache License

/**
 * Stores the configuration and reads the language-identification settings.
 *
 * <p>Reads {@code lang.identification.only.certain} (default {@code false})
 * and the comma-delimited {@code lang.extraction.policy} list. The index of
 * the {@code "detect"} and {@code "identify"} entries in that list is
 * recorded in {@code detect} and {@code identify}; other entries are ignored.
 *
 * @param conf the configuration to read the settings from
 */
public void setConf(Configuration conf) {
    this.conf = conf;
    onlyCertain = conf.getBoolean("lang.identification.only.certain", false);
    String[] policy = conf.getStrings("lang.extraction.policy");
    if (policy == null) {
        // getStrings returns null when the property is unset; keep the
        // current detect/identify values instead of failing with an NPE.
        return;
    }
    for (int i = 0; i < policy.length; i++) {
        if ("detect".equals(policy[i])) {
            detect = i;
        } else if ("identify".equals(policy[i])) {
            identify = i;
        }
    }
}

From source file:org.apache.nutch.clustering.carrot2.Clusterer.java

License:Apache License

/**
 * Implementation of {@link Configurable}.
 *
 * Stores the configuration, overrides the default language and the enabled
 * languages when the corresponding properties are set, logs both values at
 * info level, and finally calls {@code initialize()}.
 */
public void setConf(Configuration conf) {
    this.conf = conf;

    // Only replace the default language when one is configured.
    final String configuredDefault = conf.get(CONF_PROP_DEFAULT_LANGUAGE);
    if (configuredDefault != null) {
        this.defaultLanguage = configuredDefault;
    }

    // Likewise, keep the current language list unless the property is set.
    final String[] configuredLanguages = conf.getStrings(CONF_PROP_LANGUAGES);
    if (configuredLanguages != null) {
        this.languages = configuredLanguages;
    }

    if (logger.isInfoEnabled()) {
        logger.info("Default language: " + defaultLanguage);
        logger.info("Enabled languages: " + Arrays.asList(languages));
    }

    initialize();
}

From source file:org.apache.nutch.hostdb.UpdateHostDbReducer.java

License:Apache License

/**
 * Configures the thread pool and prestarts all resolver threads.
 *
 * Reads the host DB settings from the job configuration, wraps the
 * configured numeric and string field names in {@code Text} instances, and
 * creates the executor on top of {@code queue}.
 *
 * @param context the reducer context supplying the job configuration
 */
@Override
public void setup(Reducer<Text, NutchWritable, Text, HostDatum>.Context context) {
    Configuration conf = context.getConfiguration();
    purgeFailedHostsThreshold = conf.getInt(UpdateHostDb.HOSTDB_PURGE_FAILED_HOSTS_THRESHOLD, -1);
    numResolverThreads = conf.getInt(UpdateHostDb.HOSTDB_NUM_RESOLVER_THREADS, 10);
    // NOTE(review): presumably converts seconds to milliseconds; the
    // multiplication is done in int arithmetic, so very large configured
    // values would overflow - confirm the expected range.
    recheckInterval = conf.getInt(UpdateHostDb.HOSTDB_RECHECK_INTERVAL, 86400) * 1000;
    checkFailed = conf.getBoolean(UpdateHostDb.HOSTDB_CHECK_FAILED, false);
    checkNew = conf.getBoolean(UpdateHostDb.HOSTDB_CHECK_NEW, false);
    checkKnown = conf.getBoolean(UpdateHostDb.HOSTDB_CHECK_KNOWN, false);
    force = conf.getBoolean(UpdateHostDb.HOSTDB_FORCE_CHECK, false);
    numericFields = conf.getStrings(UpdateHostDb.HOSTDB_NUMERIC_FIELDS);
    stringFields = conf.getStrings(UpdateHostDb.HOSTDB_STRING_FIELDS);
    percentiles = conf.getInts(UpdateHostDb.HOSTDB_PERCENTILES);

    // Wrap the numeric field names we need to collect metadata from.
    if (numericFields != null) {
        Text[] numericTexts = new Text[numericFields.length];
        int slot = 0;
        for (String fieldName : numericFields) {
            numericTexts[slot++] = new Text(fieldName);
        }
        numericFieldWritables = numericTexts;
    }

    // Same for the string field names.
    if (stringFields != null) {
        Text[] stringTexts = new Text[stringFields.length];
        int slot = 0;
        for (String fieldName : stringFields) {
            stringTexts[slot++] = new Text(fieldName);
        }
        stringFieldWritables = stringTexts;
    }

    // Fixed-size pool (core == max) backed by our queue, 5 second keep-alive.
    executor = new ThreadPoolExecutor(numResolverThreads, numResolverThreads, 5, TimeUnit.SECONDS, queue);

    // Start every core thread up front.
    executor.prestartAllCoreThreads();
}

From source file:org.apache.nutch.indexer.metadata.MetadataIndexer.java

License:Apache License

/**
 * Stores the configuration and rebuilds the parse-metadata field mapping.
 *
 * <p>For every tag listed in the {@code PARSE_CONF_PROPERTY} property, maps
 * {@code PARSE_META_PREFIX + lower-cased tag} to {@code INDEX_PREFIX + tag}.
 * When the property is not set the mapping is left empty.
 *
 * @param conf the configuration to read the tag list from
 */
public void setConf(Configuration conf) {
    this.conf = conf;
    String[] metatags = conf.getStrings(PARSE_CONF_PROPERTY);
    parseFieldnames = new TreeMap<Utf8, String>();
    if (metatags == null) {
        // getStrings returns null for an unset property; nothing to map.
        return;
    }
    for (String metatag : metatags) {
        parseFieldnames.put(new Utf8(PARSE_META_PREFIX + metatag.toLowerCase(Locale.ROOT)),
                INDEX_PREFIX + metatag);
    }
    // TODO check conflict between field names e.g. could have same label
    // from different sources
}

From source file:org.apache.nutch.indexer.NutchIndexWriterFactory.java

License:Apache License

/**
 * Instantiates every index writer class listed in the
 * {@code indexer.writer.classes} property, in the order listed.
 *
 * @param conf the configuration holding the comma-delimited class names
 * @return one writer instance per configured class name
 * @throws IllegalStateException if {@code indexer.writer.classes} is not set
 * @throws RuntimeException if a listed class cannot be loaded or instantiated;
 *         the underlying failure is attached as the cause
 */
@SuppressWarnings("unchecked")
public static NutchIndexWriter[] getNutchIndexWriters(Configuration conf) {
    final String[] classes = conf.getStrings("indexer.writer.classes");
    if (classes == null) {
        // getStrings returns null for an unset property; fail with a clear
        // message rather than an NPE on classes.length.
        throw new IllegalStateException("Property indexer.writer.classes is not set");
    }
    final NutchIndexWriter[] writers = new NutchIndexWriter[classes.length];
    for (int i = 0; i < classes.length; i++) {
        final String clazz = classes[i];
        try {
            final Class<NutchIndexWriter> implClass = (Class<NutchIndexWriter>) Class.forName(clazz);
            // Class.newInstance() is deprecated; go through the no-arg constructor.
            writers[i] = implClass.getDeclaredConstructor().newInstance();
        } catch (final Exception e) {
            throw new RuntimeException("Couldn't create " + clazz, e);
        }
    }
    return writers;
}

From source file:org.apache.nutch.indexer.urlmeta.URLMetaIndexingFilter.java

License:Apache License

/**
 * Stores the given configuration and, when it is non-null, loads the tag
 * list from the property named by {@code CONF_PROPERTY} (documented as
 * "urlmeta.tags").
 */
public void setConf(Configuration conf) {
    this.conf = conf;

    // A null configuration leaves the current tag list untouched.
    if (conf != null) {
        urlMetaTags = conf.getStrings(CONF_PROPERTY);
    }
}

From source file:org.apache.nutch.parse.headings.HeadingsParseFilter.java

License:Apache License

/**
 * Stores the configuration and loads the comma-delimited {@code headings}
 * property into {@code headings}.
 */
public void setConf(Configuration conf) {
    this.conf = conf;

    final String[] configuredHeadings = conf.getStrings("headings");
    headings = configuredHeadings;
}

From source file:org.apache.nutch.parse.html.DOMContentUtils.java

License:Apache License

/**
 * Rebuilds the {@code linkParams} map from the configuration.
 *
 * Registers the default link-bearing tags, optionally adds {@code form}
 * (controlled by {@code parser.html.form.use_action}), and then removes every
 * tag listed in {@code parser.html.outlinks.ignore_tags} unless it was
 * explicitly forced.
 */
public void setConf(Configuration conf) {
    // Tags collected here survive the ignore-list filtering further down.
    Collection<String> protectedTags = new ArrayList<String>(1);

    linkParams.clear();
    linkParams.put("a", new LinkParams("a", "href", 1));
    linkParams.put("area", new LinkParams("area", "href", 0));
    if (conf.getBoolean("parser.html.form.use_action", true)) {
        linkParams.put("form", new LinkParams("form", "action", 1));
        // Only an explicitly set property protects "form" from being ignored.
        if (conf.get("parser.html.form.use_action") != null) {
            protectedTags.add("form");
        }
    }
    linkParams.put("frame", new LinkParams("frame", "src", 0));
    linkParams.put("iframe", new LinkParams("iframe", "src", 0));
    linkParams.put("script", new LinkParams("script", "src", 0));
    linkParams.put("link", new LinkParams("link", "href", 0));
    linkParams.put("img", new LinkParams("img", "src", 0));

    // Drop unwanted link tags from the linkParams map.
    String[] ignored = conf.getStrings("parser.html.outlinks.ignore_tags");
    if (ignored == null) {
        return;
    }
    for (String tag : ignored) {
        if (protectedTags.contains(tag)) {
            continue;
        }
        linkParams.remove(tag);
    }
}

From source file:org.apache.nutch.plugin.PluginRepository.java

License:Apache License

/**
 * Builds the plugin repository from the given configuration.
 *
 * <p>Works on a copy of {@code conf}. Parses the folders listed in
 * {@code plugin.folders}, filters the discovered plugins with the
 * {@code plugin.excludes} and {@code plugin.includes} patterns, resolves
 * dependencies (against all plugins when {@code plugin.auto-activation} is
 * enabled, otherwise against the filtered set), and installs the extension
 * points and extensions of the registered plugins.
 *
 * @param conf the configuration to read the plugin settings from
 * @throws RuntimeException if installing the extensions fails; the
 *         underlying {@link PluginRuntimeException} is attached as the cause
 * @see java.lang.Object#Object()
 */
public PluginRepository(Configuration conf) throws RuntimeException {
    fActivatedPlugins = new HashMap<String, Plugin>();
    fExtensionPoints = new HashMap<String, ExtensionPoint>();
    this.conf = new Configuration(conf);
    this.auto = conf.getBoolean("plugin.auto-activation", true);
    String[] pluginFolders = conf.getStrings("plugin.folders");
    PluginManifestParser manifestParser = new PluginManifestParser(this.conf, this);
    Map<String, PluginDescriptor> allPlugins = manifestParser.parsePluginFolder(pluginFolders);
    if (allPlugins.isEmpty()) {
        LOG.warn("No plugins found on paths of property plugin.folders=\"{}\"", conf.get("plugin.folders"));
    }
    Pattern excludes = Pattern.compile(conf.get("plugin.excludes", ""));
    Pattern includes = Pattern.compile(conf.get("plugin.includes", ""));
    Map<String, PluginDescriptor> filteredPlugins = filter(excludes, includes, allPlugins);
    fRegisteredPlugins = getDependencyCheckedPlugins(filteredPlugins, this.auto ? allPlugins : filteredPlugins);
    installExtensionPoints(fRegisteredPlugins);
    try {
        installExtensions(fRegisteredPlugins);
    } catch (PluginRuntimeException e) {
        LOG.error(e.toString());
        // Keep the same message but chain the cause so the original stack
        // trace is not lost to callers.
        throw new RuntimeException(e.getMessage(), e);
    }
    displayStatus();
}

From source file:org.apache.nutch.scoring.urlmeta.URLMetaScoringFilter.java

License:Apache License

/**
 * handles conf assignment and pulls the value assignment from the
 * "urlmeta.tags" property/*  w ww . j a  va2  s.  com*/
 */
public void setConf(Configuration conf) {
    super.setConf(conf);

    if (conf == null)
        return;

    urlMetaTags = conf.getStrings(CONF_PROPERTY);
}