List of usage examples for org.apache.hadoop.conf Configuration getStrings
public String[] getStrings(String name)
Gets the comma-delimited values of the name property as an array of Strings. If no such property is specified, null is returned.
From source file:org.apache.nutch.analysis.lang.HTMLLanguageParser.java
License:Apache License
/**
 * Stores the configuration and reads the language-identification settings.
 *
 * <p>Reads {@code lang.identification.only.certain} (default {@code false})
 * into {@code onlyCertain}, then records the positions of the {@code "detect"}
 * and {@code "identify"} entries of {@code lang.extraction.policy} in
 * {@code detect} and {@code identify}. When the policy property is not set,
 * both fields keep their current values.
 *
 * @param conf the configuration to read from
 */
public void setConf(Configuration conf) {
  this.conf = conf;
  onlyCertain = conf.getBoolean("lang.identification.only.certain", false);

  // getStrings returns null when the property is absent; bail out instead of
  // failing with a NullPointerException on policy.length.
  String[] policy = conf.getStrings("lang.extraction.policy");
  if (policy == null) {
    return;
  }

  for (int i = 0; i < policy.length; i++) {
    if ("detect".equals(policy[i])) {
      detect = i;
    } else if ("identify".equals(policy[i])) {
      identify = i;
    }
  }
}
From source file:org.apache.nutch.clustering.carrot2.Clusterer.java
License:Apache License
/** * Implementation of {@link Configurable} */// w w w . j a v a 2s .c om public void setConf(Configuration conf) { this.conf = conf; // Configure default language and other component settings. if (conf.get(CONF_PROP_DEFAULT_LANGUAGE) != null) { // Change the default language. this.defaultLanguage = conf.get(CONF_PROP_DEFAULT_LANGUAGE); } if (conf.getStrings(CONF_PROP_LANGUAGES) != null) { this.languages = conf.getStrings(CONF_PROP_LANGUAGES); } if (logger.isInfoEnabled()) { logger.info("Default language: " + defaultLanguage); logger.info("Enabled languages: " + Arrays.asList(languages)); } initialize(); }
From source file:org.apache.nutch.hostdb.UpdateHostDbReducer.java
License:Apache License
/** * Configures the thread pool and prestarts all resolver threads. *//* w w w .j a v a 2 s. co m*/ @Override public void setup(Reducer<Text, NutchWritable, Text, HostDatum>.Context context) { Configuration conf = context.getConfiguration(); purgeFailedHostsThreshold = conf.getInt(UpdateHostDb.HOSTDB_PURGE_FAILED_HOSTS_THRESHOLD, -1); numResolverThreads = conf.getInt(UpdateHostDb.HOSTDB_NUM_RESOLVER_THREADS, 10); recheckInterval = conf.getInt(UpdateHostDb.HOSTDB_RECHECK_INTERVAL, 86400) * 1000; checkFailed = conf.getBoolean(UpdateHostDb.HOSTDB_CHECK_FAILED, false); checkNew = conf.getBoolean(UpdateHostDb.HOSTDB_CHECK_NEW, false); checkKnown = conf.getBoolean(UpdateHostDb.HOSTDB_CHECK_KNOWN, false); force = conf.getBoolean(UpdateHostDb.HOSTDB_FORCE_CHECK, false); numericFields = conf.getStrings(UpdateHostDb.HOSTDB_NUMERIC_FIELDS); stringFields = conf.getStrings(UpdateHostDb.HOSTDB_STRING_FIELDS); percentiles = conf.getInts(UpdateHostDb.HOSTDB_PERCENTILES); // What fields do we need to collect metadata from if (numericFields != null) { numericFieldWritables = new Text[numericFields.length]; for (int i = 0; i < numericFields.length; i++) { numericFieldWritables[i] = new Text(numericFields[i]); } } if (stringFields != null) { stringFieldWritables = new Text[stringFields.length]; for (int i = 0; i < stringFields.length; i++) { stringFieldWritables[i] = new Text(stringFields[i]); } } // Initialize the thread pool with our queue executor = new ThreadPoolExecutor(numResolverThreads, numResolverThreads, 5, TimeUnit.SECONDS, queue); // Run all threads in the pool executor.prestartAllCoreThreads(); }
From source file:org.apache.nutch.indexer.metadata.MetadataIndexer.java
License:Apache License
public void setConf(Configuration conf) { this.conf = conf; String[] metatags = conf.getStrings(PARSE_CONF_PROPERTY); parseFieldnames = new TreeMap<Utf8, String>(); for (int i = 0; i < metatags.length; i++) { parseFieldnames.put(new Utf8(PARSE_META_PREFIX + metatags[i].toLowerCase(Locale.ROOT)), INDEX_PREFIX + metatags[i]); }// www .j av a 2s.c o m // TODO check conflict between field names e.g. could have same label // from different sources }
From source file:org.apache.nutch.indexer.NutchIndexWriterFactory.java
License:Apache License
/**
 * Instantiates every index writer listed in the {@code indexer.writer.classes}
 * property, in the order they are listed.
 *
 * @param conf the configuration naming the writer classes
 * @return one writer per configured class name; an empty array when the
 *         property is not set
 * @throws RuntimeException if a class cannot be loaded, is not a
 *         {@code NutchIndexWriter}, or cannot be instantiated through its
 *         no-argument constructor; the underlying exception is the cause
 */
public static NutchIndexWriter[] getNutchIndexWriters(Configuration conf) {
  final String[] classes = conf.getStrings("indexer.writer.classes");
  if (classes == null) {
    // Property absent: getStrings returns null, so report "no writers"
    // instead of failing with a NullPointerException.
    return new NutchIndexWriter[0];
  }

  final NutchIndexWriter[] writers = new NutchIndexWriter[classes.length];
  for (int i = 0; i < classes.length; i++) {
    // getStrings does not trim, so "a, b" would otherwise yield " b".
    final String clazz = classes[i].trim();
    try {
      // asSubclass verifies the type at load time, replacing the unchecked cast.
      final Class<? extends NutchIndexWriter> implClass =
          Class.forName(clazz).asSubclass(NutchIndexWriter.class);
      // Class.newInstance() is deprecated; go through the no-arg constructor.
      writers[i] = implClass.getDeclaredConstructor().newInstance();
    } catch (final Exception e) {
      throw new RuntimeException("Couldn't create " + clazz, e);
    }
  }
  return writers;
}
From source file:org.apache.nutch.indexer.urlmeta.URLMetaIndexingFilter.java
License:Apache License
/**
 * Stores the configuration and, when it is non-null, loads the values of the
 * {@code CONF_PROPERTY} property (the "urlmeta.tags" property) into
 * {@code urlMetaTags}.
 *
 * @param conf the configuration to store; may be {@code null}, in which case
 *        {@code urlMetaTags} is left untouched
 */
public void setConf(Configuration conf) {
  this.conf = conf;

  if (conf != null) {
    urlMetaTags = conf.getStrings(CONF_PROPERTY);
  }
}
From source file:org.apache.nutch.parse.headings.HeadingsParseFilter.java
License:Apache License
/**
 * Stores the configuration and loads the values of the {@code headings}
 * property into {@code headings}.
 *
 * @param conf the configuration to read from
 */
public void setConf(Configuration conf) {
  this.conf = conf;

  final String[] configuredHeadings = conf.getStrings("headings");
  headings = configuredHeadings;
}
From source file:org.apache.nutch.parse.html.DOMContentUtils.java
License:Apache License
public void setConf(Configuration conf) { // forceTags is used to override configurable tag ignoring, later on Collection<String> forceTags = new ArrayList<String>(1); linkParams.clear();/*from w w w . ja v a 2s.co m*/ linkParams.put("a", new LinkParams("a", "href", 1)); linkParams.put("area", new LinkParams("area", "href", 0)); if (conf.getBoolean("parser.html.form.use_action", true)) { linkParams.put("form", new LinkParams("form", "action", 1)); if (conf.get("parser.html.form.use_action") != null) forceTags.add("form"); } linkParams.put("frame", new LinkParams("frame", "src", 0)); linkParams.put("iframe", new LinkParams("iframe", "src", 0)); linkParams.put("script", new LinkParams("script", "src", 0)); linkParams.put("link", new LinkParams("link", "href", 0)); linkParams.put("img", new LinkParams("img", "src", 0)); // remove unwanted link tags from the linkParams map String[] ignoreTags = conf.getStrings("parser.html.outlinks.ignore_tags"); for (int i = 0; ignoreTags != null && i < ignoreTags.length; i++) { if (!forceTags.contains(ignoreTags[i])) linkParams.remove(ignoreTags[i]); } }
From source file:org.apache.nutch.plugin.PluginRepository.java
License:Apache License
/** * @throws PluginRuntimeException/* w ww . j ava2 s . c o m*/ * @see java.lang.Object#Object() */ public PluginRepository(Configuration conf) throws RuntimeException { fActivatedPlugins = new HashMap<String, Plugin>(); fExtensionPoints = new HashMap<String, ExtensionPoint>(); this.conf = new Configuration(conf); this.auto = conf.getBoolean("plugin.auto-activation", true); String[] pluginFolders = conf.getStrings("plugin.folders"); PluginManifestParser manifestParser = new PluginManifestParser(this.conf, this); Map<String, PluginDescriptor> allPlugins = manifestParser.parsePluginFolder(pluginFolders); if (allPlugins.isEmpty()) { LOG.warn("No plugins found on paths of property plugin.folders=\"{}\"", conf.get("plugin.folders")); } Pattern excludes = Pattern.compile(conf.get("plugin.excludes", "")); Pattern includes = Pattern.compile(conf.get("plugin.includes", "")); Map<String, PluginDescriptor> filteredPlugins = filter(excludes, includes, allPlugins); fRegisteredPlugins = getDependencyCheckedPlugins(filteredPlugins, this.auto ? allPlugins : filteredPlugins); installExtensionPoints(fRegisteredPlugins); try { installExtensions(fRegisteredPlugins); } catch (PluginRuntimeException e) { LOG.error(e.toString()); throw new RuntimeException(e.getMessage()); } displayStatus(); }
From source file:org.apache.nutch.scoring.urlmeta.URLMetaScoringFilter.java
License:Apache License
/** * handles conf assignment and pulls the value assignment from the * "urlmeta.tags" property/* w ww . j a va2 s. com*/ */ public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) return; urlMetaTags = conf.getStrings(CONF_PROPERTY); }