List of usage examples for org.apache.hadoop.conf.Configuration.getConfResourceAsReader
public Reader getConfResourceAsReader(String name)
name
From source file:org.apache.nutch.net.urlnormalizer.host.HostURLNormalizer.java
License:Apache License
public void setConf(Configuration conf) { this.conf = conf; // get the extensions for domain urlfilter String pluginName = "urlnormalizer-host"; Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(URLNormalizer.class.getName()) .getExtensions();/*ww w. j ava 2 s .co m*/ for (int i = 0; i < extensions.length; i++) { Extension extension = extensions[i]; if (extension.getDescriptor().getPluginId().equals(pluginName)) { attributeFile = extension.getAttribute("file"); break; } } // handle blank non empty input if (attributeFile != null && attributeFile.trim().equals("")) { attributeFile = null; } if (attributeFile != null) { if (LOG.isInfoEnabled()) { LOG.info("Attribute \"file\" is defined for plugin " + pluginName + " as " + attributeFile); } } else { if (LOG.isWarnEnabled()) { LOG.warn("Attribute \"file\" is not defined in plugin.xml for plugin " + pluginName); } } // domain file and attribute "file" take precedence if defined String file = conf.get("urlnormalizer.hosts.file"); String stringRules = conf.get("urlnormalizer.hosts.rules"); if (hostsFile != null) { file = hostsFile; } else if (attributeFile != null) { file = attributeFile; } Reader reader = null; if (stringRules != null) { // takes precedence over files reader = new StringReader(stringRules); } else { reader = conf.getConfResourceAsReader(file); } try { if (reader == null) { reader = new FileReader(file); } readConfiguration(reader); } catch (IOException e) { LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); } }
From source file:org.apache.nutch.net.urlnormalizer.protocol.ProtocolURLNormalizer.java
License:Apache License
public void setConf(Configuration conf) { this.conf = conf; // get the extensions for domain urlfilter String pluginName = "urlnormalizer-protocol"; Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(URLNormalizer.class.getName()) .getExtensions();//from w ww . j ava 2 s. c o m for (int i = 0; i < extensions.length; i++) { Extension extension = extensions[i]; if (extension.getDescriptor().getPluginId().equals(pluginName)) { attributeFile = extension.getAttribute("file"); break; } } // handle blank non empty input if (attributeFile != null && attributeFile.trim().equals("")) { attributeFile = null; } if (attributeFile != null) { if (LOG.isInfoEnabled()) { LOG.info("Attribute \"file\" is defined for plugin " + pluginName + " as " + attributeFile); } } else { if (LOG.isWarnEnabled()) { LOG.warn("Attribute \"file\" is not defined in plugin.xml for plugin " + pluginName); } } // domain file and attribute "file" take precedence if defined String file = conf.get("urlnormalizer.protocols.file"); String stringRules = conf.get("urlnormalizer.protocols.rules"); if (protocolsFile != null) { file = protocolsFile; } else if (attributeFile != null) { file = attributeFile; } Reader reader = null; if (stringRules != null) { // takes precedence over files reader = new StringReader(stringRules); } else { reader = conf.getConfResourceAsReader(file); } try { if (reader == null) { reader = new FileReader(file); } readConfiguration(reader); } catch (IOException e) { LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); } }
From source file:org.apache.nutch.net.urlnormalizer.slash.SlashURLNormalizer.java
License:Apache License
public void setConf(Configuration conf) { this.conf = conf; // get the extensions for domain urlfilter String pluginName = "urlnormalizer-slash"; Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(URLNormalizer.class.getName()) .getExtensions();/*from w ww .java 2 s . c om*/ for (int i = 0; i < extensions.length; i++) { Extension extension = extensions[i]; if (extension.getDescriptor().getPluginId().equals(pluginName)) { attributeFile = extension.getAttribute("file"); break; } } // handle blank non empty input if (attributeFile != null && attributeFile.trim().equals("")) { attributeFile = null; } if (attributeFile != null) { if (LOG.isInfoEnabled()) { LOG.info("Attribute \"file\" is defined for plugin " + pluginName + " as " + attributeFile); } } else { if (LOG.isWarnEnabled()) { LOG.warn("Attribute \"file\" is not defined in plugin.xml for plugin " + pluginName); } } // domain file and attribute "file" take precedence if defined String file = conf.get("urlnormalizer.slashes.file"); String stringRules = conf.get("urlnormalizer.slashes.rules"); if (slashesFile != null) { file = slashesFile; } else if (attributeFile != null) { file = attributeFile; } Reader reader = null; if (stringRules != null) { // takes precedence over files reader = new StringReader(stringRules); } else { reader = conf.getConfResourceAsReader(file); } try { if (reader == null) { reader = new FileReader(file); } readConfiguration(reader); } catch (IOException e) { LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); } }
From source file:org.apache.nutch.parse.AllParsingFilter.java
License:Apache License
/**
 * Opens the rules resource whose name is stored in the configuration under
 * {@code ALL_REGEX_FILE}.
 *
 * @param conf configuration holding the resource name
 * @return the result of {@link Configuration#getConfResourceAsReader(String)}
 *         for that name
 * @throws IOException declared by the overridden method
 */
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
  return conf.getConfResourceAsReader(conf.get(ALL_REGEX_FILE));
}
From source file:org.apache.nutch.parse.jsoupfilter.JsoupParsingFilter.java
License:Apache License
/**
 * Opens the rules resource whose name is stored in the configuration under
 * {@code JSOUP_REGEX_FILE}.
 *
 * @param conf configuration holding the resource name
 * @return the result of {@link Configuration#getConfResourceAsReader(String)}
 *         for that name
 * @throws IOException declared by the overridden method
 */
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
  return conf.getConfResourceAsReader(conf.get(JSOUP_REGEX_FILE));
}
From source file:org.apache.nutch.parse.nasdaq.NasdaqParsingFilter.java
License:Apache License
/**
 * Opens the rules resource whose name is stored in the configuration under
 * {@code NASDAQ_REGEX_FILE}.
 *
 * @param conf configuration holding the resource name
 * @return the result of {@link Configuration#getConfResourceAsReader(String)}
 *         for that name
 * @throws IOException declared by the overridden method
 */
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
  return conf.getConfResourceAsReader(conf.get(NASDAQ_REGEX_FILE));
}
From source file:org.apache.nutch.parse.NbdParsingFilter.java
License:Apache License
/**
 * Opens the rules resource whose name is stored in the configuration under
 * {@code NBD_REGEX_FILE}.
 *
 * @param conf configuration holding the resource name
 * @return the result of {@link Configuration#getConfResourceAsReader(String)}
 *         for that name
 * @throws IOException declared by the overridden method
 */
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
  return conf.getConfResourceAsReader(conf.get(NBD_REGEX_FILE));
}
From source file:org.apache.nutch.parse.qqtech.QQTechFilter.java
License:Apache License
/**
 * Opens the rules resource whose name is stored in the configuration under
 * {@code QQTECH_REGEX_FILE}.
 *
 * @param conf configuration holding the resource name
 * @return the result of {@link Configuration#getConfResourceAsReader(String)}
 *         for that name
 * @throws IOException declared by the overridden method
 */
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
  return conf.getConfResourceAsReader(conf.get(QQTECH_REGEX_FILE));
}
From source file:org.apache.nutch.parse.seekingalpha.SeekingAlphaFilter.java
License:Apache License
/**
 * Opens the rules resource whose name is stored in the configuration under
 * {@code SEEKINGALPHA_REGEX_FILE}.
 *
 * @param conf configuration holding the resource name
 * @return the result of {@link Configuration#getConfResourceAsReader(String)}
 *         for that name
 * @throws IOException declared by the overridden method
 */
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
  return conf.getConfResourceAsReader(conf.get(SEEKINGALPHA_REGEX_FILE));
}
From source file:org.apache.nutch.parse.SinaParsingFilter.java
License:Apache License
/**
 * Opens the rules resource whose name is stored in the configuration under
 * {@code SINA_REGEX_FILE}.
 *
 * @param conf configuration holding the resource name
 * @return the result of {@link Configuration#getConfResourceAsReader(String)}
 *         for that name
 * @throws IOException declared by the overridden method
 */
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
  return conf.getConfResourceAsReader(conf.get(SINA_REGEX_FILE));
}