List of usage examples for org.apache.hadoop.conf.Configuration.getConfResourceAsReader

public Reader getConfResourceAsReader(String name)

Gets a Reader attached to the configuration resource with the given name. The name parameter is the configuration resource name (for example, a file on the classpath such as Nutch's conf directory); the method returns null if the resource cannot be found.
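Before the per-project examples below, here is a minimal, self-contained sketch of the typical pattern: resolve a resource name from a configuration property, open it with getConfResourceAsReader, and read it line by line. The property name my.plugin.rules.file and the default file name rules.txt are hypothetical placeholders for illustration only, not taken from any of the source files listed.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;

import org.apache.hadoop.conf.Configuration;

public class ConfResourceExample {

    // Hypothetical property and default file name, used only for this sketch.
    private static final String RULES_FILE_PROPERTY = "my.plugin.rules.file";

    public static void readRules(Configuration conf) throws IOException {
        // Resolve the resource name from the configuration, then open it as a character stream.
        String resourceName = conf.get(RULES_FILE_PROPERTY, "rules.txt");
        Reader reader = conf.getConfResourceAsReader(resourceName);
        if (reader == null) {
            throw new IOException("Configuration resource not found: " + resourceName);
        }
        try (BufferedReader in = new BufferedReader(reader)) {
            String line;
            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty() || line.startsWith("#")) {
                    continue; // skip blank lines and comments
                }
                // process the rule line ...
            }
        }
    }
}

Most of the real examples below follow this shape: the resource name comes from a configuration property, and the returned Reader is wrapped in a BufferedReader or handed to a parser.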
. From source file:com.atlantbh.nutch.filter.index.omit.config.OmitIndexingFilterConfiguration.java
License:Apache License
public static OmitIndexingFilterConfiguration getInstance(Configuration configuration) {
    try {
        // Get configuration from Nutch /conf folder
        Reader configReader = configuration
                .getConfResourceAsReader(configuration.get(CONFIG_FILE_PATH_PROPERTY));

        // Initialize JAXB
        JAXBContext context = JAXBContext.newInstance(new Class[] { OmitIndexingFilterConfiguration.class,
                OmitIndexingFilterConfigurationEntry.class, FilteringType.class, Target.class });
        Unmarshaller unmarshaller = context.createUnmarshaller();

        // Initialize configuration
        OmitIndexingFilterConfiguration xPathFilterConfiguration = (OmitIndexingFilterConfiguration) unmarshaller
                .unmarshal(configReader);

        return xPathFilterConfiguration;
    } catch (JAXBException e) {
        log.error("Configuration initialization error!");
    }
    return null;
}
From source file:com.atlantbh.nutch.index.alternativedataflow.conf.AlternativeDataFlowIndexingFilterConfiguration.java
License:Apache License
public static AlternativeDataFlowIndexingFilterConfiguration getInstance(Configuration configuration) {
    try {
        // Get configuration from Nutch /conf folder
        Reader configReader = configuration
                .getConfResourceAsReader(configuration.get(CONFIG_FILE_PATH_PROPERTY));

        // Initialize JAXB
        JAXBContext context = JAXBContext.newInstance(
                new Class[] { AlternativeDataFlowIndexingFilterConfiguration.class, Entry.class, Field.class });
        Unmarshaller unmarshaller = context.createUnmarshaller();

        // Initialize configuration
        AlternativeDataFlowIndexingFilterConfiguration xPathFilterConfiguration = (AlternativeDataFlowIndexingFilterConfiguration) unmarshaller
                .unmarshal(configReader);

        return xPathFilterConfiguration;
    } catch (JAXBException e) {
        log.error("Configuration initialization error!");
    }
    return null;
}
From source file:de.informera.dev.nutchManager.thirdParty.RegexURLFilter.java
License:Apache License
/**
 * Rules specified as a config property will override rules specified
 * as a config file.
 */
protected Reader getRulesReader(Configuration conf) throws IOException {
    String stringRules = conf.get(URLFILTER_REGEX_RULES);
    if (stringRules != null) {
        return new StringReader(stringRules);
    }
    String fileRules = conf.get(URLFILTER_REGEX_FILE);
    return conf.getConfResourceAsReader(fileRules);
}
From source file:org.apache.nutch.analysis.CommonGrams.java
License:Apache License
/** Construct using the provided config file. */
private void init(Configuration conf) {
    // First, try to retrieve some commonTerms cached in configuration.
    commonTerms = (HashMap) conf.getObject(KEY);
    if (commonTerms != null) {
        return;
    }

    // Otherwise, read the terms.file
    try {
        commonTerms = new HashMap();
        Reader reader = conf.getConfResourceAsReader(conf.get("analysis.common.terms.file"));
        BufferedReader in = new BufferedReader(reader);
        String line;
        while ((line = in.readLine()) != null) {
            line = line.trim();
            if (line.startsWith("#") || "".equals(line)) // skip comments
                continue;
            TokenStream ts = new NutchDocumentTokenizer(new StringReader(line));
            Token token = ts.next();
            if (token == null) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Line does not contain a field name: " + line);
                }
                continue;
            }
            String field = token.termText();
            token = ts.next();
            if (token == null) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Line contains only a field name, no word: " + line);
                }
                continue;
            }
            String gram = token.termText();
            while ((token = ts.next()) != null) {
                gram = gram + SEPARATOR + token.termText();
            }
            HashSet table = (HashSet) commonTerms.get(field);
            if (table == null) {
                table = new HashSet();
                commonTerms.put(field, table);
            }
            table.add(gram);
        }
        conf.setObject(KEY, commonTerms);
    } catch (IOException e) {
        throw new RuntimeException(e.toString());
    }
}
From source file:org.apache.nutch.crawl.MimeAdaptiveFetchSchedule.java
License:Apache License
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf == null)
        return;

    // Read and set the default INC and DEC rates in case we cannot set values based on MIME-type
    defaultIncRate = conf.getFloat(SCHEDULE_INC_RATE, 0.2f);
    defaultDecRate = conf.getFloat(SCHEDULE_DEC_RATE, 0.2f);

    // Where's the mime/factor file?
    Reader mimeFile = conf.getConfResourceAsReader(conf.get(SCHEDULE_MIME_FILE, "adaptive-mimetypes.txt"));

    try {
        readMimeFile(mimeFile);
    } catch (IOException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
}
From source file:org.apache.nutch.indexer.filter.MimeTypeIndexingFilter.java
License:Apache License
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    MIME = new MimeUtil(conf);

    // load the file of the values
    String file = conf.get(MIMEFILTER_REGEX_FILE, "");

    if (file != null) {
        if (file.isEmpty()) {
            LOG.warn(String.format("Missing %s property, ALL mimetypes will be allowed", MIMEFILTER_REGEX_FILE));
        } else {
            Reader reader = conf.getConfResourceAsReader(file);

            try {
                readConfiguration(reader);
            } catch (IOException e) {
                if (LOG.isErrorEnabled()) {
                    LOG.error(e.getMessage());
                }
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }
}
From source file:org.apache.nutch.indexer.urlfilter.UrlIndexingFilter.java
License:Apache License
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
    String fileRules = conf.get(URLINDEXINGFILTER_REGEX_FILE);
    return conf.getConfResourceAsReader(fileRules);
}
From source file:org.apache.nutch.indexwriter.elastic.ElasticIndexWriter.java
License:Apache License
@Override
public void open(Configuration job) throws IOException {
    clusterName = job.get(ElasticConstants.CLUSTER);
    host = job.get(ElasticConstants.HOST);
    port = job.getInt(ElasticConstants.PORT, 9300);

    Builder settingsBuilder = ImmutableSettings.settingsBuilder().classLoader(Settings.class.getClassLoader());

    // Parse additional key=value settings from the elasticsearch.conf resource.
    BufferedReader reader = new BufferedReader(job.getConfResourceAsReader("elasticsearch.conf"));
    String line;
    String[] parts;

    while ((line = reader.readLine()) != null) {
        if (StringUtils.isNotBlank(line) && !line.startsWith("#")) {
            line = line.trim();
            parts = line.split("=");

            if (parts.length == 2) {
                settingsBuilder.put(parts[0].trim(), parts[1].trim());
            }
        }
    }

    if (StringUtils.isNotBlank(clusterName))
        settingsBuilder.put("cluster.name", clusterName);

    // Set the cluster name and build the settings
    Settings settings = settingsBuilder.build();

    // Prefer TransportClient
    if (host != null && port > 1) {
        client = new TransportClient(settings).addTransportAddress(new InetSocketTransportAddress(host, port));
    } else if (clusterName != null) {
        node = nodeBuilder().settings(settings).client(true).node();
        client = node.client();
    }

    bulk = client.prepareBulk();
    defaultIndex = job.get(ElasticConstants.INDEX, "nutch");
    maxBulkDocs = job.getInt(ElasticConstants.MAX_BULK_DOCS, DEFAULT_MAX_BULK_DOCS);
    maxBulkLength = job.getInt(ElasticConstants.MAX_BULK_LENGTH, DEFAULT_MAX_BULK_LENGTH);
}
From source file:org.apache.nutch.indexwriter.elastic2.ElasticIndexWriter.java
License:Apache License
@Override
public void open(Configuration job) throws IOException {
    clusterName = job.get(ElasticConstants.CLUSTER);
    host = job.get(ElasticConstants.HOST);
    port = job.getInt(ElasticConstants.PORT, 9300);

    Builder settingsBuilder = Settings.builder();

    // Parse additional key=value settings from the elasticsearch.conf resource.
    BufferedReader reader = new BufferedReader(job.getConfResourceAsReader("elasticsearch.conf"));
    String line;
    String[] parts;

    while ((line = reader.readLine()) != null) {
        if (StringUtils.isNotBlank(line) && !line.startsWith("#")) {
            line = line.trim();
            parts = line.split("=");

            if (parts.length == 2) {
                settingsBuilder.put(parts[0].trim(), parts[1].trim());
            }
        }
    }

    if (StringUtils.isNotBlank(clusterName))
        settingsBuilder.put("cluster.name", clusterName);

    // Set the cluster name and build the settings
    Settings settings = settingsBuilder.build();

    // Prefer TransportClient
    if (host != null && port > 1) {
        client = TransportClient.builder().settings(settings).build()
                .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(host), port));
    } else if (clusterName != null) {
        node = nodeBuilder().settings(settings).client(true).node();
        client = node.client();
    }

    bulk = client.prepareBulk();
    defaultIndex = job.get(ElasticConstants.INDEX, "nutch");
    maxBulkDocs = job.getInt(ElasticConstants.MAX_BULK_DOCS, DEFAULT_MAX_BULK_DOCS);
    maxBulkLength = job.getInt(ElasticConstants.MAX_BULK_LENGTH, DEFAULT_MAX_BULK_LENGTH);
}
From source file:org.apache.nutch.net.RegexURLFilter.java
License:Apache License
/**
 * Rules specified as a config property will override rules specified as a
 * config file.
 */
protected Reader getRulesReader(Configuration conf) throws IOException {
    String stringRules = conf.get(URLFILTER_REGEX_RULES);
    if (stringRules != null) {
        if (LOG.isDebugEnabled()) {
            // LOG.debug("Url filter regex rules : \n" + stringRules);
        }
        return new StringReader(stringRules);
    }
    String fileRules = conf.get(URLFILTER_REGEX_FILE);
    return conf.getConfResourceAsReader(fileRules);
}