Example usage for org.apache.hadoop.conf Configuration getFloat

List of usage examples for org.apache.hadoop.conf Configuration getFloat

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration.getFloat.

Prototype

public float getFloat(String name, float defaultValue) 

Source Link

Document

Gets the value of the name property as a float. If no such property exists, the provided defaultValue is returned.

Usage

From source file:org.apache.nutch.analysis.lang.LanguageQueryFilter.java

License:Apache License

/**
 * Stores the given configuration and applies the boost read from the
 * {@code query.lang.boost} property, using {@code 0.0f} as the default.
 */
public void setConf(Configuration conf) {
    this.conf = conf;
    final float langBoost = conf.getFloat("query.lang.boost", 0.0f);
    setBoost(langBoost);
}

From source file:org.apache.nutch.crawl.MimeAdaptiveFetchSchedule.java

License:Apache License

/**
 * Applies the configuration: reads the default increment/decrement rates and
 * loads the MIME-type factor file.
 *
 * @param conf the configuration to apply; when {@code null} it is only passed
 *             to the superclass and nothing else is read
 */
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf == null) {
        return;
    }

    // Read and set the default INC and DEC rates in case we cannot set values based on MIME-type
    defaultIncRate = conf.getFloat(SCHEDULE_INC_RATE, 0.2f);
    defaultDecRate = conf.getFloat(SCHEDULE_DEC_RATE, 0.2f);

    // Where's the mime/factor file?
    String mimeFileName = conf.get(SCHEDULE_MIME_FILE, "adaptive-mimetypes.txt");

    // try-with-resources guarantees the reader is closed, even if parsing fails
    try (Reader mimeFile = conf.getConfResourceAsReader(mimeFileName)) {
        if (mimeFile == null) {
            // The resource could not be located; keep the default rates only
            LOG.error("Can't load MIME factor file: " + mimeFileName);
            return;
        }
        readMimeFile(mimeFile);
    } catch (IOException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
}

From source file:org.apache.nutch.fetcher.FetchItemQueues.java

License:Apache License

public FetchItemQueues(Configuration conf) {
    this.conf = conf;
    this.maxThreads = conf.getInt("fetcher.threads.per.queue", 1);
    queueMode = conf.get("fetcher.queue.mode", QUEUE_MODE_HOST);
    queueMode = checkQueueMode(queueMode);
    LOG.info("Using queue mode : " + queueMode);

    this.crawlDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
    this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay", 0.0f) * 1000);
    this.timelimit = conf.getLong("fetcher.timelimit", -1);
    this.maxExceptionsPerQueue = conf.getInt("fetcher.max.exceptions.per.queue", -1);
}

From source file:org.apache.nutch.microformats.reltag.RelTagQueryFilter.java

License:Apache License

/**
 * Stores the given configuration and applies the boost read from the
 * {@code query.tag.boost} property, using {@code 1.0f} as the default.
 */
public void setConf(Configuration conf) {
    this.conf = conf;
    final float tagBoost = conf.getFloat("query.tag.boost", 1.0f);
    setBoost(tagBoost);
}

From source file:org.apache.nutch.scoring.link.LinkAnalysisScoringFilter.java

License:Apache License

/**
 * Stores the given configuration and reads the normalized score from the
 * {@code link.analyze.normalize.score} property, using {@code 1.0f} as the
 * default.
 */
public void setConf(Configuration conf) {
    this.conf = conf;
    final float configuredScore = conf.getFloat("link.analyze.normalize.score", 1.0f);
    normalizedScore = configuredScore;
}

From source file:org.apache.nutch.scoring.opic.OPICScoringFilter.java

License:Apache License

/**
 * Stores the given configuration and loads the scoring parameters from it:
 * the score power, the internal and external link score factors, and the
 * count-filtered flag.
 */
public void setConf(Configuration conf) {
    this.conf = conf;

    // Read every setting first, then publish them to the fields
    final float power = conf.getFloat("indexer.score.power", 0.5f);
    final float internalFactor = conf.getFloat("db.score.link.internal", 1.0f);
    final float externalFactor = conf.getFloat("db.score.link.external", 1.0f);
    final boolean filteredCounted = conf.getBoolean("db.score.count.filtered", false);

    scorePower = power;
    internalScoreFactor = internalFactor;
    externalScoreFactor = externalFactor;
    countFiltered = filteredCounted;
}

From source file:org.apache.nutch.scoring.opic.TestOPICScoringFilter.java

License:Apache License

/**
 * Runs an in-memory simulation of DEPTH rounds of scoring with an
 * {@link OPICScoringFilter} and records scores in {@code resultScores}.
 *
 * <p>Each round distributes the score of every known page to its outlinks,
 * creates pages for previously unseen outlinks, and then calls
 * {@code updateScore} for every page with the score data collected for it.
 *
 * @throws Exception declared by the signature; nothing is caught here
 */
@Before
public void setUp() throws Exception {

    Configuration conf = NutchConfiguration.create();
    // LinkedHashMap dbWebPages is used instead of a persistent
    // data store for this test class.
    // Key: reversed URL. Value: a map whose first entry pairs the WebPage
    // with the list of ScoreDatum collected for it.
    Map<String, Map<WebPage, List<ScoreDatum>>> dbWebPages = new LinkedHashMap<String, Map<WebPage, List<ScoreDatum>>>();

    // All WebPages stored in this map with an initial true value.
    // After processing, it is set to false.
    Map<String, Boolean> dbWebPagesControl = new LinkedHashMap<String, Boolean>();

    // A second instance is used as the holder of the link test data
    TestOPICScoringFilter self = new TestOPICScoringFilter();
    self.fillLinks();

    float scoreInjected = conf.getFloat("db.score.injected", 1.0f);

    scoringFilter = new OPICScoringFilter();
    scoringFilter.setConf(conf);

    // injecting seed list, with scores attached to webpages
    for (String url : self.seedList) {
        WebPage row = WebPage.newBuilder().build();
        row.setScore(scoreInjected);
        scoringFilter.injectedScore(url, row);

        List<ScoreDatum> scList = new LinkedList<ScoreDatum>();
        Map<WebPage, List<ScoreDatum>> webPageMap = new HashMap<WebPage, List<ScoreDatum>>();
        webPageMap.put(row, scList);
        dbWebPages.put(TableUtil.reverseUrl(url), webPageMap);
        dbWebPagesControl.put(TableUtil.reverseUrl(url), true);
    }

    // Depth Loop
    for (int i = 1; i <= DEPTH; i++) {
        Iterator<Map.Entry<String, Map<WebPage, List<ScoreDatum>>>> iter = dbWebPages.entrySet().iterator();

        // OPIC Score calculated for each website one by one
        while (iter.hasNext()) {
            Map.Entry<String, Map<WebPage, List<ScoreDatum>>> entry = iter.next();
            Map<WebPage, List<ScoreDatum>> webPageMap = entry.getValue();

            // Only the first entry of the inner map is read
            WebPage row = null;
            List<ScoreDatum> scoreList = null;
            Iterator<Map.Entry<WebPage, List<ScoreDatum>>> iters = webPageMap.entrySet().iterator();
            if (iters.hasNext()) {
                Map.Entry<WebPage, List<ScoreDatum>> values = iters.next();
                row = values.getKey();
                scoreList = values.getValue();
            }

            String reverseUrl = entry.getKey();
            String url = TableUtil.unreverseUrl(reverseUrl);
            // NOTE(review): row is still null here if the inner map was empty,
            // which would throw a NullPointerException
            float score = row.getScore();

            // The generator sort value is applied only while the control flag
            // for this page is still true; the flag is then cleared
            if (dbWebPagesControl.get(TableUtil.reverseUrl(url))) {
                row.setScore(scoringFilter.generatorSortValue(url, row, score));
                dbWebPagesControl.put(TableUtil.reverseUrl(url), false);
            }

            // getting outlinks from testdata
            // NOTE(review): assumes linkList has an entry for every url reached
            // here; a null array would fail in the loop below - confirm
            String[] seedOutlinks = self.linkList.get(url);
            for (String seedOutlink : seedOutlinks) {
                row.getOutlinks().put(seedOutlink, "");
            }

            // The shared list is reused for every page, so reset it first
            self.outlinkedScoreData.clear();

            // Existing outlinks are added to outlinkedScoreData
            Map<CharSequence, CharSequence> outlinks = row.getOutlinks();
            if (outlinks != null) {
                for (Entry<CharSequence, CharSequence> e : outlinks.entrySet()) {
                    int depth = Integer.MAX_VALUE;
                    self.outlinkedScoreData
                            .add(new ScoreDatum(0.0f, e.getKey().toString(), e.getValue().toString(), depth));
                }
            }
            scoringFilter.distributeScoreToOutlinks(url, row, self.outlinkedScoreData,
                    (outlinks == null ? 0 : outlinks.size()));

            // DbUpdate Reducer simulation
            for (ScoreDatum sc : self.outlinkedScoreData) {
                if (dbWebPages.get(TableUtil.reverseUrl(sc.getUrl())) == null) {
                    // Check each outlink and create a new webpage if it does
                    // not exist in the database (dbWebPages).
                    // NOTE(review): this put happens while iter is iterating
                    // over dbWebPages.entrySet(); adding a key to a
                    // LinkedHashMap mid-iteration can make a later iter.next()
                    // throw ConcurrentModificationException - verify against
                    // the test data
                    WebPage outlinkRow = WebPage.newBuilder().build();
                    scoringFilter.initialScore(sc.getUrl(), outlinkRow);
                    List<ScoreDatum> newScoreList = new LinkedList<ScoreDatum>();
                    newScoreList.add(sc);
                    Map<WebPage, List<ScoreDatum>> values = new HashMap<WebPage, List<ScoreDatum>>();
                    values.put(outlinkRow, newScoreList);
                    dbWebPages.put(TableUtil.reverseUrl(sc.getUrl()), values);
                    dbWebPagesControl.put(TableUtil.reverseUrl(sc.getUrl()), true);
                } else {
                    // Outlinks are added to list for each webpage
                    Map<WebPage, List<ScoreDatum>> values = dbWebPages.get(TableUtil.reverseUrl(sc.getUrl()));
                    Iterator<Map.Entry<WebPage, List<ScoreDatum>>> value = values.entrySet().iterator();
                    if (value.hasNext()) {
                        Map.Entry<WebPage, List<ScoreDatum>> list = value.next();
                        // Reuses the outer scoreList variable to hold the
                        // target page's list
                        scoreList = list.getValue();
                        scoreList.add(sc);
                    }
                }
            }
        }

        // Simulate Reducing
        for (Map.Entry<String, Map<WebPage, List<ScoreDatum>>> page : dbWebPages.entrySet()) {

            String reversedUrl = page.getKey();
            String url = TableUtil.unreverseUrl(reversedUrl);

            Iterator<Map.Entry<WebPage, List<ScoreDatum>>> rr = page.getValue().entrySet().iterator();

            // As above, only the first entry of the inner map is used
            List<ScoreDatum> inlinkedScoreDataList = null;
            WebPage row = null;
            if (rr.hasNext()) {
                Map.Entry<WebPage, List<ScoreDatum>> aa = rr.next();
                inlinkedScoreDataList = aa.getValue();
                row = aa.getKey();
            }
            // Scores are updated here
            scoringFilter.updateScore(url, row, inlinkedScoreDataList);
            // Collected score data is consumed once per round
            inlinkedScoreDataList.clear();
            HashMap<String, Float> result = new HashMap<String, Float>();
            result.put(url, row.getScore());

            // NOTE(review): the same key i is written for every page in this
            // round; if resultScores is a plain Map, only the last page's
            // result remains for depth i - confirm this is intended
            resultScores.put(i, result);
        }

    }
}

From source file:org.apache.nutch.scoring.pagerank.PageRankScoringFilter.java

License:Apache License

/**
 * Stores the given configuration, loads the scoring parameters from it and
 * tries to create the location extractor. If creating the extractor fails
 * for any reason, {@code extractor} is set to {@code null}.
 */
public void setConf(Configuration conf) {
    this.conf = conf;

    // Read every setting first, then publish them to the fields
    final float power = conf.getFloat("indexer.score.power", 0.5f);
    final float internalFactor = conf.getFloat("db.score.link.internal", 1.0f);
    final float externalFactor = conf.getFloat("db.score.link.external", 1.0f);
    final float keywordFactor = conf.getFloat("db.score.science.keywords", 1.0f);
    final boolean filteredCounted = conf.getBoolean("db.score.count.filtered", false);

    scorePower = power;
    internalScoreFactor = internalFactor;
    externalScoreFactor = externalFactor;
    keywordScoreFactor = keywordFactor;
    countFiltered = filteredCounted;

    try {
        extractor = new LocationExtractor("IndexingDir");
    } catch (Exception ignored) {
        // Best effort: a failure to build the extractor leaves it unset
        extractor = null;
    }
}

From source file:org.apache.nutch.searcher.basic.BasicQueryFilter.java

License:Apache License

/**
 * Stores the given configuration and loads the per-field boosts (URL, anchor,
 * content, title, host) and the phrase boost from it.
 */
public void setConf(Configuration conf) {
    this.conf = conf;

    // Per-field boosts, indexed by the *_BOOST constants
    FIELD_BOOSTS[URL_BOOST] = conf.getFloat("query.url.boost", 4.0f);
    FIELD_BOOSTS[ANCHOR_BOOST] = conf.getFloat("query.anchor.boost", 2.0f);
    FIELD_BOOSTS[CONTENT_BOOST] = conf.getFloat("query.content.boost", 1.0f);
    FIELD_BOOSTS[TITLE_BOOST] = conf.getFloat("query.title.boost", 1.5f);
    FIELD_BOOSTS[HOST_BOOST] = conf.getFloat("query.host.boost", 2.0f);

    PHRASE_BOOST = conf.getFloat("query.phrase.boost", 1.0f);
}

From source file:org.apache.nutch.searcher.LuceneQueryOptimizer.java

License:Apache License

/**
 * Construct an optimizer that caches and uses filters for required clauses
 * whose boost is zero.
 *
 * <p>All settings are read from the configuration: the filter cache
 * threshold, the timer tick length and tick count, the maximum number of
 * full-text matches to rank, and the index-server response timeout. A
 * positive response timeout overrides the tick settings (tick count becomes
 * the timeout value, tick length becomes 1000). {@code initTimerThread} is
 * invoked only when the resulting tick count is positive.
 *
 * @param conf
 *          the configuration the optimizer settings are read from
 */
public LuceneQueryOptimizer(Configuration conf) {
    this.threshold = conf.getFloat("searcher.filter.cache.threshold", 0.05f);
    this.tickLength = conf.getInt("searcher.max.time.tick_length", 200);
    this.maxTickCount = conf.getInt("searcher.max.time.tick_count", -1);
    this.maxFulltextMatchesRanked = conf.getInt(Global.MAX_FULLTEXT_MATCHES_RANKED, -1);
    this.timeoutResponse = conf.getInt(Global.TIMEOUT_INDEX_SERVERS_RESPONSE, -1);

    // A configured response timeout takes precedence over the tick settings
    if (timeoutResponse > 0) {
        this.maxTickCount = timeoutResponse;
        this.tickLength = 1000;
    }
    if (this.maxTickCount > 0) {
        initTimerThread(this.tickLength);
    }
}