gate.termraider.util.Utilities.java Source code

Java tutorial

Introduction

Here is the source code for gate.termraider.util.Utilities.java

Source

/*
 *  Copyright (c) 2008--2014, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  $Id: Utilities.java 17718 2014-03-20 20:40:06Z adamfunk $
 */
package gate.termraider.util;

import gate.Annotation;
import gate.Document;
import gate.FeatureMap;
import gate.creole.ANNIEConstants;
import java.io.File;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.WordUtils;

/**
 * Static helper methods: counting and grouping in maps keyed by
 * {@link Term}, small numeric and string conversions, file-name handling,
 * and computation of a lower-cased "canonical" string for annotations.
 *
 * <p>The class implements {@link ANNIEConstants} so that
 * {@code TOKEN_STRING_FEATURE_NAME} can be referenced unqualified.</p>
 *
 * <p>None of the methods synchronise; callers sharing maps between threads
 * must provide their own locking.</p>
 */
public class Utilities implements ANNIEConstants {

    public static final String EXTENSION_CSV = "csv";

    /** log10(2), computed once so that {@link #log2(double)} needs a single logarithm per call. */
    private static final double LOG10_OF_2 = Math.log10(2.0);

    /**
     * Adds {@code increment} to the count stored for {@code key} and stores
     * the result back in {@code map}.
     *
     * <p>This is a little dodgy because it forces the new value to be an
     * {@link Integer} (whatever {@link Number} subtype was stored before is
     * truncated via {@code intValue()}); to be used carefully.</p>
     *
     * @param map       the map of counts to update in place
     * @param key       the term whose count is changed
     * @param increment the amount to add (may be negative)
     * @return the new count for {@code key}; a key that is absent or mapped
     *         to {@code null} is treated as having a count of zero
     */
    public static int incrementMap(Map<Term, Number> map, Term key, int increment) {
        // One get() instead of containsKey()+get(): cheaper, and a key that is
        // explicitly mapped to null no longer causes a NullPointerException.
        Number previous = map.get(key);
        int count = (previous == null) ? increment : previous.intValue() + increment;
        map.put(key, Integer.valueOf(count));
        return count;
    }

    /**
     * Computes the arithmetic mean of a list of doubles.
     *
     * @param list the values to average; must not contain {@code null} elements
     * @return the mean, or {@code 0.0} for an empty list
     */
    public static double meanDoubleList(List<Double> list) {
        if (list.isEmpty()) {
            return 0.0;
        }
        double total = 0.0;
        for (Double item : list) {
            total += item;
        }
        return total / ((double) list.size());
    }

    /**
     * Converts an arbitrary object to a {@link Double}.
     *
     * @param x a {@link Number}, or any object whose {@code toString()} is
     *          parseable by {@link Double#parseDouble(String)}
     * @return the numeric value of {@code x}
     * @throws NumberFormatException if {@code x} is not a {@code Number} and
     *         its string form is not a valid double
     * @throws NullPointerException if {@code x} is {@code null}
     */
    public static Double convertToDouble(Object x) {
        if (x instanceof Number) {
            return ((Number) x).doubleValue();
        }
        return Double.parseDouble(x.toString());
    }

    /**
     * Trims the input, capitalises the first letter of every
     * whitespace-separated word and then removes all whitespace, e.g.
     * {@code " foo bar "} becomes {@code "FooBar"} (i.e. UpperCamelCase).
     *
     * @param input the raw string
     * @return the cleaned, whitespace-free string
     */
    public static String cleanAndCamelCase(String input) {
        String trimmed = StringUtils.trimToEmpty(input);
        return WordUtils.capitalize(trimmed).replaceAll("\\s+", "");
    }

    /* The following methods are NOT cruft but are used in some JAPEs,
     * so don't delete them.
     */

    /**
     * Stores the result of
     * {@link #getCanonicalFromLemma(Annotation, Document, String)} in the
     * token's {@code "canonical"} feature.
     *
     * @param token            the annotation to update
     * @param doc              the document containing {@code token}
     * @param lemmaFeatureName name of the feature holding the lemma
     */
    public static void setCanonicalFromLemma(Annotation token, Document doc, String lemmaFeatureName) {
        String canonical = getCanonicalFromLemma(token, doc, lemmaFeatureName);
        token.getFeatures().put("canonical", canonical);
    }

    /**
     * Returns the lower-cased lemma of {@code token}. When the lemma feature
     * is missing, {@code null}, empty or {@code "<unknown>"}, the result of
     * {@link #getCanonicalFromString(Annotation, Document)} is returned
     * instead.
     *
     * @param token            the annotation to inspect
     * @param doc              the document containing {@code token}
     * @param lemmaFeatureName name of the feature holding the lemma
     * @return the lower-cased canonical form
     */
    public static String getCanonicalFromLemma(Annotation token, Document doc, String lemmaFeatureName) {
        // get() + null check also covers a feature that is present but null,
        // which previously threw a NullPointerException.
        Object lemma = token.getFeatures().get(lemmaFeatureName);
        if (lemma != null) {
            String canonical = lemma.toString().toLowerCase();
            if (!canonical.isEmpty() && !canonical.equals("<unknown>")) {
                return canonical;
            }
        }
        // No usable lemma: fall back to the token string / document text.
        return getCanonicalFromString(token, doc);
    }

    /**
     * Stores the result of
     * {@link #getCanonicalFromString(Annotation, Document)} in the token's
     * {@code "canonical"} feature.
     *
     * @param token the annotation to update
     * @param doc   the document containing {@code token}
     */
    public static void setCanonicalFromString(Annotation token, Document doc) {
        String canonical = getCanonicalFromString(token, doc);
        token.getFeatures().put("canonical", canonical);
    }

    /**
     * Returns the lower-cased value of the token's
     * {@code TOKEN_STRING_FEATURE_NAME} feature, or, when that feature is
     * missing or {@code null}, the lower-cased document text obtained from
     * {@code gate.Utils.stringFor(doc, token)}.
     *
     * @param token the annotation to inspect
     * @param doc   the document containing {@code token}
     * @return the lower-cased canonical form
     */
    public static String getCanonicalFromString(Annotation token, Document doc) {
        Object tokenString = token.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
        String text = (tokenString != null)
                ? tokenString.toString()
                : gate.Utils.stringFor(doc, token);
        return text.toLowerCase();
    }

    /**
     * Returns the document's source URL as a string, or the document's name
     * when it has no source URL.
     *
     * @param document the document to identify
     * @return the source URL string or the document name
     */
    public static String sourceOrName(Document document) {
        URL url = document.getSourceUrl();
        return (url == null) ? document.getName() : url.toString();
    }

    /**
     * Appends {@code extension} to the file name unless the name already
     * contains a dot anywhere (so a name such as {@code ".hidden"} is also
     * left unchanged).
     *
     * @param file      the file whose name is checked
     * @param extension the extension to add, with or without a leading dot
     * @return {@code file} itself if its name contains a dot, otherwise a new
     *         {@link File} in the same parent directory with the extension added
     */
    public static File addExtensionIfNotExtended(File file, String extension) {
        String name = file.getName();
        if (name.contains(".")) {
            return file;
        }

        // Accept both "csv" and ".csv" without producing a double dot.
        String suffix = extension.startsWith(".") ? extension : "." + extension;
        return new File(file.getParentFile(), name + suffix);
    }

    /**
     * Null-safe conversion of an {@link Integer} to a string.
     *
     * @param i the value, possibly {@code null}
     * @return the decimal representation, or {@code "<null>"} for {@code null}
     */
    public static String integerToString(Integer i) {
        return (i == null) ? "<null>" : i.toString();
    }

    /**
     * Computes the base-2 logarithm using the change-of-base identity
     * {@code log_b x = log_a x / log_a b} with {@code a = 10}.
     *
     * @param input the value whose logarithm is wanted
     * @return {@code log2(input)}; special values follow {@link Math#log10(double)}
     */
    public static double log2(double input) {
        return Math.log10(input) / LOG10_OF_2;
    }

    /**
     * Adds {@code value} to the set stored under {@code key}, creating and
     * registering the set first if the key has no set yet.
     *
     * @param map   the map of sets to update in place
     * @param key   the term under which the value is filed
     * @param value the string to add
     */
    public static void addToMapSet(Map<Term, Set<String>> map, Term key, String value) {
        getStringSetFromMap(map, key).add(value);
    }

    /**
     * Stores {@code value} for {@code term} in the sub-map belonging to
     * {@code type}, creating and registering the sub-map if necessary.
     *
     * @param map   the two-level score map to update in place
     * @param type  the score type selecting the sub-map
     * @param term  the term whose score is set
     * @param value the score to store
     */
    public static void setScoreTermValue(Map<ScoreType, Map<Term, Number>> map, ScoreType type, Term term,
            Number value) {
        getOrCreateSubmap(map, type).put(term, value);
    }

    /**
     * Adds {@code increment} to the value stored for {@code term} in the
     * sub-map belonging to {@code type}, creating the sub-map if necessary.
     * Forces the ultimate value to be Integer.
     *
     * @param map       the two-level score map to update in place
     * @param type      the score type selecting the sub-map
     * @param term      the term whose count is changed
     * @param increment the amount to add; must not be {@code null}
     */
    public static void incrementScoreTermValue(Map<ScoreType, Map<Term, Number>> map, ScoreType type, Term term,
            Integer increment) {
        // Unbox first so that a null increment fails before the map is touched.
        int delta = increment.intValue();
        incrementMap(getOrCreateSubmap(map, type), term, delta);
    }

    /**
     * Returns the set stored under {@code key}; if there is none (the key is
     * absent or mapped to {@code null}) a new empty set is created, stored in
     * the map and returned. The returned set is the live one held by the map.
     *
     * @param map the map of sets, possibly updated in place
     * @param key the term to look up
     * @return the (never {@code null}) set associated with {@code key}
     */
    public static Set<String> getStringSetFromMap(Map<Term, Set<String>> map, Term key) {
        Set<String> valueSet = map.get(key);
        if (valueSet == null) {
            valueSet = new HashSet<String>();
            map.put(key, valueSet);
        }
        return valueSet;
    }

    /** Returns the sub-map for {@code type}, creating and registering an empty one if needed. */
    private static Map<Term, Number> getOrCreateSubmap(Map<ScoreType, Map<Term, Number>> map, ScoreType type) {
        Map<Term, Number> submap = map.get(type);
        if (submap == null) {
            submap = new HashMap<Term, Number>();
            map.put(type, submap);
        }
        return submap;
    }

}