opennlp.tools.jsmlearning.FeatureSpaceCoverageProcessor.java Source code

Java tutorial

Introduction

Here is the source code for opennlp.tools.jsmlearning.FeatureSpaceCoverageProcessor.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package opennlp.tools.jsmlearning;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;

/**
 * Helper for comparing tabular records and attribute/value rules.
 *
 * <p>Records are {@code String[]} rows whose columns are named by {@link #header}.
 * Rules are maps from attribute name to either a single value or a brace-delimited,
 * comma-separated set of values such as {@code {a, b}}. The class offers:
 * <ul>
 *   <li>distance scores between records ({@code calcDistance});</li>
 *   <li>intersection of two records or two rules ({@code computeIntersection});</li>
 *   <li>coverage checks of a rule against a record or another rule.</li>
 * </ul>
 * {@link #initParamMap(String[], String[])} must be called before the other methods.
 */
public class FeatureSpaceCoverageProcessor {

    /** Column name to column index, built from {@link #header} by {@code initParamMap}. */
    public Map<String, Integer> paramMap = new HashMap<String, Integer>();
    /** Column names, in column order. */
    public String[] header;
    /** Names of the attributes that participate in intersection and coverage checks. */
    String[] attributes;

    public FeatureSpaceCoverageProcessor() {

    }

    /**
     * Stores the attribute list and header and rebuilds the name-to-index map.
     *
     * @param attributes attribute names iterated by the intersection/coverage methods
     * @param header     column names; the position of a name is its column index
     */
    public void initParamMap(String[] attributes, String[] header) {
        this.header = header;
        this.attributes = attributes;
        // Drop entries from any previous header so stale names cannot resolve to wrong indices.
        paramMap.clear();
        for (int m = 0; m < header.length; m++) {
            paramMap.put(header[m], m);
        }
    }

    /**
     * Distance between two single records.
     *
     * <p>Returns {@code 100000f} when the "First Level Category" values differ. Otherwise the
     * score starts at {@code 0.0000001f} (second level category equal) or {@code 0.01f}
     * (different) and the absolute latitude and longitude differences, each divided by 100,
     * are added. Returns {@code 1000000f} when either coordinate differs by more than 1 or
     * when the coordinates cannot be read or parsed.
     *
     * @param seed      reference record
     * @param candidate record being compared
     * @return the distance score described above
     * @throws Exception if {@code initParamMap} has not populated {@link #paramMap}
     */
    public Float calcDistance(String[] seed, String[] candidate) throws Exception {
        if (paramMap.isEmpty())
            throw new Exception("paramMap.isEmpty()");

        int p1 = paramMap.get("First Level Category");
        int p2 = paramMap.get("Second Level Category");
        if (!seed[p1].equals(candidate[p1])) {
            return 100000f;
        }
        float score = seed[p2].equals(candidate[p2]) ? 0.0000001f : 0.01f;

        try {
            int p3 = paramMap.get("Latitude");
            int p4 = paramMap.get("Longitude");
            double latDiff = Math.abs(Double.parseDouble(seed[p3]) - Double.parseDouble(candidate[p3]));
            double longDiff = Math.abs(Double.parseDouble(seed[p4]) - Double.parseDouble(candidate[p4]));
            if (latDiff > 1 || longDiff > 1) {
                return 1000000f;
            }
            score += (float) latDiff / 100.0f + (float) longDiff / 100.0f;
        } catch (Exception e) {
            // Missing columns or unparsable coordinates are reported as "far away".
            return 1000000f;
        }

        return score;
    }

    /**
     * Distance between a set of records and a single record, taking the closest match.
     *
     * <p>{@code seed} is indexed as {@code seed[column][record]}; the number of records is
     * taken from {@code seed[0].length}. The category score is the smallest of
     * {@code 0.0000001f} (both categories equal), {@code 0.01f} (only first level equal) and
     * the initial {@code 10000f}. The coordinate score is the smallest per-record value of
     * (latitude difference + longitude difference) / 100 among records within 1 of the
     * candidate on both axes; records with empty or unparsable coordinates are skipped.
     * Returns {@code 10000f} when no record yields a coordinate score.
     *
     * @param seed      reference records, column-major
     * @param candidate record being compared
     * @return category score plus coordinate score, or one of the sentinel values above
     * @throws Exception if {@code initParamMap} has not populated {@link #paramMap}
     */
    public Float calcDistance(String[][] seed, String[] candidate) throws Exception {
        if (paramMap.isEmpty())
            throw new Exception("paramMap.isEmpty()");

        float catScore = 10000f;
        float currCatScore = 10000000f;

        int p1 = paramMap.get("First Level Category");
        int p2 = paramMap.get("Second Level Category");
        for (int v = 0; v < seed[0].length; v++) {
            if (seed[p1][v].equals(candidate[p1])) {
                currCatScore = seed[p2][v].equals(candidate[p2]) ? 0.0000001f : 0.01f;
            }
            if (catScore > currCatScore) { // if found closer, update
                catScore = currCatScore;
            }
        }
        float score = catScore;
        // NOTE(review): catScore starts at 10000f and only decreases, so this guard cannot
        // fire as written. Kept unchanged because callers may rely on the current magnitudes;
        // confirm the intended thresholds.
        if (score > 1000000f)
            return 10000000f;

        float latLongScore = 100000f;
        float currLatLongScore = 10000000f;
        for (int v = 0; v < seed[0].length; v++) {
            try {
                int p3 = paramMap.get("Latitude");
                int p4 = paramMap.get("Longitude");
                if (seed[p3][v].equals("") || seed[p4][v].equals("") || candidate[p3].equals("")
                        || candidate[p4].equals(""))
                    continue;
                double latDiff = Math.abs(Double.parseDouble(seed[p3][v]) - Double.parseDouble(candidate[p3]));
                double longDiff = Math.abs(Double.parseDouble(seed[p4][v]) - Double.parseDouble(candidate[p4]));
                if (!(latDiff > 1 || longDiff > 1)) {
                    currLatLongScore = (float) latDiff / 100.0f + (float) longDiff / 100.0f;
                }
            } catch (Exception ignored) {
                // Best effort: a record whose coordinates cannot be read is skipped.
            }
            if (latLongScore > currLatLongScore) {
                latLongScore = currLatLongScore;
            }
        }
        if (latLongScore > 10000)
            return 10000f;
        score += latLongScore;
        return score;
    }

    /**
     * Looks up the column index of an attribute.
     *
     * @param key column name
     * @return the column index, or {@code null} if the name is not in the header (a
     *         "wrong key" line is printed to standard output in that case)
     */
    public Integer getIdForAttributeName(String key) {
        Integer res = paramMap.get(key);
        if (res == null) {
            System.out.println("wrong key" + key);
        }
        return res;
    }

    /**
     * @param id column index
     * @return the column name stored at that index of {@link #header}
     */
    public String getAttribNameForId(Integer id) {
        return header[id];
    }

    /**
     * Intersects two records attribute by attribute.
     *
     * <p>Cell values are lower-cased, double quotes are removed and spaces following a comma
     * are dropped. If both cells hold brace-delimited sets, the common members are kept (in
     * the order of {@code line1}) and emitted as {@code {a, b}}; otherwise the attribute is
     * kept only when the normalized values are equal. Attributes with no column, or whose
     * cell is missing in either record, contribute nothing.
     *
     * @param line1 first record
     * @param line2 second record
     * @return map from attribute name to the shared value or value set
     */
    public Map<String, String> computeIntersection(String[] line1, String[] line2) {
        Map<String, String> attr_value = new HashMap<String, String>();
        for (String attr : attributes) {
            Integer attrIndex = getIdForAttributeName(attr);
            String raw1 = cellAt(line1, attrIndex);
            String raw2 = cellAt(line2, attrIndex);
            if (raw1 == null || raw2 == null) {
                continue; // nothing to intersect for this attribute
            }
            String common = intersectValues(normalize(raw1), normalize(raw2));
            if (common != null) {
                attr_value.put(attr, common);
            }
        }
        return attr_value;
    }

    /**
     * Checks whether a rule covers a record: every attribute constrained by the rule must
     * match the record's cell. Set-valued rules require all rule members to be present in
     * the record's set; otherwise the normalized values must be equal.
     *
     * @param attr_value the rule (attribute name to value or value set)
     * @param line       the record
     * @return {@code true} if every constrained attribute matches; {@code false} as soon as
     *         one does not, including when the record has no value for it
     */
    public boolean ruleCoversCase(Map<String, String> attr_value, String[] line) {
        for (String attr : attributes) {
            String rule = attr_value.get(attr);
            if (rule == null)
                continue; // no constraint

            String rawCase = cellAt(line, getIdForAttributeName(attr));
            if (rawCase == null) { // rule for this attribute exists but case has no value
                return false;
            }
            rule = normalize(rule);
            String vCase = normalize(rawCase);

            String valArrCaseStr = bracedContent(vCase);
            String valArrRuleStr = bracedContent(rule);
            if (valArrCaseStr == null || valArrRuleStr == null) { // single value, not a set
                if (!vCase.equals(rule)) {
                    return false;
                }
            } else if (!splitValues(valArrCaseStr).containsAll(splitValues(valArrRuleStr))) {
                // rule members must be a subset of the case's members
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether one rule covers another: every attribute constrained by
     * {@code attr_value} must be present in {@code line}, and for set-valued attributes each
     * member of the covering rule must prefix-match some member of the covered rule.
     *
     * @param attr_value the covering rule
     * @param line       the rule being covered
     * @return {@code true} if all constrained attributes match
     */
    public boolean ruleCoversRule(Map<String, String> attr_value, Map<String, String> line) {
        for (String attr : attributes) {
            String rule = attr_value.get(attr);
            if (rule == null)
                continue; // no constraint

            String vRuleBeingCovered = line.get(attr);
            if (vRuleBeingCovered == null) { // constrained here but absent in the covered rule
                return false;
            }

            String coveredSet = bracedContent(vRuleBeingCovered);
            String ruleSet = bracedContent(rule);
            if (coveredSet == null || ruleSet == null) { // single value, not a set
                if (!vRuleBeingCovered.equals(rule)) {
                    return false;
                }
            } else {
                // Members are trimmed so "{a, b}" and "{a,b}" are treated alike.
                List<String> coveredValues = splitValues(coveredSet);
                for (String r : splitValues(ruleSet)) {
                    if (!strListContainsMember(coveredValues, r)) {
                        return false;
                    }
                }
            }
        }
        return true;
    }

    /**
     * Intersects two rules attribute by attribute. Attributes missing from either rule are
     * skipped; set values are intersected, single values are kept only when equal.
     *
     * @param rule1 first rule
     * @param rule2 second rule
     * @return map from attribute name to the shared value or value set
     */
    public Map<String, String> computeIntersection(Map<String, String> rule1, Map<String, String> rule2) {
        Map<String, String> attr_value = new HashMap<String, String>();
        for (String attr : attributes) {
            String v1 = rule1.get(attr);
            String v2 = rule2.get(attr);
            if (v1 == null || v2 == null)
                continue;
            String common = intersectValues(v1, v2);
            if (common != null) {
                attr_value.put(attr, common);
            }
        }
        return attr_value;
    }

    /** Returns true if some list member is a prefix of {@code r} or has {@code r} as a prefix. */
    private boolean strListContainsMember(List<String> valListCase, String r) {
        for (String m : valListCase) {
            if (m.startsWith(r) || r.startsWith(m))
                return true;
        }
        return false;
    }

    /** Returns the cell at {@code index}, or null if the index or the cell is unavailable. */
    private static String cellAt(String[] line, Integer index) {
        if (line == null || index == null || index < 0 || index >= line.length) {
            return null;
        }
        return line[index];
    }

    /** Lower-cases, strips double quotes and removes the spaces that follow a comma. */
    private static String normalize(String raw) {
        return raw.toLowerCase().replace("\"", "").replaceAll(", +", ",");
    }

    /** Returns the text between the first '{' and the next '}', or null if there is none. */
    private static String bracedContent(String value) {
        if (value == null) {
            return null;
        }
        int open = value.indexOf('{');
        if (open < 0) {
            return null;
        }
        int close = value.indexOf('}', open + 1);
        if (close < 0) {
            return null;
        }
        return value.substring(open + 1, close);
    }

    /** Splits set content on commas and trims each member. */
    private static List<String> splitValues(String content) {
        String[] tokens = content.split(",");
        List<String> values = new ArrayList<String>(tokens.length);
        for (String token : tokens) {
            values.add(token.trim());
        }
        return values;
    }

    /** Renders members as "{a, b}" without altering the members themselves. */
    private static String formatValueSet(List<String> values) {
        StringBuilder sb = new StringBuilder("{");
        for (int i = 0; i < values.size(); i++) {
            if (i > 0) {
                sb.append(", ");
            }
            sb.append(values.get(i));
        }
        return sb.append('}').toString();
    }

    /**
     * Intersects two values: common members when both are sets, the value itself when both
     * are equal single values, null when nothing is shared.
     */
    private static String intersectValues(String v1, String v2) {
        String set1 = bracedContent(v1);
        String set2 = bracedContent(v2);
        if (set1 == null || set2 == null) { // at least one side is a single value
            return v1.equals(v2) ? v1 : null;
        }
        List<String> common = splitValues(set1);
        common.retainAll(splitValues(set2));
        return common.isEmpty() ? null : formatValueSet(common);
    }
}