annis.gui.flatquerybuilder.ReducingStringComparator.java Source code

Java tutorial

Introduction

Here is the source code for annis.gui.flatquerybuilder.ReducingStringComparator.java

Source

/*
 * Copyright 2013 Corpuslinguistic working group Humboldt University Berlin.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis.gui.flatquerybuilder;

import com.vaadin.server.ClassResource;
import com.vaadin.ui.Notification;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Compares strings after reducing them to a simplified form.
 *
 * <p>Before two strings are compared, the characters listed in
 * {@link #COMBINING_RANGES} and all space characters ({@code ' '}) are removed.
 * The remaining characters are then translated through a named character
 * mapping, so that different variants of a character compare as equal. The
 * named mappings are read from the class resource {@code mapfile.fqb}; every
 * mapping starts out as a case-insensitive mapping of the letters a-z and is
 * extended by the variants declared in that file.
 *
 * @author klotzmaz
 * @author tom
 */
public class ReducingStringComparator {
    private static final String READING_ERROR_MESSAGE = "ERROR: Unable to load mapping file(s)!";
    private static final String MAPPING_FILE = "mapfile.fqb";

    /**
     * Inclusive code point ranges {@code {first, last}} of the characters that
     * are stripped before comparing, in ascending order.
     */
    private static final int[][] COMBINING_RANGES = {
        {768, 879}, {1155, 1161}, {1619, 1619}, {2027, 2035}, {4352, 4352}, {4957, 4959},
        {6783, 6783}, {7019, 7027}, {7616, 7654}, {7676, 7679}, {8400, 8432},
        {11503, 11505}, {11744, 11775}, {12441, 12442}, {42607, 42610}, {42612, 42621},
        {42655, 42737}, {43232, 43249}, {65056, 65062}, {66045, 66045},
        {119141, 119145}, {119149, 119154}, {119163, 119170}, {119173, 119179},
        {119210, 119213}, {119362, 119364}
    };

    /** Case-insensitive mapping of a-z / A-Z onto a-z; the starting point of every named mapping. */
    private static final HashMap<Character, Character> BASE_ALPHABET = initAlphabet();

    /**
     * Mapping name to (variant character to canonical character). The raw inner
     * type is kept because {@link #getMappings()} exposes this exact type.
     */
    private final HashMap<String, HashMap> allographs = new HashMap<>();

    /**
     * Creates a comparator and loads all mappings from the mapping file. If the
     * file cannot be read, a notification is shown and no named mappings are
     * available.
     */
    public ReducingStringComparator() {
        readMappings();
    }

    /**
     * Returns the loaded mappings.
     *
     * @return the internal map from mapping name to character mapping (not a copy)
     */
    public HashMap<String, HashMap> getMappings() {
        return allographs;
    }

    /**
     * Builds the base mapping that sends every letter a-z and A-Z to its
     * lower-case form.
     */
    private static HashMap<Character, Character> initAlphabet() {
        HashMap<Character, Character> alphabet = new HashMap<>();
        // The upper bound is inclusive so that 'z' / 'Z' are mapped as well.
        for (char c = 'a'; c <= 'z'; c++) {
            alphabet.put(c, c);
            alphabet.put(Character.toUpperCase(c), c);
        }
        return alphabet;
    }

    /**
     * Reads the mapping file and fills {@link #allographs}.
     *
     * <p>For every {@code mapping} element (identified by its {@code name}
     * attribute) each descendant {@code variant} element maps the first
     * character of its own {@code value} attribute to the first character of
     * its parent element's {@code value} attribute.
     */
    private void readMappings() {
        allographs.clear();
        ClassResource cr = new ClassResource(ReducingStringComparator.class, MAPPING_FILE);
        // try-with-resources closes the stream; a null resource is skipped on close.
        try (InputStream in = cr.getStream().getStream()) {
            if (in == null) {
                // A missing resource is reported like any other reading problem.
                throw new IOException("Mapping file not found: " + MAPPING_FILE);
            }
            DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document mappingDocument = db.parse(in);

            NodeList mappings = mappingDocument.getElementsByTagName("mapping");
            for (int i = 0; i < mappings.getLength(); i++) {
                Element mapping = (Element) mappings.item(i);
                String mappingName = mapping.getAttribute("name");
                HashMap<Character, Character> mappingMap =
                        new HashMap<Character, Character>(BASE_ALPHABET);

                NodeList variants = mapping.getElementsByTagName("variant");
                for (int j = 0; j < variants.getLength(); j++) {
                    Element variant = (Element) variants.item(j);
                    Element canonical = (Element) variant.getParentNode();
                    String variantValue = variant.getAttribute("value");
                    String canonicalValue = canonical.getAttribute("value");
                    if (variantValue.isEmpty() || canonicalValue.isEmpty()) {
                        // Without a character on both sides there is nothing to map.
                        continue;
                    }
                    mappingMap.put(variantValue.charAt(0), canonicalValue.charAt(0));
                }
                allographs.put(mappingName, mappingMap);
            }
        } catch (SAXException | IOException | ParserConfigurationException ex) {
            // Not rethrown on purpose: the comparator stays usable, the user is informed.
            Notification.show(READING_ERROR_MESSAGE);
        }
    }

    /**
     * Tells whether the given code point lies in one of the
     * {@link #COMBINING_RANGES}.
     */
    private static boolean isCombining(int codePoint) {
        for (int[] range : COMBINING_RANGES) {
            if (codePoint < range[0]) {
                // Ranges are ascending, so no later range can match.
                return false;
            }
            if (codePoint <= range[1]) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns a copy of {@code s} without the characters listed in
     * {@link #COMBINING_RANGES}.
     *
     * <p>The string is walked by code point rather than by {@code char} so that
     * the ranges above U+FFFF can match at all.
     */
    private static String removeCombiningCharacters(String s) {
        StringBuilder kept = new StringBuilder(s.length());
        int i = 0;
        while (i < s.length()) {
            int codePoint = s.codePointAt(i);
            if (!isCombining(codePoint)) {
                kept.appendCodePoint(codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return kept.toString();
    }

    /** Strips the combining characters and all spaces from {@code s}. */
    private static String normalize(String s) {
        return removeCombiningCharacters(s).replace(" ", "");
    }

    /**
     * Returns the character mapping registered under {@code mapname}, or the
     * base alphabet if no such mapping was loaded (for example because the
     * mapping file could not be read).
     */
    @SuppressWarnings("unchecked") // readMappings() only stores Character-to-Character maps
    private HashMap<Character, Character> mappingFor(String mapname) {
        HashMap<Character, Character> mapping = allographs.get(mapname);
        return (mapping != null) ? mapping : BASE_ALPHABET;
    }

    /** Translates {@code c} through {@code mapping}; unmapped characters are returned unchanged. */
    private static char reduce(HashMap<Character, Character> mapping, char c) {
        Character mapped = mapping.get(c);
        return (mapped != null) ? mapped : c;
    }

    /**
     * Compares two strings after removing combining characters and spaces and
     * applying the named mapping. Use with Strings only.
     *
     * <p>Lengths are compared first, so a shorter (reduced) string always sorts
     * before a longer one; strings of equal length are compared character by
     * character.
     *
     * @param a first string
     * @param b second string
     * @param mapname name of the character mapping to apply
     * @return a value &lt;0 if a&lt;b, 0 if a=b, &gt;0 if a&gt;b
     * @throws ClassCastException if {@code a} or {@code b} is not a String
     */
    public int compare(Object a, Object b, String mapname) {
        return compare2(normalize((String) a), normalize((String) b), mapname);
    }

    /**
     * Compares two already normalized strings, see
     * {@link #compare(Object, Object, String)}.
     */
    private int compare2(String s1, String s2, String mapname) {
        int length = s1.length();
        if (length != s2.length()) {
            return (length < s2.length()) ? -1 : 1;
        }

        // The mapping is looked up once instead of once per character.
        HashMap<Character, Character> mapping = mappingFor(mapname);
        for (int i = 0; i < length; i++) {
            char r1 = reduce(mapping, s1.charAt(i));
            char r2 = reduce(mapping, s2.charAt(i));
            if (r1 != r2) {
                return (r1 < r2) ? -1 : 1;
            }
        }
        return 0;
    }

    /**
     * Tells whether the reduced form of {@code sub} equals the part of
     * {@code full} that starts at {@code offset}. The caller guarantees that
     * {@code offset + sub.length() <= full.length()}.
     */
    private static boolean regionEquals(String full, int offset, String sub,
            HashMap<Character, Character> mapping) {
        for (int i = 0; i < sub.length(); i++) {
            if (reduce(mapping, full.charAt(offset + i)) != reduce(mapping, sub.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether {@code fullSequence} starts with {@code subSequence} once
     * both have been stripped of combining characters and spaces and reduced
     * with the named mapping.
     *
     * @param fullSequence the string to search in
     * @param subSequence the expected prefix
     * @param mapname name of the character mapping to apply
     * @return {@code true} if the reduced prefix matches
     */
    public boolean startsWith(String fullSequence, String subSequence, String mapname) {
        String subS = normalize(subSequence);
        String fullS = normalize(fullSequence);
        if (fullS.length() < subS.length()) {
            return false;
        }
        return regionEquals(fullS, 0, subS, mappingFor(mapname));
    }

    /**
     * Tells whether {@code subSequence} occurs anywhere in {@code fullSequence}
     * once both have been stripped of combining characters and spaces and
     * reduced with the named mapping. An empty sub-sequence is always contained.
     *
     * @param fullSequence the string to search in
     * @param subSequence the string to search for
     * @param mapname name of the character mapping to apply
     * @return {@code true} if a matching region exists
     */
    public boolean contains(String fullSequence, String subSequence, String mapname) {
        String subS = normalize(subSequence);
        String fullS = normalize(fullSequence);
        HashMap<Character, Character> mapping = mappingFor(mapname);
        int lastStart = fullS.length() - subS.length();
        for (int start = 0; start <= lastStart; start++) {
            if (regionEquals(fullS, start, subS, mapping)) {
                return true;
            }
        }
        return false;
    }
}