Java tutorial
/* * Copyright 2013 Corpuslinguistic working group Humboldt University Berlin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package annis.gui.flatquerybuilder; import com.vaadin.server.ClassResource; import com.vaadin.ui.Notification; import java.io.IOException; import java.util.HashMap; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** * @author klotzmaz * @author tom */ public class ReducingStringComparator { private HashMap<String, HashMap> ALLOGRAPHS; private static final String READING_ERROR_MESSAGE = "ERROR: Unable to load mapping file(s)!"; private static String MAPPING_FILE = "mapfile.fqb"; public ReducingStringComparator() { initAlphabet(); readMappings(); } public HashMap<String, HashMap> getMappings() { return ALLOGRAPHS; } private HashMap initAlphabet() { HashMap<Character, Character> h = new HashMap<>(); //standard-alphabet: for (int i = 97; i < 122; i++) { char c = (char) i; h.put(c, c); h.put(Character.toUpperCase(c), c); } return h; } private void readMappings() { ALLOGRAPHS = new HashMap<>(); ClassResource cr = new ClassResource(ReducingStringComparator.class, MAPPING_FILE); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); Document mappingD = db.parse(cr.getStream().getStream()); NodeList mappings = mappingD.getElementsByTagName("mapping"); for (int i = 0; i < mappings.getLength(); i++) { Element mapping = (Element) mappings.item(i); String mappingName = mapping.getAttribute("name"); HashMap mappingMap = initAlphabet(); NodeList variants = mapping.getElementsByTagName("variant"); for (int j = 0; j < variants.getLength(); j++) { Element var = (Element) variants.item(j); char varvalue = var.getAttribute("value").charAt(0); Element character = (Element) var.getParentNode(); char charactervalue = character.getAttribute("value").charAt(0); mappingMap.put(varvalue, charactervalue); } ALLOGRAPHS.put(mappingName, mappingMap); } } catch (SAXException e) { e = null; Notification.show(READING_ERROR_MESSAGE); } catch (IOException e) { e = null; Notification.show(READING_ERROR_MESSAGE); } catch (ParserConfigurationException e) { e = null; Notification.show(READING_ERROR_MESSAGE); } } private String removeCombiningCharacters(String s) { String t = ""; for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); int cp = (int) c; if (!(((cp > 767) & (cp < 880)) | ((cp > 1154) & (cp < 1162)) | (cp == 1619) | ((cp > 2026) & (cp < 2036)) | (cp == 4352) | ((cp > 4956) & (cp < 4960)) | (cp == 6783) | ((cp > 7018) & (cp < 7028)) | ((cp > 7615) & (cp < 7655)) | ((cp > 7675) & (cp < 7680)) | ((cp > 8399) & (cp < 8433)) | ((cp > 11502) & (cp < 11506)) | ((cp > 11743) & (cp < 11776)) | ((cp > 12440) & (cp < 12443)) | ((cp > 42606) & (cp < 42611)) | ((cp > 42611) & (cp < 42622)) | ((cp > 42654) & (cp < 42738)) | ((cp > 43231) & (cp < 43250)) | ((cp > 65055) & (cp < 65063)) | (cp == 66045) | ((cp > 119140) & (cp < 119146)) | ((cp > 119148) & (cp < 119155)) | ((cp > 119162) & (cp < 119171)) | ((cp > 119172) & (cp < 119180)) | ((cp > 119209) & (cp < 119214)) | ((cp > 119361) & (cp < 119365)))) { t = t + c; } } return t; } public int compare(Object a, Object b, String mapname) /* * use with Strings only * * <0: a<b * =0: a=b * >0: a>b * * compare() is split in 2 methods to make contains() * more comfortable (contains() could use compare2(), * so that a multiple application of removeCombiningCharacters() * on the same string is avoided) * */ { String s1 = removeCombiningCharacters((String) a); String s2 = removeCombiningCharacters((String) b); //compare without spaces return compare2(s1.replace(" ", ""), s2.replace(" ", ""), mapname); } private int compare2(String s1, String s2, String mapname) { int l = s1.length(); if (l < s2.length()) { return -1; } else if (l > s2.length()) { return 1; } for (int i = 0; i < l; i++) { char c1 = s1.charAt(i); char c2 = s2.charAt(i); HashMap<Character, Character> curMap = ALLOGRAPHS.get(mapname); char rc1 = curMap.containsKey(c1) ? curMap.get(c1) : c1; char rc2 = (curMap.containsKey(c2)) ? curMap.get(c2) : c2; if (rc1 < rc2) { return -1; } else if (rc1 > rc2) { return 1; } } return 0; } public boolean startsWith(String fullSequence, String subSequence, String mapname) { //kill diacritics: String subS = removeCombiningCharacters(subSequence); String fullS = removeCombiningCharacters(fullSequence); //remove spaces: subS = subS.replace(" ", ""); fullS = fullS.replace(" ", ""); int l = subS.length(); if (fullS.length() < l) { return false; } return (compare2(fullS.substring(0, l), subS, mapname) == 0); } public boolean contains(String fullSequence, String subSequence, String mapname) { //kill diacritics: String subS = removeCombiningCharacters(subSequence); String fullS = removeCombiningCharacters(fullSequence); //remove spaces: subS = subS.replace(" ", ""); fullS = fullS.replace(" ", ""); int l = subS.length(); for (int i = 0; i < fullS.length() - l + 1; i++) { if (compare2(fullS.substring(i, i + l), subS, mapname) == 0) { return true; } } return false; } }