org.apache.uima.ruta.resource.MultiTreeWordList.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.uima.ruta.resource.MultiTreeWordList.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.ruta.resource;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.RutaStream;
import org.apache.uima.ruta.UIMAConstants;
import org.apache.uima.ruta.type.RutaBasic;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;

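/**
 * A word list stored as a character tree: each word is a path of {@link MultiTextNode}s starting
 * at the root, and the node that ends a word carries the type names associated with that word.
 * Lists are loaded from <code>.txt</code> resources (one word per line, the resource name is used
 * as the type) or from persisted <code>.mtwl</code> resources.
 */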
public class MultiTreeWordList implements RutaWordList {

    /** Character encoding used when reading word list resources. */
    private static final String ENCODING = "UTF-8";

    /** Helper used to read persisted <code>.mtwl</code> content into the tree. */
    private MultiTreeWordListPersistence persistence = new MultiTreeWordListPersistence();

    /** The root of the TreeWordList. */
    protected MultiTextNode root;

    /** The cost model we are using. */
    private EditDistanceCostMap costMap;

    /**
     * Creates an empty word list by delegating to the path-array constructor with an empty array,
     * so no resource is loaded.
     * 
     * @throws IOException
     *           declared by the delegate constructor.
     */
    public MultiTreeWordList() throws IOException {
        this(new String[] {});
    }

    /**
     * Creates a word list from a single file system path by wrapping it in a
     * {@link FileSystemResource} and delegating to the resource-based constructor.
     * 
     * @param pathname
     *          the pathname of the used file.
     * @throws IOException
     *           When there is a problem reading the resource.
     */
    public MultiTreeWordList(String pathname) throws IOException {
        this(new FileSystemResource(pathname));
    }

    /**
     * Creates a word list from the given resources. A resource that resolves to a directory on the
     * file system contributes every file directly inside that directory; any other resource is
     * loaded as is.
     * 
     * @param lists
     *          Resources to load.
     * @throws IOException
     *           When there is a problem reading a resource, or when the content of a directory
     *           cannot be listed.
     */
    public MultiTreeWordList(Resource... lists) throws IOException {
        this.root = new MultiTextNode();
        this.costMap = new EditDistanceCostMap();

        for (Resource list : lists) {
            // check if the resource is a directory
            File directory;
            try {
                directory = list.getFile();
            } catch (IOException e) {
                // resource is not on the file system, so it cannot be a directory
                directory = null;
            }

            if (directory != null && directory.isDirectory()) {
                // File.listFiles() returns null when the directory cannot be read; report that
                // as an I/O problem instead of failing with a NullPointerException.
                File[] entries = directory.listFiles();
                if (entries == null) {
                    throw new IOException("Unable to list the content of directory " + directory);
                }
                for (File data : entries) {
                    load(new FileSystemResource(data));
                }
            } else {
                // resource is not a directory, load it normally
                load(list);
            }
        }
    }

    /**
     * Constructor from an open stream. The stream is always closed before this constructor
     * returns or throws.
     * <p>
     * A name ending with <code>.mtwl</code> is read as a persisted tree, a name ending with
     * <code>.txt</code> is read as a plain word list; for any other name nothing is loaded.
     * 
     * @param stream
     *          the stream to read the file from.
     * @param name
     *          associated name
     * @throws IOException
     *           When there is a problem reading or closing the stream.
     */
    public MultiTreeWordList(InputStream stream, String name) throws IOException {
        this.root = new MultiTextNode();
        this.costMap = new EditDistanceCostMap();

        try {
            if (name.endsWith(".mtwl")) {
                persistence.readMTWL(root, stream, ENCODING);
            }
            if (name.endsWith(".txt")) {
                buildNewTree(stream, name);
            }
        } finally {
            // Honour the documented contract even for unknown extensions and on failure.
            // Closing a stream that a reader already closed has no effect.
            if (stream != null) {
                stream.close();
            }
        }
    }

    /**
     * Builds a word list from several files, loading them in array order.
     * 
     * @param pathnames
     *          file system paths of the word list files to load
     * @throws IOException
     *           When there is a problem reading one of the files.
     */
    public MultiTreeWordList(String[] pathnames) throws IOException {
        this.root = new MultiTextNode();
        this.costMap = new EditDistanceCostMap();
        for (int i = 0; i < pathnames.length; i++) {
            FileSystemResource resource = new FileSystemResource(pathnames[i]);
            load(resource);
        }
    }

    /**
     * Builds a word list from the given files, loading them in iteration order.
     * 
     * @param files
     *          the word list files to load
     * @throws IOException
     *           When there is a problem reading one of the files.
     */
    public MultiTreeWordList(List<File> files) throws IOException {
        this.root = new MultiTextNode();
        this.costMap = new EditDistanceCostMap();
        for (File wordListFile : files) {
            FileSystemResource resource = new FileSystemResource(wordListFile);
            load(resource);
        }
    }

    /**
     * Load a resource in this word list. The resource's stream is opened first and is closed
     * again in every case, including when the name is rejected.
     * 
     * @param resource
     *          Resource to load. The resource's name must end with .txt or .mtwl.
     * @throws IOException
     *           When there is a problem reading the resource.
     * @throws IllegalArgumentException
     *           When the resource has no name or the name has an unsupported ending.
     */
    private void load(Resource resource) throws IOException {
        final String name = resource.getFilename();
        InputStream stream = null;
        try {
            stream = resource.getInputStream();
            if (name == null) {
                throw new IllegalArgumentException("List does not have a name.");
            }

            final boolean plainText = name.endsWith(".txt");
            if (!plainText && !name.endsWith(".mtwl")) {
                throw new IllegalArgumentException("File name should end with .mtwl or .txt, found " + name);
            }

            if (plainText) {
                buildNewTree(stream, name);
            } else {
                // Use the shared constant so both readers agree on the encoding.
                persistence.readMTWL(root, stream, ENCODING);
            }
        } finally {
            if (stream != null) {
                stream.close();
            }
        }
    }

    /**
     * Reads a plain word list from the stream and adds every line, trimmed, as a word of type
     * <code>name</code>. The stream is closed when this method returns or throws.
     * 
     * @param stream
     *          Input stream for the file containing the words for the treeWordList
     * @param name
     *          Associated name for the file; used as the type of every word that is added
     * @throws IOException
     *           When there is a problem reading the stream.
     */
    public void buildNewTree(InputStream stream, String name) throws IOException {
        BufferedReader br = new BufferedReader(new InputStreamReader(stream, ENCODING));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                addWord(line.trim(), name);
            }
        } finally {
            // Closing the reader also closes the wrapped stream; doing it in finally avoids
            // leaking the stream when reading or adding a word fails.
            br.close();
        }
    }

    /**
     * Inserts a word into the tree and attaches a type to its final node.
     * <p>
     * One node per character is followed or created, starting at the root. The node reached after
     * the last character is flagged as a word end unless the word is empty; in that case the type
     * is attached to the root, whose word-end flag is set to false.
     * 
     * @param s
     *          The String to add
     * @param type
     *          The type of the string.
     */
    public void addWord(String s, String type) {
        MultiTextNode current = root;
        final int length = s.length();

        for (int i = 0; i < length; i++) {
            final Character symbol = s.charAt(i);

            MultiTextNode next = current.getChildNode(symbol);
            if (next == null) {
                // No path for this character yet: extend the tree.
                next = new MultiTextNode(symbol, false);
                current.addChild(next);
            }
            current = next;
        }

        current.setWordEnd(length > 0);
        current.addType(type);
    }

    /**
     * Returns all Types contained by the MultiTreeWordList, i.e. the type cone of the root node.
     * 
     * @return all Types contained by the MultiTreeWordList.
     */
    public Collection<String> getTypes() {
        return getTypeCone(root);
    }

    /**
     * Returns all types contained by the cone of the MultiTextNode node, including the types of node
     * itself. Every type is reported once, in the order in which it is first met when the node's
     * own types are visited before those of its children.
     * 
     * @param node
     *          The node where we start, the root of the cone.
     * @return all types contained by the cone of the MultiTextNode node, including the types of node
     *         itself.
     */
    public Collection<String> getTypeCone(MultiTextNode node) {
        List<String> ordered = new ArrayList<String>();
        collectTypeCone(node, new HashSet<String>(), ordered);
        return ordered;
    }

    /**
     * Adds the not yet seen types of node and of all its descendants to ordered.
     * 
     * @param node
     *          The node whose cone is visited.
     * @param seen
     *          Types that have already been reported; gives constant-time duplicate checks.
     * @param ordered
     *          Receives each new type in encounter order.
     */
    private void collectTypeCone(MultiTextNode node, Set<String> seen, List<String> ordered) {
        if (node.getTypes() != null) {
            for (String type : node.getTypes()) {
                if (seen.add(type)) {
                    ordered.add(type);
                }
            }
        }

        for (Character c : node.getChildren().keySet()) {
            collectTypeCone(node.getChildNode(c), seen, ordered);
        }
    }

    /**
     * Lists every word stored in the MultiTreeWordList in natural (sorted) order.
     * 
     * @return All strings contained by the MultiTreeWordList, sorted.
     */
    public Collection<String> keySet() {
        Collection<String> unsorted = keySet(root, "");
        List<String> sorted = new LinkedList<String>(unsorted);
        Collections.sort(sorted);
        return sorted;
    }

    /**
     * Collects the words found in the cone of a node. The characters on the path from the root to
     * the node are supplied as prefix and are prepended to every word that is found.
     * 
     * @param node
     *          the node we are considering.
     * @param prefix
     *          the characters leading to node.
     * @return All strings contained by the cone of the MultiTextNode node.
     */
    private Collection<String> keySet(MultiTextNode node, String prefix) {
        List<String> words = new LinkedList<String>();

        // The path up to this node is itself a word.
        if (node.isWordEnd()) {
            words.add(prefix);
        }

        // Descend into every child with the prefix extended by the child's character.
        for (Character c : node.getChildren().keySet()) {
            String extended = prefix + c;
            MultiTextNode child = node.getChildNode(c);
            words.addAll(keySet(child, extended));
        }

        return words;
    }

    /**
     * Returns all types of the string s, searching case sensitively.
     * 
     * @param s
     *          The string with the types.
     * @return All types of the string s.
     */
    public Collection<String> getTypes(String s) {
        return getTypes(s, false);
    }

    /**
     * Returns the types of the string s. The lookup is an edit distance search with distance 0
     * and no ignored characters; the types of all matches are merged into one set.
     * 
     * @param s
     *          The string with the types.
     * @param ignoreCase
     *          Indicates, whether we search case sensitive or not.
     * @return The types of the string s.
     */
    public Collection<String> getTypes(String s, boolean ignoreCase) {
        Map<String, Set<String>> matches = editDistance(s, 0, ignoreCase, "");

        Set<String> merged = new HashSet<String>();
        for (Set<String> typesOfMatch : matches.values()) {
            merged.addAll(typesOfMatch);
        }
        return merged;
    }

    /**
     * Returns the types of all entries that match a string within the given edit distance. The
     * types of the individual matches are concatenated, so a type may occur more than once.
     * 
     * @param string
     *          The string which types we want to have.
     * @param ignoreCase
     *          Requests a case insensitive search; only honoured if the string has at least
     *          ignoreLength characters.
     * @param ignoreLength
     *          Minimum length of the string for which ignoreCase takes effect.
     * @param edit
     *          Not evaluated by this method; the edit distance search is always used.
     * @param distance
     *          The edit distance to a string contained by the MultiTreeWordList; truncated to an
     *          int.
     * @param ignoreToken
     *          Characters which can be ignored.
     * @return Returns a list of types which belong to a string.
     */
    public List<String> contains(String string, boolean ignoreCase, int ignoreLength, boolean edit, double distance,
            String ignoreToken) {

        final boolean caseInsensitive = string.length() >= ignoreLength && ignoreCase;
        Map<String, Set<String>> matches = editDistance(string, (int) distance, caseInsensitive, ignoreToken,
                false);

        List<String> typesFound = new LinkedList<String>();
        for (Set<String> typesOfMatch : matches.values()) {
            typesFound.addAll(typesOfMatch);
        }
        return typesFound;
    }

    /**
     * Checks whether a string is contained by the MultiTreeWordList or not, using the boolean edit
     * distance search with this list's cost map.
     * 
     * @param string
     *          The string which is contained or not.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not; passed on unchanged.
     * @param ignoreLength
     *          Not evaluated by this method. NOTE(review): containsFragmentBool only ignores case
     *          for strings of at least this length - confirm whether the difference is intended.
     * @param edit
     *          Not evaluated by this method.
     * @param distance
     *          The edit distance to a string contained by the MultiTreeWordList.
     * @param ignoreToken
     *          Not evaluated by this method.
     * @return true, if the string is contained by the MultiTreeWordList, false otherwise.
     */
    public boolean containsBool(String string, boolean ignoreCase, int ignoreLength, boolean edit, double distance,
            String ignoreToken) {
        return editDistanceBool(root, string, "", distance, 0, ignoreCase, false, costMap);
    }

    /**
     * Checks whether the tree contains exactly the string s (case sensitive).
     * 
     * @param s
     *          The string which is contained or not.
     * @return True, if the TreeWordList contains exactly the string s, false otherwise.
     */
    public boolean contains(String s) {
        return contains(s, false);
    }

    /**
     * Checks whether the tree contains the string s. Delegates to the six-argument variant with
     * size 0, no ignored characters, maxIgnoreChars 0 and ignoreWS set to true.
     * 
     * @param s
     *          The string which is contained or not.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not.
     * @return True, if the TreeWordList contains the string s, false otherwise.
     */
    public boolean contains(String s, boolean ignoreCase) {
        return contains(s, ignoreCase, 0, new char[] {}, 0, true);
    }

    /**
     * Checks if the MultiTreeWordList contains the string s, using a fresh cost map in which every
     * character of ignoreChars gets a delete cost of 0.0.
     * 
     * @param s
     *          The string which is contained or not.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not.
     * @param size
     *          Not evaluated by this method.
     * @param ignoreChars
     *          Characters which can be ignored.
     * @param maxIgnoreChars
     *          Handed to the boolean edit distance search as the allowed distance.
     * @param ignoreWS
     *          Not evaluated by this method.
     * @return true, if TreeWordList contains the string, false otherwise.
     */
    public boolean contains(String s, boolean ignoreCase, int size, char[] ignoreChars, int maxIgnoreChars,
            boolean ignoreWS) {

        EditDistanceCostMap freeDeletions = new EditDistanceCostMap();
        for (int i = 0; i < ignoreChars.length; i++) {
            Character ignorable = ignoreChars[i];
            freeDeletions.setDeleteCosts(ignorable, 0.0);
        }

        return editDistanceBool(root, s, "", maxIgnoreChars, 0, ignoreCase, false, freeDeletions);
    }

    /**
     * Checks whether the string s is the beginning of a path in the MultiTreeWordList, i.e. whether
     * the tree can be followed along all characters of s.
     * 
     * @param s
     *          The string which is contained or not.
     * @param ignoreCase
     *          Requests a case insensitive search; only honoured if s is longer than size.
     * @param size
     *          Length that s has to exceed for ignoreCase to take effect.
     * @param ignoreChars
     *          Characters which can be ignored.
     * @param maxIgnoreChars
     *          The maximum number of ignored characters.
     * @param ignoreWS
     *          Not evaluated by this method.
     * @return true, if the tree contains a path for the string, false otherwise.
     */
    public boolean containsFragment(String s, boolean ignoreCase, int size, char[] ignoreChars, int maxIgnoreChars,
            boolean ignoreWS) {
        final boolean caseInsensitive = ignoreCase && s.length() > size;
        return recursiveContains(root, s, 0, caseInsensitive, true, ignoreChars, maxIgnoreChars);
    }

    /**
     * Checks whether the string is a fragment of an entry of the MultiTreeWordList, using the
     * boolean edit distance search in fragment mode with this list's cost map.
     * 
     * @param string
     *          The string whose prefix is contained or not.
     * @param ignoreCase
     *          Requests a case insensitive search; only honoured if the string has at least
     *          ignoreLength characters.
     * @param ignoreLength
     *          Minimum length of the string for which ignoreCase takes effect.
     * @param edit
     *          Not evaluated by this method.
     * @param distance
     *          The edit distance to a string contained by the MultiTreeWordList.
     * @param ignoreToken
     *          Not evaluated by this method.
     * @return true, if a prefix of the string is contained by the MultiTreeWordList, false otherwise.
     */
    public boolean containsFragmentBool(String string, boolean ignoreCase, int ignoreLength, boolean edit,
            double distance, String ignoreToken) {

        final boolean caseInsensitive = string.length() >= ignoreLength && ignoreCase;
        return editDistanceBool(root, string, "", distance, 0, caseInsensitive, true, costMap);
    }

    /**
     * Returns the types found for a string that is a fragment of an entry of the
     * MultiTreeWordList.
     * <p>
     * Without edit distance the tree is walked directly and the result of that walk is returned
     * as is, which may be <code>null</code>. With edit distance the types of all matches are
     * merged into a list without duplicates.
     * 
     * @param string
     *          The string whose prefix's types we are interested in.
     * @param ignoreCase
     *          Requests a case insensitive search. Without edit distance it is only honoured if
     *          the string is longer than ignoreLength, with edit distance if the string has at
     *          least ignoreLength characters.
     * @param ignoreLength
     *          Length threshold for ignoreCase; without edit distance it is also handed to the
     *          tree walk as the maximum number of ignored characters.
     * @param edit
     *          Indicates whether we use an edit distance or not.
     * @param distance
     *          The edit distance to a string contained by the MultiTreeWordList; truncated to an
     *          int.
     * @param ignoreToken
     *          Characters which can be ignored.
     * @return A list of types which belong to a prefix of a string that is contained by the
     *         MultiTreeWordList.
     */
    public List<String> containsFragment(String string, boolean ignoreCase, int ignoreLength, boolean edit,
            double distance, String ignoreToken) {

        if (!edit) {
            final boolean walkCaseInsensitive = ignoreCase && string.length() > ignoreLength;
            return recursiveContains2(root, string, 0, walkCaseInsensitive, true, ignoreToken.toCharArray(),
                    ignoreLength);
        }

        final boolean caseInsensitive = string.length() >= ignoreLength && ignoreCase;
        Map<String, Set<String>> matches = editDistance(string, (int) distance, caseInsensitive, ignoreToken,
                true);

        List<String> distinctTypes = new LinkedList<String>();
        for (Set<String> typesOfMatch : matches.values()) {
            for (String type : typesOfMatch) {
                if (!distinctTypes.contains(type)) {
                    distinctTypes.add(type);
                }
            }
        }
        return distinctTypes;
    }

    /**
     * Follows the string text through the tree, starting at pointer, and returns the types found
     * at the end of the path.
     * 
     * @param pointer
     *          The MultiTextNode we are looking at.
     * @param text
     *          The string which is looked up.
     * @param index
     *          The index of the string text we checked until now.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not. In the case insensitive search a
     *          character may additionally be reached by skipping space nodes of the tree.
     * @param fragment
     *          Indicates whether it is sufficient that text is the beginning of an entry.
     * @param ignoreChars
     *          Characters of text which may be skipped if the tree has no matching node; the
     *          first character of text is never skipped.
     * @param maxIgnoreChars
     *          Only passed on to the recursive calls; not evaluated.
     * @return <code>null</code> if the tree has no path for text, or if fragment is false and the
     *         path does not end a word; an empty list if fragment is true and the path exists but
     *         does not end a word; otherwise the types of the word that ends the path.
     */
    private List<String> recursiveContains2(MultiTextNode pointer, String text, int index, boolean ignoreCase,
            boolean fragment, char[] ignoreChars, int maxIgnoreChars) {

        if (pointer == null) {
            return null;
        }

        if (index == text.length()) {
            if (pointer.isWordEnd()) {
                return new ArrayList<String>(pointer.getTypes());
            }
            if (fragment) {
                return Collections.emptyList();
            }
            // The whole text is consumed without reaching a word end: no match. Returning here
            // also keeps text.charAt(index) below from running past the end of the text.
            return null;
        }

        final char charAt = text.charAt(index);
        boolean charAtIgnored = false;

        // The first character of the text is never treated as ignorable.
        if (ignoreChars != null && index != 0) {
            for (char each : ignoreChars) {
                if (each == charAt) {
                    charAtIgnored = true;
                    break;
                }
            }
        }

        final int next = index + 1;

        if (!ignoreCase) {
            // Case sensitive.
            MultiTextNode childNode = pointer.getChildNode(charAt);

            if (charAtIgnored && childNode == null) {
                // Character is ignored and does not appear: stay on the node, advance the text.
                return recursiveContains2(pointer, text, next, ignoreCase, fragment, ignoreChars, maxIgnoreChars);
            }
            return recursiveContains2(childNode, text, next, ignoreCase, fragment, ignoreChars, maxIgnoreChars);
        }

        // Lower Case Node.
        MultiTextNode childNodeL = pointer.getChildNode(Character.toLowerCase(charAt));
        if (childNodeL == null) {
            childNodeL = skipWS(pointer, Character.toLowerCase(charAt));
        }

        // Upper Case Node.
        MultiTextNode childNodeU = pointer.getChildNode(Character.toUpperCase(charAt));
        if (childNodeU == null) {
            childNodeU = skipWS(pointer, Character.toUpperCase(charAt));
        }

        if (charAtIgnored && childNodeL == null && childNodeU == null) {
            // Character is ignored and does not appear.
            return recursiveContains2(pointer, text, next, ignoreCase, fragment, ignoreChars, maxIgnoreChars);
        }

        List<String> recursiveContainsL = recursiveContains2(childNodeL, text, next, ignoreCase, fragment,
                ignoreChars, maxIgnoreChars);

        // For characters without case variants both lookups yield the same node. Searching it a
        // second time would only duplicate the types and double the work at every such character.
        List<String> recursiveContainsU = null;
        if (childNodeU != childNodeL) {
            recursiveContainsU = recursiveContains2(childNodeU, text, next, ignoreCase, fragment, ignoreChars,
                    maxIgnoreChars);
        }

        if (recursiveContainsL == null && recursiveContainsU == null) {
            return null;
        }

        List<String> result = new LinkedList<String>();
        if (recursiveContainsL != null) {
            result.addAll(recursiveContainsL);
        }
        if (recursiveContainsU != null) {
            result.addAll(recursiveContainsU);
        }
        return result;
    }

    /**
     * Follows the chain of space children below pointer and returns the first child for charAt
     * that hangs off one of those space nodes.
     * 
     * @param pointer
     *          The node whose space children are followed.
     * @param charAt
     *          The character to look for behind the spaces.
     * @return The node for charAt behind one or more space nodes, or <code>null</code> if the
     *         chain of space nodes ends without such a node.
     */
    private MultiTextNode skipWS(MultiTextNode pointer, char charAt) {
        MultiTextNode space = pointer.getChildNode(' ');
        while (space != null) {
            MultiTextNode match = space.getChildNode(charAt);
            if (match != null) {
                return match;
            }
            space = space.getChildNode(' ');
        }
        return null;
    }

    /**
     * Follows the string text through the tree, starting at pointer, and reports whether the path
     * exists and - unless fragment is set - ends a word.
     * 
     * @param pointer
     *          The MultiTextNode we are looking at.
     * @param text
     *          The string which is contained or not.
     * @param index
     *          The index of the string text we checked until now.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not.
     * @param fragment
     *          Indicates whether it is sufficient that text is the beginning of an entry.
     * @param ignoreChars
     *          Characters of text which may be skipped if the tree has no matching node; the
     *          first character of text is never skipped.
     * @param maxIgnoreChars
     *          Only passed on to the recursive calls; not evaluated.
     * @return True, if the TreeWordList contains the string text, false otherwise.
     */
    private boolean recursiveContains(MultiTextNode pointer, String text, int index, boolean ignoreCase,
            boolean fragment, char[] ignoreChars, int maxIgnoreChars) {

        // A missing node means the tree has no path for the characters seen so far.
        if (pointer == null) {
            return false;
        }

        if (index == text.length()) {
            return fragment || pointer.isWordEnd();
        }

        final char current = text.charAt(index);
        final int next = index + 1;

        // The first character of the text is never treated as ignorable.
        boolean ignorable = false;
        if (ignoreChars != null && index != 0) {
            for (int i = 0; i < ignoreChars.length && !ignorable; i++) {
                ignorable = ignoreChars[i] == current;
            }
        }

        if (!ignoreCase) {
            // Case sensitive: stay on the node for an ignorable character without a child,
            // otherwise descend (possibly into a missing child).
            MultiTextNode child = pointer.getChildNode(current);
            MultiTextNode target = (ignorable && child == null) ? pointer : child;
            return recursiveContains(target, text, next, ignoreCase, fragment, ignoreChars, maxIgnoreChars);
        }

        MultiTextNode lower = pointer.getChildNode(Character.toLowerCase(current));
        MultiTextNode upper = pointer.getChildNode(Character.toUpperCase(current));

        if (ignorable && lower == null && upper == null) {
            // Character is ignored and does not appear.
            return recursiveContains(pointer, text, next, ignoreCase, fragment, ignoreChars, maxIgnoreChars);
        }

        if (recursiveContains(lower, text, next, ignoreCase, fragment, ignoreChars, maxIgnoreChars)) {
            return true;
        }
        return recursiveContains(upper, text, next, ignoreCase, fragment, ignoreChars, maxIgnoreChars);
    }

    /**
     * Searches the stream for entries of this word list and creates annotations for them.
     * <p>
     * Every basic of the stream is used once as the anchor of a candidate. The candidate is
     * extended with the covered text of the following basics for as long as
     * {@link #containsFragment(String, boolean, int, boolean, double, String)} does not return
     * <code>null</code> for it. On the way, the types found for each candidate are handed to
     * tryToCreateAnnotation together with the basics collected so far.
     * 
     * @param stream
     *          The stream to search; it is moved to its first element and iterated to its end.
     * @param typeMap
     *          Passed on to tryToCreateAnnotation, which looks up each found type name in it.
     * @param ignoreCase
     *          Passed on to containsFragment.
     * @param ignoreLength
     *          Passed on to containsFragment.
     * @param edit
     *          Passed on to containsFragment.
     * @param distance
     *          Passed on to containsFragment.
     * @param ignoreToken
     *          Passed on to containsFragment. In addition, a basic whose covered text is a single
     *          character contained in this string is never used as an anchor and is appended to a
     *          candidate without a lookup.
     * @return The annotations collected in a set.
     */
    public Collection<AnnotationFS> find(RutaStream stream, Map<String, Object> typeMap, boolean ignoreCase,
            int ignoreLength, boolean edit, double distance, String ignoreToken) {

        Collection<AnnotationFS> results = new HashSet<AnnotationFS>();
        stream.moveToFirst();
        // Second iterator used to look ahead from the current anchor.
        FSIterator<AnnotationFS> streamPointer = stream.copy();

        while (stream.isValid()) {
            RutaBasic anchorBasic = (RutaBasic) stream.get();
            streamPointer.moveTo(anchorBasic);

            // The basics that make up the current candidate, starting with the anchor.
            List<RutaBasic> basicsToAdd = new ArrayList<RutaBasic>();
            basicsToAdd.add(anchorBasic);
            String text = anchorBasic.getCoveredText();
            StringBuilder candidate = new StringBuilder(text);
            String lastCandidate = candidate.toString();

            // Do not start a candidate at a single ignorable character.
            if (text.length() != 1 || !ignoreToken.contains(text)) {

                // Never filled in this method; handed to tryToCreateAnnotation as is.
                List<AnnotationFS> interResults = new ArrayList<AnnotationFS>();

                while (streamPointer.isValid()) {

                    // The lookup is skipped if the most recently appended basic is a single
                    // ignorable character.
                    boolean skip = false;
                    String currentBasicText = basicsToAdd.get(basicsToAdd.size() - 1).getCoveredText();
                    if (currentBasicText.length() == 1 && ignoreToken.contains(currentBasicText)) {
                        skip = true;
                    }
                    List<String> types = null;
                    if (!skip) {
                        types = containsFragment(candidate.toString(), ignoreCase, ignoreLength, edit, distance,
                                ignoreToken);
                    }
                    if (skip || types != null) {
                        streamPointer.moveToNext();
                        if (streamPointer.isValid()) {
                            RutaBasic next = (RutaBasic) streamPointer.get();
                            // List<String> contains = contains(candidate,
                            // ignoreCase,
                            // ignoreLength, edit, distance, ignoreToken);
                            if (!skip) {
                                tryToCreateAnnotation(types, stream, results, basicsToAdd, candidate.toString(),
                                        interResults, ignoreCase, ignoreLength, edit, distance, ignoreToken,
                                        typeMap);
                            }
                            // Extend the candidate by the next basic.
                            lastCandidate = candidate.toString();
                            candidate.append(next.getCoveredText());
                            basicsToAdd.add(next);

                        } else {
                            // !streamPointer.isValid();
                            // End of the stream reached; types is null here if the lookup was skipped.
                            tryToCreateAnnotation(types, stream, results, basicsToAdd, lastCandidate, interResults,
                                    ignoreCase, ignoreLength, edit, distance, ignoreToken, typeMap);
                        }
                    } else {

                        // containsFragment.isEmpty();
                        // basicsToAdd.remove(basicsToAdd.size() - 1);
                        // tryToCreateAnnotation(stream, results, basicsToAdd,
                        // lastCandidate, interResults, ignoreCase,
                        // ignoreLength, edit, distance, ignoreToken, typeMap);

                        // breaks inner while()-loop.
                        break;
                    }

                }
            }
            stream.moveToNext();
        }

        return results;
    }

    /**
     * Not implemented for this word list: the method fails its assertion when assertions are
     * enabled and otherwise returns an empty list. None of the parameters is evaluated.
     * 
     * @return An empty list.
     */
    public List<AnnotationFS> find(RutaStream stream, boolean ignoreCase, int size, char[] ignoreChars,
            int maxIgnoredChars, boolean ignoreWS) {
        assert false;
        return new ArrayList<AnnotationFS>();
    }

    /**
     * Creates one annotation per distinct type name in types, spanning from the begin of the
     * first to the end of the last basic, and adds it to results.
     * <p>
     * Each type name is looked up in map. A {@link Type} value yields a plain annotation of that
     * type. A list value with two or three elements - a Type, a feature name and optionally a
     * value - yields an annotation whose feature is set to the given value, or to the type name
     * if no value is given. Any other map value is skipped.
     * <p>
     * If there are no basics or types is <code>null</code>, the content of interResult is added
     * to results instead.
     * 
     * @param types
     *          The type names found for the candidate, may be <code>null</code>.
     * @param stream
     *          Provides the CAS in which the annotations are created.
     * @param results
     *          Receives the created annotations.
     * @param basicsToAdd
     *          The basics covered by the candidate.
     * @param interResult
     *          Fallback results, may be <code>null</code>.
     * @param map
     *          Maps type names to a Type or to a list as described above.
     */
    private void tryToCreateAnnotation(List<String> types, RutaStream stream, Collection<AnnotationFS> results,
            List<RutaBasic> basicsToAdd, String lastCandidate, List<AnnotationFS> interResult, boolean ignoreCase,
            int ignoreLength, boolean edit, double distance, String ignoreToken, Map<String, Object> map) {

        if (basicsToAdd.size() < 1 || types == null) {
            if (interResult != null && !interResult.isEmpty()) {
                results.addAll(interResult);
            }
            return;
        }

        // All annotations created here share the span of the collected basics.
        final int begin = basicsToAdd.get(0).getBegin();
        final int end = basicsToAdd.get(basicsToAdd.size() - 1).getEnd();

        Set<String> distinctTypes = new HashSet<String>(types);
        for (String typeName : distinctTypes) {
            Object mapped = map.get(typeName);

            if (mapped instanceof Type) {
                results.add(stream.getCas().createAnnotation((Type) mapped, begin, end));
                continue;
            }
            if (!(mapped instanceof List)) {
                continue;
            }

            List<?> spec = (List<?>) mapped;
            final int specSize = spec.size();
            if (specSize != 2 && specSize != 3) {
                continue;
            }

            Object first = spec.get(0);
            Object second = spec.get(1);
            // Without an explicit third element the type name itself is the feature value.
            Object value = specSize == 3 ? spec.get(2) : typeName;

            if (first instanceof Type && second instanceof String) {
                Type type = (Type) first;
                AnnotationFS created = stream.getCas().createAnnotation(type, begin, end);
                Feature feature = type.getFeatureByBaseName((String) second);
                setFeatureValue(created, feature, value);
                results.add(created);
            }
        }
    }

    /**
     * Assigns a value to a feature of the given annotation, converting it according to the name of
     * the feature's range type.
     * <p>
     * Numbers are narrowed or widened to the primitive range of the feature. A {@link Type} value
     * is stored by its name if the feature has a string range. If the combination of range and
     * value is not covered by any case, nothing is assigned and no error is raised.
     * 
     * @param annotationFS
     *          The annotation whose feature is set.
     * @param feature
     *          The feature to set; must not be null.
     * @param o
     *          The value to assign; must not be null.
     * @throws IllegalArgumentException
     *           if {@code feature} or {@code o} is null.
     */
    private void setFeatureValue(AnnotationFS annotationFS, Feature feature, Object o) {
        if (feature == null || o == null) {
            throw new IllegalArgumentException("Not able to assign feature value: " + o + " -> " + feature);
        }

        String rangeName = feature.getRange().getName();
        if (rangeName.equals(UIMAConstants.TYPE_STRING) && o instanceof String) {
            annotationFS.setStringValue(feature, (String) o);
        } else if (rangeName.equals(UIMAConstants.TYPE_INTEGER) && o instanceof Number) {
            annotationFS.setIntValue(feature, ((Number) o).intValue());
        } else if (rangeName.equals(UIMAConstants.TYPE_DOUBLE) && o instanceof Number) {
            annotationFS.setDoubleValue(feature, ((Number) o).doubleValue());
        } else if (rangeName.equals(UIMAConstants.TYPE_FLOAT) && o instanceof Number) {
            annotationFS.setFloatValue(feature, ((Number) o).floatValue());
        } else if (rangeName.equals(UIMAConstants.TYPE_BYTE) && o instanceof Number) {
            annotationFS.setByteValue(feature, ((Number) o).byteValue());
        } else if (rangeName.equals(UIMAConstants.TYPE_SHORT) && o instanceof Number) {
            annotationFS.setShortValue(feature, ((Number) o).shortValue());
        } else if (rangeName.equals(UIMAConstants.TYPE_LONG) && o instanceof Number) {
            annotationFS.setLongValue(feature, ((Number) o).longValue());
        } else if (rangeName.equals(UIMAConstants.TYPE_BOOLEAN) && o instanceof Boolean) {
            annotationFS.setBooleanValue(feature, (Boolean) o);
        } else if (rangeName.equals(UIMAConstants.TYPE_STRING) && o instanceof Type) {
            // Short-circuit && like every other branch (was the non-short-circuit &).
            annotationFS.setStringValue(feature, ((Type) o).getName());
        }
    }

    /**
     * Returns a map with all strings with a specified edit distance to the string query as keys and
     * the files they belong to as values. The search is case sensitive and uses no ignored
     * characters.
     * 
     * @param query
     *          The query string.
     * @param distance
     *          The specified edit distance.
     * @return A map with all strings with a specified edit distance to the string query as keys and
     *         the files they belong to as values.
     */
    public Map<String, Set<String>> editDistance(String query, int distance) {
        final boolean caseInsensitive = false;
        final String noIgnoredCharacters = "";
        return editDistance(query, distance, caseInsensitive, noIgnoredCharacters);
    }

    /**
     * Returns a map with all strings with a specified edit distance to the string query as keys and
     * the files they belong to as values. Fragments of the query are not searched.
     * 
     * @param query
     *          The query string.
     * @param distance
     *          The specified edit distance.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not.
     * @param ignoreToken
     *          The characters that are passed on as ignored characters to the search.
     * @return A map with all strings with a specified edit distance to the string query as keys and
     *         the files they belong to as values.
     */
    public Map<String, Set<String>> editDistance(String query, int distance, boolean ignoreCase,
            String ignoreToken) {
        final boolean searchFragments = false;
        return editDistance(query, distance, ignoreCase, ignoreToken, searchFragments);
    }

    /**
     * Returns a map with all strings with a specified edit distance to the string query as keys and
     * the files they belong to as values.
     * <p>
     * Every character of {@code ignoreToken} gets insert costs of zero on the cost map used for
     * this search, so such characters can occur in a matched word without consuming any of the
     * distance. The previous insert costs are restored on that cost map after the search.
     * 
     * @param query
     *          The query string.
     * @param distance
     *          The specified edit distance.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not.
     * @param ignoreToken
     *          The characters whose insertion is free of costs; must not be null.
     * @param fragment
     *          Indicates whether we search for fragments of the query string or not.
     * @return A map with all strings with a specified edit distance to the string query as keys and
     *         the files they belong to as values.
     */
    public Map<String, Set<String>> editDistance(String query, int distance, boolean ignoreCase, String ignoreToken,
            boolean fragment) {

        // The ignore functionality is realized by setting the insert costs of
        // the ignored characters to zero instead of special-casing them in the
        // recursion.
        EditDistanceCostMap edcm = new EditDistanceCostMap();
        Map<Character, Double> oldInsertCosts = new HashMap<Character, Double>();

        // We need to store the old insert costs before we set them to zero.
        for (char c : ignoreToken.toCharArray()) {
            // Remember only the first value seen: a repeated character would
            // otherwise overwrite the saved costs with the zero set below.
            if (!oldInsertCosts.containsKey(c)) {
                oldInsertCosts.put(c, edcm.getInsertCosts(c));
            }
            edcm.setInsertCosts(c, 0.0);
        }

        String effectiveQuery = ignoreCase ? query.toLowerCase() : query;
        Map<String, Set<String>> result = editDistanceClever(root, effectiveQuery, "", distance, 0, ignoreCase,
                fragment, edcm, false, false);

        // Restoring of the old insert costs (these are insert costs, so they
        // must not be written into the delete costs).
        for (Entry<Character, Double> saved : oldInsertCosts.entrySet()) {
            edcm.setInsertCosts(saved.getKey(), saved.getValue());
        }

        return result;
    }

    /**
     * Recursively walks the trie starting at {@code node} and collects all strings that can be
     * reached within the remaining edit distance, together with the types attached to them.
     * <p>
     * Each call tries, in this order: a delete step (stay on the same node, advance the query
     * index), recording the current node as a result if it is a word end (or always in fragment
     * mode), and then for every child an exact match, a substitution and an insert step. A delete
     * step is never tried directly after an insert step and vice versa.
     * 
     * @param node
     *          The MultiTextNode which is under consideration at the moment.
     * @param query
     *          The query string.
     * @param result
     *          The result which matched until now.
     * @param distance
     *          The remaining edit distance.
     * @param index
     *          The index of the query string at the moment.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not.
     * @param fragment
     *          Indicates whether we search for fragments of the query string or not.
     * @param edm
     *          The edit distance cost map we are using.
     * @param lastActionInsert
     *          Indicates whether the last action was an insert action.
     * @param lastActionDelete
     *          Indicates whether the last action was a delete action.
     * @return A map with all strings with a specified edit distance to the string query as keys and
     *         the files they belong to as values.
     */
    private Map<String, Set<String>> editDistanceClever(MultiTextNode node, String query, String result,
            double distance, int index, boolean ignoreCase, boolean fragment, EditDistanceCostMap edm,
            boolean lastActionInsert, boolean lastActionDelete) {

        // Collects the results of this call and of all recursive calls below it.
        EditDistanceResultMap resultMap = new EditDistanceResultMap();

        if (!lastActionInsert) {
            // Delete: stay on the current node but move on in the query. Only
            // tried if something was matched already and the costs fit into the
            // remaining distance.
            if (distance - edm.getDeleteCosts(node.getValue()) >= 0 && result.length() > 0) {
                resultMap.putAll(
                        editDistanceClever(node, query, result, distance - edm.getDeleteCosts(node.getValue()),
                                index + 1, ignoreCase, fragment, edm, false, true));
            }
        }

        // Recursion stop: the current node may be reported as a result.
        if (node.isWordEnd() || fragment) {

            HashMap<String, Set<String>> temp = new HashMap<String, Set<String>>();

            double remainingInsertCosts = 0.0;

            // Accumulating remaining insert costs if the query is longer than
            // the word in the trie.
            for (int i = index; i < query.length(); i++) {
                remainingInsertCosts += edm.getInsertCosts(query.charAt(i));
            }

            // The match counts only if the rest of the query is affordable.
            if (remainingInsertCosts <= distance) {
                // if (remainingInsertCosts <= distance &&
                // !node.getTypes().isEmpty()) {
                // if (query.length() - index <= distance) {

                // Fragment mode reports the result of getTypeCone(node) instead
                // of only the types of this node.
                if (fragment) {
                    temp.put(result, new HashSet<String>(getTypeCone(node)));
                } else {
                    temp.put(result, new HashSet<String>(node.getTypes()));
                }

                resultMap.putAll(temp);
            }

            // Important: word end does not mean no children any more!
            if (node.getChildren() == null) {
                return resultMap;
            }
        }

        // Recursion over all children of the current node.
        // NOTE(review): the null check above only runs inside the stop branch;
        // presumably nodes that are not a word end always have children -
        // confirm, otherwise this loop can fail on a null map.
        for (MultiTextNode tempNode : node.getChildren().values()) {

            // Exact match: the child carries the next query character, so no
            // distance is consumed.
            if (index < query.length()) {
                if (ignoreCase) {
                    if (Character.toLowerCase(tempNode.getValue()) == Character.toLowerCase(query.charAt(index))) {
                        resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(), distance,
                                index + 1, ignoreCase, fragment, edm, false, false));
                    }
                } else {
                    if (tempNode.getValue() == query.charAt(index)) {
                        resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(), distance,
                                index + 1, ignoreCase, fragment, edm, false, false));
                    }
                }
            }

            // NOTE(review): the replace costs are looked up for the pair
            // (current node, child) and not for the query character - confirm
            // that this is intended.
            if (distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()) >= 0) {

                // Substitute: take the child and move on in the query.
                resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                        distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()), index + 1, ignoreCase,
                        fragment, edm, false, false));
            }

            // An insert is not tried directly after a delete.
            if (!lastActionDelete) {
                if (distance - edm.getInsertCosts(tempNode.getValue()) >= 0) {
                    // Insert - use the same index twice.
                    resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                            distance - edm.getInsertCosts(tempNode.getValue()), index, ignoreCase, fragment, edm,
                            true, false));
                }
            }
        }

        return resultMap;
    }

    /**
     * Checks if a string is contained by the MultiTreeWordList within the remaining edit distance.
     * <p>
     * The search stops with {@code true} as soon as one matching path is found. Tried in this
     * order: the stop conditions for the current node, a delete step (stay on the same node,
     * advance the query index) and then for every child an exact match, a substitution and an
     * insert step.
     * 
     * @param node
     *          The MultiTextNode which is under consideration at the moment.
     * @param query
     *          The query string.
     * @param result
     *          The result which matched until now.
     * @param distance
     *          The remaining edit distance.
     * @param index
     *          The index of the query string at the moment.
     * @param ignoreCase
     *          Indicates whether we search case sensitive or not.
     * @param fragment
     *          Indicates whether we search for fragments of the query string or not.
     * @param edm
     *          The edit distance cost map we are using.
     * @return {@code true} if a match was found starting from the given node, {@code false}
     *         otherwise.
     */
    private boolean editDistanceBool(MultiTextNode node, String query, String result, double distance, int index,
            boolean ignoreCase, boolean fragment, EditDistanceCostMap edm) {

        // Recursion stop: in fragment mode a completely consumed query is a match.
        if (fragment && index == query.length()) {
            return true;
        }

        if (node.isWordEnd()) {

            double remainingInsertCosts = 0.0;

            // Accumulating remaining insert costs if the query is longer than
            // the word in the trie.
            for (int i = index; i < query.length(); i++) {
                remainingInsertCosts += edm.getInsertCosts(query.charAt(i));
            }

            if (remainingInsertCosts <= distance) {
                return true;
            }
        }

        // Delete: stay on the current node but move on in the query.
        double deleteCosts = edm.getDeleteCosts(node.getValue());
        if (distance - deleteCosts >= 0 && result.length() > 0) {
            if (editDistanceBool(node, query, result, distance - deleteCosts, index + 1, ignoreCase, fragment,
                    edm)) {
                return true;
            }
        }

        // A node without children cannot be extended any further. The same
        // guard exists in editDistanceClever; without it a childless word end
        // that did not match above would fail on the loop below.
        if (node.getChildren() == null) {
            return false;
        }

        // Recursion.
        for (MultiTextNode tempNode : node.getChildren().values()) {

            // Exact match: no distance is consumed.
            if (index < query.length()) {
                boolean sameCharacter;
                if (ignoreCase) {
                    sameCharacter = Character.toLowerCase(tempNode.getValue()) == Character
                            .toLowerCase(query.charAt(index));
                } else {
                    sameCharacter = tempNode.getValue() == query.charAt(index);
                }

                if (sameCharacter && editDistanceBool(tempNode, query, result + tempNode.getValue(), distance,
                        index + 1, ignoreCase, fragment, edm)) {
                    return true;
                }
            }

            // Substitute.
            double replaceCosts = edm.getReplaceCosts(node.getValue(), tempNode.getValue());
            if (distance - replaceCosts >= 0) {
                if (editDistanceBool(tempNode, query, result + tempNode.getValue(), distance - replaceCosts,
                        index + 1, ignoreCase, fragment, edm)) {
                    return true;
                }
            }

            // Insert - use the same index twice.
            double insertCosts = edm.getInsertCosts(tempNode.getValue());
            if (distance - insertCosts >= 0) {
                if (editDistanceBool(tempNode, query, result + tempNode.getValue(), distance - insertCosts, index,
                        ignoreCase, fragment, edm)) {
                    return true;
                }
            }
        }

        return false;
    }

    // private Map<String, Set<String>> editDistance(MultiTextNode node, String query, String result,
    // double distance, int index, boolean ignoreCase, String ignoreToken, boolean fragment,
    // EditDistanceCostMap edm) {
    //
    //
    // EditDistanceResultMap resultMap = new EditDistanceResultMap();
    //
    // // Delete.
    // if (distance - edm.getDeleteCosts(node.getValue()) >= 0 && result.length() > 0) {
    // resultMap.putAll(editDistance(node, query, result, distance
    // - edm.getDeleteCosts(node.getValue()), index + 1, ignoreCase, ignoreToken, fragment,
    // edm));
    // }
    //
    // // Recursion stop.
    // if (node.isWordEnd()) {
    //
    // HashMap<String, Set<String>> temp = new HashMap<String, Set<String>>();
    //
    // if (query.length() - index <= distance) {
    // temp.put(result, new HashSet<String>(node.getTypes()));
    // resultMap.putAll(temp);
    // }
    //
    // // Ignore token at the end of the word.
    // if (ignoreToken.contains(String.valueOf(node.getValue()))) {
    // temp.put(result, new HashSet<String>(node.getTypes()));
    // resultMap.putAll(temp);
    // }
    //
    // if (node.getChildren() == null) {
    // return resultMap;
    // }
    // }
    //
    // // Recursion.
    // for (MultiTextNode tempNode : node.getChildren().values()) {
    //
    // if (index < query.length()) {
    // if (ignoreCase) {
    // if (Character.toLowerCase(tempNode.getValue()) == Character.toLowerCase(query
    // .charAt(index))) {
    // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance,
    // index + 1, ignoreCase, ignoreToken, fragment, edm));
    // }
    // } else {
    // if (tempNode.getValue() == query.charAt(index)) {
    // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance,
    // index + 1, ignoreCase, ignoreToken, fragment, edm));
    // }
    // }
    // }
    //
    // if (distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()) >= 0) {
    //
    // // Substitute.
    // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance
    // - edm.getReplaceCosts(node.getValue(), tempNode.getValue()), index + 1, ignoreCase,
    // ignoreToken, fragment, edm));
    // }
    //
    // // Ignore token.
    // if (ignoreToken.contains(String.valueOf(tempNode.getValue()))) {
    // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance,
    // index, ignoreCase, ignoreToken, fragment, edm));
    // } else {
    // if (distance - edm.getInsertCosts(tempNode.getValue()) >= 0) {
    // // Insert - use the same index twice.
    // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance
    // - edm.getInsertCosts(tempNode.getValue()), index, ignoreCase, ignoreToken,
    // fragment, edm));
    // }
    // }
    // }
    //
    // return resultMap;
    // }

    /**
     * Computes the hash code from the cost map and the root node, the same two fields that are
     * compared in {@link #equals(Object)}. A null field contributes zero.
     */
    @Override
    public int hashCode() {
        int costMapHash = (costMap == null) ? 0 : costMap.hashCode();
        int rootHash = (root == null) ? 0 : root.hashCode();
        // Same 31-based combination as the usual generated form, starting from 1.
        int combined = 31 + costMapHash;
        return 31 * combined + rootHash;
    }

    /**
     * Two word lists are equal if they are of exactly the same class and both their cost maps and
     * their root nodes are equal (or both null).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }

        MultiTreeWordList that = (MultiTreeWordList) obj;

        boolean sameCostMap = (costMap == null) ? that.costMap == null : costMap.equals(that.costMap);
        if (!sameCostMap) {
            return false;
        }

        return (root == null) ? that.root == null : root.equals(that.root);
    }

    /**
     * Writes this word list to a file by handing the root node over to the persistence helper.
     * 
     * @param path
     *          The path passed on to the persistence helper.
     * @param compress
     *          The compression flag passed on to the persistence helper.
     * @param encoding
     *          The encoding passed on to the persistence helper.
     * @throws IOException
     *           if the persistence helper fails to create the file.
     */
    public void createMTWLFile(String path, boolean compress, String encoding) throws IOException {
        this.persistence.createMTWLFile(this.root, path, compress, encoding);
    }

}