eu.project.ttc.utils.TermSuiteUtils.java Source code

Introduction

Here is the source code for eu.project.ttc.utils.TermSuiteUtils.java
Source

/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/
package eu.project.ttc.utils;

import java.net.URL;
import java.net.URLClassLoader;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import org.apache.commons.lang.mutable.MutableInt;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import eu.project.ttc.models.TermWord;
import eu.project.ttc.types.TermOccAnnotation;
import eu.project.ttc.types.WordAnnotation;
import fr.univnantes.lina.uima.tkregex.LabelledAnnotation;
import fr.univnantes.lina.uima.tkregex.RegexOccurrence;

/**
 * Static helpers: prefix-based indexing keys, occurrence counting, whitespace
 * normalisation, construction of term grouping keys and a classpath dump.
 */
public class TermSuiteUtils {
    /** Layout of a grouping key: the pattern, a colon and a space, then the lemmas. */
    private static final String GROUPING_KEY_FORMAT = "%s: %s";

    /** Indexing key made of the first letter of a string. */
    public static final IndexingKey<String, String> KEY_ONE_FIRST_LETTERS = getNFirstLetterIndexingKey(1);
    /** Indexing key made of the first two letters of a string. */
    public static final IndexingKey<String, String> KEY_TWO_FIRST_LETTERS = getNFirstLetterIndexingKey(2);
    /** Indexing key made of the first three letters of a string. */
    public static final IndexingKey<String, String> KEY_THREE_FIRST_LETTERS = getNFirstLetterIndexingKey(3);

    /**
     * Builds an indexing key that maps a string to its first {@code n} characters.
     * Strings of length {@code n} or less are returned unchanged.
     *
     * @param n the number of leading characters to keep, strictly positive
     * @return the indexing key
     * @throws IllegalArgumentException if {@code n} is not greater than 0
     */
    public static IndexingKey<String, String> getNFirstLetterIndexingKey(final int n) {
        Preconditions.checkArgument(n > 0, "n must be greater than 0");
        return new IndexingKey<String, String>() {
            @Override
            public String getIndexKey(String fullString) {
                // Short strings are their own key; substring would throw on them.
                if (fullString.length() <= n) {
                    return fullString;
                }
                return fullString.substring(0, n);
            }
        };
    }

    /**
     * Returns the identifier of a single-word term, which is the lemma of the word:
     * single word terms are pre-gathered by their lemma.
     *
     * @param word the word annotation
     * @return the lemma of {@code word}
     */
    public static String getSingleWordTermId(WordAnnotation word) {
        return word.getLemma();
    }

    /**
     * Counts the occurrences of each distinct element of {@code list}.
     *
     * @param list the elements to count
     * @param <T> the element type, used as a map key
     * @return a map from element to number of occurrences, iterating from the most
     *         frequent element to the least frequent one; elements with the same
     *         count iterate in order of first occurrence in {@code list}
     */
    public static <T> LinkedHashMap<T, Integer> getCounters(Iterable<T> list) {
        // An insertion-ordered map, combined with the stable sort below, makes the
        // relative order of equally frequent elements deterministic.
        Map<T, MutableInt> map = Maps.newLinkedHashMap();
        for (T e : list) {
            MutableInt counter = map.get(e);
            if (counter == null) {
                counter = new MutableInt(0);
                map.put(e, counter);
            }
            counter.increment();
        }

        // Descending order of count: note the swapped operands.
        Comparator<Entry<T, MutableInt>> comparator = new Comparator<Entry<T, MutableInt>>() {
            @Override
            public int compare(Entry<T, MutableInt> o1, Entry<T, MutableInt> o2) {
                return ComparisonChain.start()
                        .compare(o2.getValue().intValue(), o1.getValue().intValue())
                        .result();
            }
        };

        List<Entry<T, MutableInt>> entries = Lists.newArrayList(map.entrySet());
        Collections.sort(entries, comparator);
        LinkedHashMap<T, Integer> counters = Maps.newLinkedHashMap();
        for (Entry<T, MutableInt> e : entries) {
            counters.put(e.getKey(), e.getValue().intValue());
        }
        return counters;
    }

    /**
     * Replaces every match of {@code TermSuiteConstants.WHITESPACE_PATTERN_STRING} in the
     * text with {@code TermSuiteConstants.WHITESPACE_STRING}, then trims the result.
     *
     * @param coveredText the text to normalise
     * @return the normalised text
     */
    public static String trimInside(String coveredText) {
        return coveredText
                .replaceAll(TermSuiteConstants.WHITESPACE_PATTERN_STRING, TermSuiteConstants.WHITESPACE_STRING)
                .trim();
    }

    /**
     * Builds the grouping key of a term occurrence from the lower-cased pattern labels
     * and the lemmas of its words.
     *
     * @param annotation the term occurrence annotation
     * @return the grouping key, formatted as {@code "<pattern>: <lemmas>"}
     */
    public static String getGroupingKey(TermOccAnnotation annotation) {
        int nbWords = annotation.getWords().size();
        StringBuilder patternSb = new StringBuilder();
        List<String> lemmas = Lists.newArrayListWithExpectedSize(nbWords);
        for (int i = 0; i < nbWords; i++) {
            // Locale.ROOT keeps keys identical whatever the JVM default locale is.
            patternSb.append(annotation.getPattern(i).toLowerCase(Locale.ROOT));
            lemmas.add(annotation.getWords(i).getLemma());
        }
        return toGroupingKey(patternSb, lemmas);
    }

    /**
     * Builds the grouping key of a regex occurrence: its concatenated labels, then
     * {@code TermSuiteConstants.COLONS} and {@code TermSuiteConstants.WHITESPACE}, then the
     * lemmas of its labelled annotations separated by {@code TermSuiteConstants.WHITESPACE}.
     * The whole key is lower-cased, lemmas included.
     *
     * @param occurrence the regex occurrence; its labelled annotations are cast to
     *        {@link WordAnnotation}
     * @return the lower-cased grouping key
     */
    public static String toGroupingKey(RegexOccurrence occurrence) {
        StringBuilder builder = new StringBuilder();
        builder.append(Joiner.on("").join(occurrence.getLabels())).append(TermSuiteConstants.COLONS)
                .append(TermSuiteConstants.WHITESPACE);

        boolean first = true;
        for (LabelledAnnotation la : occurrence.getLabelledAnnotations()) {
            if (!first) {
                builder.append(TermSuiteConstants.WHITESPACE);
            }
            builder.append(((WordAnnotation) la.getAnnotation()).getLemma());
            first = false;
        }
        // Locale.ROOT keeps keys identical whatever the JVM default locale is.
        return builder.toString().toLowerCase(Locale.ROOT);
    }

    /**
     * Formats a grouping key from an already built pattern and a list of lemmas.
     *
     * @param patternSb the concatenated pattern labels
     * @param lemmas the lemmas, joined with {@code TermSuiteConstants.WHITESPACE}
     * @return the grouping key
     */
    private static String toGroupingKey(StringBuilder patternSb, List<String> lemmas) {
        return String.format(GROUPING_KEY_FORMAT, patternSb.toString(),
                Joiner.on(TermSuiteConstants.WHITESPACE).join(lemmas));
    }

    /**
     * Varargs variant of {@link #getGroupingKey(Collection)}.
     *
     * @param words the words of the term, in order
     * @return the grouping key
     */
    public static String getGroupingKey(TermWord... words) {
        return getGroupingKey(Lists.newArrayList(words));
    }

    /**
     * Builds a grouping key from the lower-cased syntactic labels and the lemmas of
     * the given term words, in iteration order.
     *
     * @param words the words of the term
     * @return the grouping key, formatted as {@code "<pattern>: <lemmas>"}
     */
    public static String getGroupingKey(Collection<TermWord> words) {
        StringBuilder patternSb = new StringBuilder();
        List<String> lemmas = Lists.newArrayListWithExpectedSize(words.size());
        for (TermWord tw : words) {
            // Locale.ROOT keeps keys identical whatever the JVM default locale is.
            patternSb.append(tw.getSyntacticLabel().toLowerCase(Locale.ROOT));
            lemmas.add(tw.getWord().getLemma());
        }
        return toGroupingKey(patternSb, lemmas);
    }

    /**
     * Prints the entries of the classpath to standard output, one per line.
     *
     * When the system class loader is a {@link URLClassLoader}, the file part of each of
     * its URLs is printed. Otherwise (the system class loader is not a
     * {@code URLClassLoader} on Java 9 and later) the non-empty entries of the
     * {@code java.class.path} system property are printed instead of failing with a
     * {@link ClassCastException}.
     */
    public static void listClasspath() {
        ClassLoader cl = ClassLoader.getSystemClassLoader();

        if (cl instanceof URLClassLoader) {
            for (URL url : ((URLClassLoader) cl).getURLs()) {
                System.out.println(url.getFile());
            }
            return;
        }

        String classpath = System.getProperty("java.class.path", "");
        String separator = System.getProperty("path.separator");
        for (String entry : classpath.split(Pattern.quote(separator))) {
            if (!entry.isEmpty()) {
                System.out.println(entry);
            }
        }
    }

    // NOTE(review): disabled code kept for reference. It casts the system class loader
    // to URLClassLoader, so it would need rework before being re-enabled.
    //   /**
    //    * Adds a path (jar or directory) to classpath of default Class loader
    //    * @param path
    //    */
    //   public static void addToClasspath(String path) {
    //      URLClassLoader urlClassLoader = null;
    //      try {
    //          File f = new File(path);
    //          Preconditions.checkArgument(f.exists(), "No such file: %s", path);
    //          if(f.isFile()) {
    //             ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(f));
    //             boolean isZipped = zipInputStream.getNextEntry() != null;
    //             Preconditions.checkArgument(isZipped, "No such file: %s", path);
    //             zipInputStream.close();
    //          } else
    //             Preconditions.checkArgument(f.isDirectory(), "Should be a directory or a jar : %s", f.getAbsolutePath());
    //          URI u = f.toURI();
    //          urlClassLoader = (URLClassLoader) ClassLoader.getSystemClassLoader();
    //          Class<URLClassLoader> urlClass = URLClassLoader.class;
    //          Method method;
    //         method = urlClass.getDeclaredMethod("addURL", new Class[]{URL.class});
    //         method.setAccessible(true);
    //         LOGGER.info("Adding {} to system class loader");
    //         method.invoke(urlClassLoader, new Object[]{u.toURL()});
    //      } catch (Exception e) {
    //         throw new RuntimeException("Could not add "+path+" to classpath", e);
    //      }
    //   }
}