Java tutorial
/*******************************************************************************
 * Copyright 2012
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universitaet Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.dkpro.core.testing;

import static java.util.Arrays.asList;
import static org.apache.commons.lang.StringUtils.join;
import static org.apache.commons.lang.StringUtils.normalizeSpace;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.apache.uima.fit.util.JCasUtil.toText;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import junit.framework.Assert;

import org.apache.uima.jcas.JCas;
import org.codehaus.plexus.util.StringUtils;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.Morpheme;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticArgument;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticField;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticPredicate;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.PennTree;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;

/**
 * Static assertion helpers that compare expected values, given as plain strings, against
 * annotations of the various layers.
 * <p>
 * Every assertion first prints the expected and the actual values to {@code System.out} in a
 * brace-delimited, quoted form (see {@link #asCopyableString(Collection, boolean)}) and then
 * compares the two rendered strings with {@code assertEquals}.
 */
public class AssertAnnotations {

    /**
     * Asserts that the covered texts of the given tokens equal the expected strings, in order.
     *
     * @param aExpected
     *            the expected token texts; if {@code null}, nothing is checked.
     * @param aActual
     *            the tokens to check.
     */
    public static void assertToken(String[] aExpected, Collection<Token> aActual) {
        if (aExpected == null) {
            return;
        }

        List<String> expected = asList(aExpected);
        List<String> actual = toText(aActual);

        printComparison("Tokens", expected, actual);
        assertSameRendering(expected, actual);
    }

    /**
     * Asserts that the covered texts of the given sentences equal the expected strings, in
     * order.
     *
     * @param aExpected
     *            the expected sentence texts; if {@code null}, nothing is checked.
     * @param aActual
     *            the sentences to check.
     */
    public static void assertSentence(String[] aExpected, Collection<Sentence> aActual) {
        if (aExpected == null) {
            return;
        }

        List<String> expected = asList(aExpected);
        List<String> actual = toText(aActual);

        printComparison("Sentences", expected, actual);
        assertSameRendering(expected, actual);
    }

    /**
     * Asserts the part-of-speech annotations, both by their original tag value
     * ({@code getPosValue()}) and by the short name of their annotation type.
     *
     * @param aExpectedMapped
     *            the expected type short names, in order; if {@code null}, the mapped
     *            comparison is skipped.
     * @param aExpectedOriginal
     *            the expected original tag values, in order; if {@code null}, the original
     *            comparison is skipped.
     * @param actual
     *            the POS annotations to check.
     */
    public static void assertPOS(String[] aExpectedMapped, String[] aExpectedOriginal,
            Collection<POS> actual) {
        List<String> expectedOriginal = aExpectedOriginal != null ? asList(aExpectedOriginal)
                : null;
        List<String> expectedMapped = aExpectedMapped != null ? asList(aExpectedMapped) : null;

        List<String> actualOriginal = new ArrayList<String>();
        List<String> actualMapped = new ArrayList<String>();
        for (POS posAnnotation : actual) {
            actualOriginal.add(posAnnotation.getPosValue());
            actualMapped.add(posAnnotation.getType().getShortName());
        }

        // Print everything before asserting so that both comparisons are visible in the
        // output even when the first assertion fails.
        if (aExpectedOriginal != null) {
            printComparison("POS (original)", expectedOriginal, actualOriginal);
        }
        if (aExpectedMapped != null) {
            printComparison("POS (mapped)", expectedMapped, actualMapped);
        }

        if (aExpectedOriginal != null) {
            assertSameRendering(expectedOriginal, actualOriginal);
        }
        if (aExpectedMapped != null) {
            assertSameRendering(expectedMapped, actualMapped);
        }
    }

    /**
     * Asserts that the values of the given lemmas equal the expected strings, in order.
     *
     * @param aExpected
     *            the expected lemma values; if {@code null}, nothing is checked.
     * @param aActual
     *            the lemmas to check.
     */
    public static void assertLemma(String[] aExpected, Collection<Lemma> aActual) {
        if (aExpected == null) {
            return;
        }

        List<String> expected = asList(aExpected);
        List<String> actual = new ArrayList<String>();
        for (Lemma a : aActual) {
            actual.add(a.getValue());
        }

        printComparison("Lemmas", expected, actual);
        assertSameRendering(expected, actual);
    }

    /**
     * Asserts that the morphological tags ({@code getMorphTag()}) of the given morphemes equal
     * the expected strings, in order.
     *
     * @param aExpected
     *            the expected morphological tags; if {@code null}, nothing is checked.
     * @param aActual
     *            the morphemes to check.
     */
    public static void assertMorpheme(String[] aExpected, Collection<Morpheme> aActual) {
        if (aExpected == null) {
            return;
        }

        List<String> expected = asList(aExpected);
        List<String> actual = new ArrayList<String>();
        for (Morpheme a : aActual) {
            actual.add(a.getMorphTag());
        }

        printComparison("Morphemes", expected, actual);
        assertSameRendering(expected, actual);
    }

    /**
     * Asserts that the values of the given stems equal the expected strings, in order.
     *
     * @param aExpected
     *            the expected stem values; if {@code null}, nothing is checked.
     * @param aActual
     *            the stems to check.
     */
    public static void assertStem(String[] aExpected, Collection<Stem> aActual) {
        if (aExpected == null) {
            return;
        }

        List<String> expected = asList(aExpected);
        List<String> actual = new ArrayList<String>();
        for (Stem a : aActual) {
            actual.add(a.getValue());
        }

        printComparison("Stems", expected, actual);
        assertSameRendering(expected, actual);
    }

    /**
     * Asserts the named entity annotations. Each annotation is rendered as
     * {@code <value> '<covered text>'} (original) and as
     * {@code <type short name> '<covered text>'} (mapped). Expected and actual entries are
     * de-duplicated and sorted before they are compared.
     *
     * @param aExpectedMapped
     *            the expected mapped entries; if {@code null}, the mapped comparison is
     *            skipped.
     * @param aExpectedOriginal
     *            the expected original entries; if {@code null}, the original comparison is
     *            skipped.
     * @param aActual
     *            the named entities to check.
     */
    public static void assertNamedEntity(String[] aExpectedMapped, String[] aExpectedOriginal,
            Collection<NamedEntity> aActual) {
        String[] actualTags = new String[aActual.size()];
        String[] actualClasses = new String[aActual.size()];

        int i = 0;
        for (NamedEntity a : aActual) {
            // The format has two placeholders only; offsets are not part of the rendering.
            actualTags[i] = String.format("%s '%s'", a.getValue(), a.getCoveredText());
            actualClasses[i] = String.format("%s '%s'", a.getType().getShortName(),
                    a.getCoveredText());
            i++;
        }

        List<String> sortedExpectedOriginal = aExpectedOriginal != null
                ? deduplicateAndSort(asList(aExpectedOriginal)) : null;
        List<String> sortedExpectedMapped = aExpectedMapped != null
                ? deduplicateAndSort(asList(aExpectedMapped)) : null;
        List<String> sortedActualOriginal = deduplicateAndSort(asList(actualTags));
        List<String> sortedActualMapped = deduplicateAndSort(asList(actualClasses));

        if (aExpectedOriginal != null) {
            printComparison("Named entities (orig.)", sortedExpectedOriginal,
                    sortedActualOriginal);
        }
        if (aExpectedMapped != null) {
            printComparison("Named entities (map.)", sortedExpectedMapped, sortedActualMapped);
        }

        if (aExpectedOriginal != null) {
            assertSameRendering(sortedExpectedOriginal, sortedActualOriginal);
        }
        if (aExpectedMapped != null) {
            assertSameRendering(sortedExpectedMapped, sortedActualMapped);
        }
    }

    /**
     * Asserts the constituent annotations. Each annotation is rendered as
     * {@code <constituent type> <begin>,<end>} (original) and as
     * {@code <type short name> <begin>,<end>} (mapped). Expected and actual entries are
     * de-duplicated and sorted before they are compared.
     *
     * @param aExpectedMapped
     *            the expected mapped entries; if {@code null}, the mapped comparison is
     *            skipped.
     * @param aExpectedOriginal
     *            the expected original entries; if {@code null}, the original comparison is
     *            skipped.
     * @param aActual
     *            the constituents to check.
     */
    public static void assertConstituents(String[] aExpectedMapped, String[] aExpectedOriginal,
            Collection<Constituent> aActual) {
        String[] actualTags = new String[aActual.size()];
        String[] actualClasses = new String[aActual.size()];

        int i = 0;
        for (Constituent a : aActual) {
            actualTags[i] = String.format("%s %d,%d", a.getConstituentType(), a.getBegin(),
                    a.getEnd());
            actualClasses[i] = String.format("%s %d,%d", a.getType().getShortName(),
                    a.getBegin(), a.getEnd());
            i++;
        }

        // Arrays.asList throws a NullPointerException for a null array, so the expected
        // arrays must be checked before they are wrapped; otherwise the null checks below
        // could never take effect.
        List<String> sortedExpectedOriginal = aExpectedOriginal != null
                ? deduplicateAndSort(asList(aExpectedOriginal)) : null;
        List<String> sortedExpectedMapped = aExpectedMapped != null
                ? deduplicateAndSort(asList(aExpectedMapped)) : null;
        List<String> sortedActualOriginal = deduplicateAndSort(asList(actualTags));
        List<String> sortedActualMapped = deduplicateAndSort(asList(actualClasses));

        if (aExpectedOriginal != null) {
            printComparison("Constituents (orig.)", sortedExpectedOriginal,
                    sortedActualOriginal);
        }
        if (aExpectedMapped != null) {
            printComparison("Constituents (map.)", sortedExpectedMapped, sortedActualMapped);
        }

        if (aExpectedOriginal != null) {
            assertSameRendering(sortedExpectedOriginal, sortedActualOriginal);
        }
        if (aExpectedMapped != null) {
            assertSameRendering(sortedExpectedMapped, sortedActualMapped);
        }
    }

    /**
     * Asserts the syntactic functions of the given constituents. Constituents without a
     * syntactic function are ignored; the others are rendered as
     * {@code <syntactic function> <begin>,<end>}. Expected and actual entries are
     * de-duplicated and sorted before they are compared.
     *
     * @param aExpectedOriginal
     *            the expected entries; if {@code null}, nothing is checked.
     * @param aActual
     *            the constituents to check.
     */
    public static void assertSyntacticFunction(String[] aExpectedOriginal,
            Collection<Constituent> aActual) {
        // Arrays.asList throws a NullPointerException for a null array, so bail out before
        // wrapping it.
        if (aExpectedOriginal == null) {
            return;
        }

        List<String> actualTags = new ArrayList<String>();
        for (Constituent a : aActual) {
            if (a.getSyntacticFunction() != null) {
                actualTags.add(String.format("%s %d,%d", a.getSyntacticFunction(),
                        a.getBegin(), a.getEnd()));
            }
        }

        List<String> sortedExpectedOriginal = deduplicateAndSort(asList(aExpectedOriginal));
        List<String> sortedActualOriginal = deduplicateAndSort(actualTags);

        printComparison("Syn. func. (orig.)", sortedExpectedOriginal, sortedActualOriginal);
        assertSameRendering(sortedExpectedOriginal, sortedActualOriginal);
    }

    /**
     * Returns the distinct elements of the given collection as a new list sorted in natural
     * order.
     *
     * @param <T>
     *            the element type.
     * @param aCollection
     *            the collection to process; may be {@code null}.
     * @return the sorted list of distinct elements, or {@code null} if the collection is
     *         {@code null}.
     */
    public static <T extends Comparable<T>> List<T> deduplicateAndSort(
            Collection<T> aCollection) {
        if (aCollection == null) {
            return null;
        }

        List<T> result = new ArrayList<T>(new HashSet<T>(aCollection));
        Collections.sort(result);
        return result;
    }

    /**
     * Asserts the dependency annotations. Each dependency is rendered with its own offsets,
     * its class name, its dependency type and the offsets and covered texts of its dependent
     * and governor. Expected and actual entries are sorted before they are compared. After
     * the comparison, it is additionally asserted that every dependency has the same offsets
     * as its dependent.
     *
     * @param aExpected
     *            the expected entries; must not be {@code null}.
     * @param aActual
     *            the dependencies to check.
     */
    public static void assertDependencies(String[] aExpected, Collection<Dependency> aActual) {
        // Copy into a new list because the list returned by asList writes through to the
        // caller's array when sorted.
        List<String> expected = new ArrayList<String>(asList(aExpected));
        List<String> actual = new ArrayList<String>();

        boolean offsetCorrect = true;
        for (Dependency a : aActual) {
            actual.add(String.format("[%3d,%3d]%s(%s) D[%d,%d](%s) G[%d,%d](%s)", a.getBegin(),
                    a.getEnd(), a.getClass().getSimpleName(), a.getDependencyType(),
                    a.getDependent().getBegin(), a.getDependent().getEnd(),
                    a.getDependent().getCoveredText(), a.getGovernor().getBegin(),
                    a.getGovernor().getEnd(), a.getGovernor().getCoveredText()));

            offsetCorrect &= (a.getBegin() == a.getDependent().getBegin())
                    && (a.getEnd() == a.getDependent().getEnd());
        }

        Collections.sort(actual);
        Collections.sort(expected);

        printComparison("Dependencies", expected, actual);
        assertSameRendering(expected, actual);
        assertTrue("Dependency offsets must match dependent offsets", offsetCorrect);
    }

    /**
     * Asserts that the Penn tree string of the given annotation equals the expected string
     * after both have been whitespace-normalized.
     *
     * @param aExpected
     *            the expected Penn tree.
     * @param aActual
     *            the Penn tree annotation; if {@code null}, {@code "<none>"} is used as the
     *            actual value.
     */
    public static void assertPennTree(String aExpected, PennTree aActual) {
        String expected = normalizeSpace(aExpected);
        String actual = normalizeSpace(aActual != null ? aActual.getPennTree() : "<none>");

        System.out.printf("%-20s - Expected: \"%s\"%n", "Penn tree", expected);
        System.out.printf("%-20s - Actual : \"%s\"%n", "Penn tree", actual);

        assertEquals(expected, actual);
    }

    /**
     * Asserts that the given Penn tree string equals the expected string after both have been
     * whitespace-normalized.
     *
     * @param aExpected
     *            the expected Penn tree.
     * @param aActual
     *            the actual Penn tree; if {@code null}, {@code "<none>"} is used as the actual
     *            value.
     */
    public static void assertPennTree(String aExpected, String aActual) {
        String expected = normalizeSpace(aExpected);
        String actual = normalizeSpace(aActual != null ? aActual : "<none>");

        System.out.printf("%-20s - Expected: %s%n", "Penn tree", expected);
        System.out.printf("%-20s - Actual : %s%n", "Penn tree", actual);

        assertEquals(expected, actual);
    }

    /**
     * Asserts the semantic predicate annotations. Each predicate is rendered as
     * {@code <covered text> (<category>): [(<role>:<argument text>)...]}. Expected and actual
     * entries are sorted before they are compared.
     *
     * @param aExpected
     *            the expected entries; must not be {@code null}.
     * @param aActual
     *            the semantic predicates to check.
     */
    public static void assertSemanticPredicates(String[] aExpected,
            Collection<SemanticPredicate> aActual) {
        List<String> expected = new ArrayList<String>(asList(aExpected));
        List<String> actual = new ArrayList<String>();

        for (SemanticPredicate p : aActual) {
            StringBuilder sb = new StringBuilder();
            sb.append(p.getCoveredText()).append(" (").append(p.getCategory()).append("): [");
            for (SemanticArgument a : select(p.getArguments(), SemanticArgument.class)) {
                sb.append('(').append(a.getRole()).append(':').append(a.getCoveredText())
                        .append(')');
            }
            sb.append(']');
            actual.add(sb.toString());
        }

        Collections.sort(actual);
        Collections.sort(expected);

        printComparison("Semantic predicates", expected, actual);
        assertSameRendering(expected, actual);
    }

    /**
     * Asserts that the values of the given semantic field annotations equal the expected
     * strings, in order.
     *
     * @param aExpected
     *            the expected values; if {@code null}, nothing is checked.
     * @param aActual
     *            the semantic field annotations to check.
     */
    public static void assertSemanticField(String[] aExpected,
            Collection<SemanticField> aActual) {
        if (aExpected == null) {
            return;
        }

        List<String> expected = asList(aExpected);
        List<String> actual = new ArrayList<String>();
        for (SemanticField a : aActual) {
            actual.add(a.getValue());
        }

        printComparison("Semantic field values", expected, actual);
        assertSameRendering(expected, actual);
    }

    /**
     * Asserts that the CAS contains a tagset description for the given layer and tagset name
     * and that the names of its tags, sorted, equal the sorted expected tags. Fails if no
     * matching tagset description is found; in that case the layer and name of every
     * description that was found are printed.
     *
     * @param aLayer
     *            the layer class whose name is matched against the description's layer.
     * @param aName
     *            the tagset name.
     * @param aExpected
     *            the expected tag names; must not be {@code null}.
     * @param aJCas
     *            the CAS holding the tagset descriptions.
     */
    public static void assertTagset(Class<?> aLayer, String aName, String[] aExpected,
            JCas aJCas) {
        List<String> expected = new ArrayList<String>(asList(aExpected));
        Collections.sort(expected);

        // Collects what was seen so that it can be reported if nothing matches.
        StringBuilder sb = new StringBuilder();

        for (TagsetDescription tsd : select(aJCas, TagsetDescription.class)) {
            sb.append('\t');
            sb.append(tsd.getLayer());
            sb.append(" - ");
            sb.append(tsd.getName());
            sb.append('\n');

            if (StringUtils.equals(aLayer.getName(), tsd.getLayer())
                    && StringUtils.equals(aName, tsd.getName())) {
                List<String> actual = new ArrayList<String>();
                for (TagDescription td : select(tsd.getTags(), TagDescription.class)) {
                    actual.add(td.getName());
                }
                Collections.sort(actual);

                System.out.printf("%-20s - Layer : %s%n", "Layer", tsd.getLayer());
                System.out.printf("%-20s - Tagset : %s%n", "Tagset", tsd.getName());
                printComparison("Tags", expected, actual);

                assertSameRendering(expected, actual);
                return;
            }
        }

        System.out.println("The CAS does not containg a description for layer ["
                + aLayer.getName() + "] tagset [" + aName + "]");
        System.out.println("What has been found is:\n" + sb);
        fail("No tagset definition found for layer [" + aLayer.getName() + "] tagset [" + aName
                + "]");
    }

    /**
     * Asserts which tags of a tagset have no entry in the corresponding tag mapping. The
     * mapping is loaded through a {@link MappingProvider} using a location pattern selected
     * by the layer. The tags of the matching tagset description in the CAS, minus the keys of
     * the mapping, must equal the sorted expected tags. Fails if no mapping or no matching
     * tagset description is found.
     *
     * @param aLayer
     *            the layer; must be {@code POS}, {@code Dependency} or {@code Constituent}.
     * @param aName
     *            the tagset name.
     * @param aDefaultMapped
     *            the tags expected to be absent from the mapping; must not be {@code null}.
     * @param aJCas
     *            the CAS holding the tagset descriptions.
     * @throws IllegalArgumentException
     *             if the layer is not one of the supported layers.
     */
    public static void assertTagsetMapping(Class<?> aLayer, String aName,
            String[] aDefaultMapped, JCas aJCas) {
        String pattern;
        if (aLayer == POS.class) {
            pattern = "classpath:/de/tudarmstadt/ukp/dkpro/"
                    + "core/api/lexmorph/tagset/${language}-${tagset}-pos.map";
        }
        else if (aLayer == Dependency.class) {
            pattern = "classpath:/de/tudarmstadt/ukp/dkpro/"
                    + "core/api/syntax/tagset/${language}-${tagset}-dependency.map";
        }
        else if (aLayer == Constituent.class) {
            pattern = "classpath:/de/tudarmstadt/ukp/dkpro/"
                    + "core/api/syntax/tagset/${language}-${tagset}-constituency.map";
        }
        else {
            throw new IllegalArgumentException("Unsupported layer: " + aLayer.getName());
        }

        MappingProvider mp = new MappingProvider();
        mp.setDefault(MappingProvider.LOCATION, pattern);
        mp.setDefault("tagset", aName);
        mp.configure(aJCas.getCas());
        Map<String, String> mapping = mp.getResource();

        Assert.assertNotNull("No mapping found for layer [" + aLayer.getName() + "] tagset ["
                + aName + "]", mapping);

        List<String> expected = new ArrayList<String>(asList(aDefaultMapped));
        Collections.sort(expected);

        List<String> mappedTags = new ArrayList<String>(mapping.keySet());
        Collections.sort(mappedTags);

        // Collects what was seen so that it can be reported if nothing matches.
        StringBuilder sb = new StringBuilder();

        for (TagsetDescription tsd : select(aJCas, TagsetDescription.class)) {
            sb.append('\t');
            sb.append(tsd.getLayer());
            sb.append(" - ");
            sb.append(tsd.getName());
            sb.append('\n');

            if (StringUtils.equals(aLayer.getName(), tsd.getLayer())
                    && StringUtils.equals(aName, tsd.getName())) {
                List<String> actual = new ArrayList<String>();
                for (TagDescription td : select(tsd.getTags(), TagDescription.class)) {
                    actual.add(td.getName());
                }
                Collections.sort(actual);

                // Keep only the unmapped tags
                actual.removeAll(mappedTags);

                System.out.printf("%-20s - Layer : %s%n", "Layer", tsd.getLayer());
                System.out.printf("%-20s - Tagset : %s%n", "Tagset", tsd.getName());
                printComparison("Unmapped tags", expected, actual);

                assertSameRendering(expected, actual);
                return;
            }
        }

        System.out.println("The CAS does not containg a description for layer ["
                + aLayer.getName() + "] tagset [" + aName + "]");
        System.out.println("What has been found is:\n" + sb);
        fail("No tagset definition found for layer [" + aLayer.getName() + "] tagset [" + aName
                + "]");
    }

    /**
     * Renders the given strings as a brace-delimited list of double-quoted, comma-separated
     * values.
     *
     * @param aCollection
     *            the strings to render; must not be {@code null}.
     * @param aLinebreak
     *            if {@code true}, every value and both braces are placed on their own line;
     *            otherwise everything is rendered on a single line.
     * @return the rendered list, or <code>{}</code> if the collection is empty.
     */
    public static String asCopyableString(Collection<String> aCollection, boolean aLinebreak) {
        if (aCollection.isEmpty()) {
            return "{}";
        }

        if (aLinebreak) {
            return "{\n\"" + join(aCollection, "\",\n\"") + "\"\n}";
        }

        return "{ \"" + join(aCollection, "\", \"") + "\" }";
    }

    /**
     * Renders the given strings on a single line; see
     * {@link #asCopyableString(Collection, boolean)}.
     */
    private static String asCopyableString(Collection<String> aCollection) {
        return asCopyableString(aCollection, false);
    }

    /**
     * Prints the expected and the actual values, each on one line, prefixed by the given label.
     */
    private static void printComparison(String aLabel, Collection<String> aExpected,
            Collection<String> aActual) {
        System.out.printf("%-20s - Expected: %s%n", aLabel, asCopyableString(aExpected));
        System.out.printf("%-20s - Actual : %s%n", aLabel, asCopyableString(aActual));
    }

    /**
     * Asserts that both collections have the same multi-line rendering, i.e. the same values
     * in the same order.
     */
    private static void assertSameRendering(Collection<String> aExpected,
            Collection<String> aActual) {
        assertEquals(asCopyableString(aExpected, true), asCopyableString(aActual, true));
    }
}