Usage examples for the constructor of opennlp.tools.util.Span
public Span(Span span, double prob)
From source file:com.civis.utils.opennlp.models.address.AddressSpanBuilder.java
private void parse(String[] tokens) { Span streetSpan = createStreetSpan(originalSpan.getStart(), originalSpan.getEnd(), tokens); street = buildString(streetSpan, tokens); Span streetNumberSpan = new Span(streetSpan.getEnd(), streetSpan.getEnd() + 1); streetNumber = buildString(streetNumberSpan, tokens); Span zipSpan = new Span(streetNumberSpan.getEnd(), streetNumberSpan.getEnd() + 1); zip = buildString(zipSpan, tokens);// w w w . ja v a 2 s .c o m zip = zip.replaceAll("[+.^:,]", ""); if (StringUtils.isBlank(zip)) { // token include only special chars like , or . //try next zip token // use case Lindenstr. 19 , 12207 Berlin zipSpan = new Span(zipSpan.getStart() + 1, zipSpan.getEnd() + 1); zip = buildString(zipSpan, tokens); } CSVAddressData csvAddressData = findAddressDataByZip(zip); if (csvAddressData != null) { city = csvAddressData.getCity(); country = "Deutschland"; } else { String cityAndMaybeCountry = buildString(zipSpan.getEnd(), originalSpan.getEnd(), tokens); country = tryToFindCountry(cityAndMaybeCountry); if (country == null) { // no country found, means rest string is a city string city = cityAndMaybeCountry; } else { city = cityAndMaybeCountry.replace(country, "").trim(); } } }
From source file:com.civis.utils.opennlp.models.address.AddressSpanBuilder.java
private Span createStreetSpan(int start, int end, String[] tokens) { for (int i = start; i < end; i++) { if (StreetNumberFeature.STREET_NUMBER_PATTERN.matcher(tokens[i]).matches()) { return new Span(start, i); }// www .j av a2 s. co m } return new Span(start, end); }
From source file:com.civprod.writerstoolbox.testarea.UnsupervisedDiscourseSegmentation.java
/**
 * Splits the tokens of a document into consecutive segments at positions where the lexical
 * similarity between neighbouring windows of token blocks drops noticeably.
 *
 * <p>Steps:
 * <ol>
 *   <li>All tokens are collected, then stemmed/filtered and cut into blocks of 20 tokens.</li>
 *   <li>For each window position the cosine similarity between the term counts of blocks
 *       {@code [i, i + 10)} and {@code [i + 11, i + 20)} is computed.</li>
 *   <li>For each inner similarity value a "valley" score {@code (y1 - y2) + (y3 - y2)} is
 *       computed from its two neighbours.</li>
 *   <li>Every valley score below {@code mean - standardDeviation} becomes a segment
 *       boundary.</li>
 * </ol>
 *
 * <p>Documents too short to yield any valley score produce a single segment containing all
 * tokens.
 *
 * @param inDocument document to segment
 * @param inSentenceDetector sentence detector passed on to the token collection step
 * @param inStringTokenizer tokenizer passed on to the token collection step
 * @return the segments, each a sub-list view of the document's unfiltered token list
 */
public static List<List<String>> segment(Document<?> inDocument, SentenceDetector inSentenceDetector,
        StringTokenizer inStringTokenizer) {
    final int tokensPerBlock = 20;
    final int windowBlocks = 20;

    List<String> allTokens = concatenateTokens(inDocument, inSentenceDetector, inStringTokenizer);
    List<String> stemmedTokens = TokenUtil.stemmAndFilterList(allTokens);
    List<List<String>> blocks = splitIntoFixLengthLists(stemmedTokens, tokensPerBlock);
    List<Counter<String>> counters = blocks.parallelStream()
            .map((List<String> curBlock) -> CounterUtils.count(curBlock))
            .collect(Collectors.toList());

    // Clamp to zero: a negative initial capacity makes the ArrayList constructor throw, which
    // used to happen for every document with fewer than 20 blocks.
    int similarityCount = Math.max(0, counters.size() - windowBlocks);
    List<Double> cosineSimilarity = new ArrayList<>(similarityCount);
    for (int i = 0; i < similarityCount; i++) {
        cosineSimilarity.add(cosineSimilarityStemmedAndFiltered(
                Counter.join(counters.subList(i, i + 10)),
                Counter.join(counters.subList(i + 11, i + windowBlocks))));
    }

    // Each valley score needs three consecutive similarity values. The loop must be bounded by
    // the number of scores to compute, not by valleys.size(): the list starts out empty, so
    // bounding by its size meant no valley was ever calculated.
    int valleyCount = Math.max(0, cosineSimilarity.size() - 2);
    List<Double> valleys = new ArrayList<>(valleyCount);
    for (int i = 0; i < valleyCount; i++) {
        double left = cosineSimilarity.get(i);
        double middle = cosineSimilarity.get(i + 1);
        double right = cosineSimilarity.get(i + 2);
        valleys.add((left - middle) + (right - middle));
    }

    SummaryStatistics valleyStatistics = valleys.parallelStream().collect(SummaryStatisticCollector.instance);
    double cutoffThreshold = valleyStatistics.getMean() - valleyStatistics.getStandardDeviation();

    // NOTE(review): boundary offsets are derived from block indices of the stemmed/filtered
    // token list but are applied to the unfiltered token list below - confirm this is intended.
    int lastLocation = 0;
    List<Span> spans = new ArrayList<>();
    for (int i = 0; i < valleys.size(); i++) {
        if (valleys.get(i) < cutoffThreshold) {
            int curLocation = (i + 11) * tokensPerBlock;
            spans.add(new Span(lastLocation, curLocation));
            lastLocation = curLocation;
        }
    }
    // The final segment runs from the last boundary to the end of the document.
    spans.add(new Span(lastLocation, allTokens.size()));

    return spans.parallelStream()
            .map((Span curSpan) -> allTokens.subList(curSpan.getStart(), curSpan.getEnd()))
            .collect(Collectors.toList());
}
From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpParser.java
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { CAS cas = aJCas.getCas();/*from w w w .j a va2 s . c o m*/ modelProvider.configure(cas); mappingProvider.configure(cas); for (Sentence sentence : select(aJCas, Sentence.class)) { List<Token> tokens = selectCovered(aJCas, Token.class, sentence); Parse parseInput = new Parse(cas.getDocumentText(), new Span(sentence.getBegin(), sentence.getEnd()), AbstractBottomUpParser.INC_NODE, 0, 0); int i = 0; for (Token t : tokens) { parseInput.insert(new Parse(cas.getDocumentText(), new Span(t.getBegin(), t.getEnd()), AbstractBottomUpParser.TOK_NODE, 0, i)); i++; } Parse parseOutput = modelProvider.getResource().parse(parseInput); createConstituentAnnotationFromTree(aJCas, parseOutput, null, tokens); if (createPennTreeString) { StringBuffer sb = new StringBuffer(); parseOutput.setType("ROOT"); // in DKPro the root is ROOT, not TOP parseOutput.show(sb); PennTree pTree = new PennTree(aJCas, sentence.getBegin(), sentence.getEnd()); pTree.setPennTree(sb.toString()); pTree.addToIndexes(); } } }
From source file:opennlp.tools.util.Span.java
/**
 * Checks that {@link Span#getStart()} returns the first constructor argument.
 */
public void testGetStart() {
    Span span = new Span(5, 6);
    Assert.assertEquals(5, span.getStart());
}
From source file:opennlp.tools.util.Span.java
/**
 * Checks that {@link Span#getEnd()} returns the second constructor argument.
 */
public void testGetEnd() {
    Span span = new Span(5, 6);
    Assert.assertEquals(6, span.getEnd());
}
From source file:opennlp.tools.util.Span.java
/**
 * Checks that {@link Span#length()} of a span from 10 to 21 is 11.
 */
public void testLength() {
    Span span = new Span(10, 21);
    Assert.assertEquals(11, span.length());
}
From source file:opennlp.tools.util.Span.java
/** * Test for {@link Span#contains(Span)}. *///from w w w . ja v a 2 s. c om public void testContains() { Span a = new Span(500, 900); Span b = new Span(520, 600); Assert.assertEquals(true, a.contains(b)); }
From source file:opennlp.tools.util.Span.java
/** * Test for {@link Span#contains(Span)}. *///www .jav a2 s . c o m public void testContainsWithEqual() { Span a = new Span(500, 900); Assert.assertEquals(true, a.contains(a)); }
From source file:opennlp.tools.util.Span.java
/** * Test for {@link Span#contains(Span)}. *///from ww w. j a v a2s . c om public void testContainsWithLowerIntersect() { Span a = new Span(500, 900); Span b = new Span(450, 1000); Assert.assertEquals(false, a.contains(b)); }