Example usage for the opennlp.tools.util.Span constructor

Introduction

On this page you can find example usages of the opennlp.tools.util.Span constructor.

Prototype

public Span(Span span, double prob)

Source Link

Document

Creates a new immutable span based on an existing span that did not include a probability, assigning it the given probability (prob).

Usage

From source file:com.civis.utils.opennlp.models.address.AddressSpanBuilder.java

/**
 * Populates the {@code street}, {@code streetNumber}, {@code zip}, {@code city} and
 * {@code country} fields from the tokens covered by {@code originalSpan}.
 *
 * <p>The street is whatever {@code createStreetSpan} selects; the street number and the zip
 * are each read as the single token that follows. City and country come from the zip lookup
 * when it succeeds, otherwise from the tokens remaining after the zip.
 *
 * @param tokens the tokens the spans index into
 */
private void parse(String[] tokens) {
    final Span streetPart = createStreetSpan(originalSpan.getStart(), originalSpan.getEnd(), tokens);
    street = buildString(streetPart, tokens);

    // One token directly after the street is read as the street number.
    final int numberStart = streetPart.getEnd();
    final Span numberPart = new Span(numberStart, numberStart + 1);
    streetNumber = buildString(numberPart, tokens);

    // One token directly after the street number is read as the zip candidate.
    final int zipStart = numberPart.getEnd();
    Span zipPart = new Span(zipStart, zipStart + 1);
    zip = buildString(zipPart, tokens);
    zip = zip.replaceAll("[+.^:,]", "");
    if (StringUtils.isBlank(zip)) {
        // The candidate consisted only of punctuation such as "," or ".", so shift one
        // token to the right and try again (e.g. "Lindenstr. 19 , 12207 Berlin").
        zipPart = new Span(zipPart.getStart() + 1, zipPart.getEnd() + 1);
        zip = buildString(zipPart, tokens);
    }

    final CSVAddressData knownAddress = findAddressDataByZip(zip);
    if (knownAddress != null) {
        // A zip found in the address data supplies the city; the country is fixed.
        city = knownAddress.getCity();
        country = "Deutschland";
        return;
    }

    // Unknown zip: everything after it is the city, optionally followed by a country.
    final String remainder = buildString(zipPart.getEnd(), originalSpan.getEnd(), tokens);
    country = tryToFindCountry(remainder);
    city = (country == null) ? remainder : remainder.replace(country, "").trim();
}

From source file:com.civis.utils.opennlp.models.address.AddressSpanBuilder.java

/**
 * Builds the span of the street name: it starts at {@code start} and stops just before the
 * first token in {@code [start, end)} that matches
 * {@code StreetNumberFeature.STREET_NUMBER_PATTERN}, or at {@code end} when no token matches.
 *
 * @param start  index of the first token to inspect (inclusive)
 * @param end    index at which to stop inspecting (exclusive)
 * @param tokens the tokens being scanned
 * @return the span from {@code start} up to the first matching token, or up to {@code end}
 */
private Span createStreetSpan(int start, int end, String[] tokens) {
    int streetEnd = end;
    int idx = start;
    while (idx < end) {
        if (StreetNumberFeature.STREET_NUMBER_PATTERN.matcher(tokens[idx]).matches()) {
            streetEnd = idx;
            break;
        }
        idx++;
    }
    return new Span(start, streetEnd);
}

From source file:com.civprod.writerstoolbox.testarea.UnsupervisedDiscourseSegmentation.java

/**
 * Splits the tokens of a document into consecutive segments, cutting where the similarity
 * between neighbouring windows of token blocks dips unusually low.
 *
 * <p>Steps, as implemented here:
 * <ol>
 *   <li>Collect the document's tokens, pass them through
 *       {@code TokenUtil.stemmAndFilterList}, split the result with
 *       {@code splitIntoFixLengthLists(..., 20)} and count each block.</li>
 *   <li>For every block index {@code i}, compare the joined counts of blocks
 *       {@code [i, i + 10)} with those of blocks {@code [i + 11, i + 20)}.</li>
 *   <li>For every three consecutive similarities {@code y1, y2, y3} compute the valley value
 *       {@code (y1 - y2) + (y3 - y2)}.</li>
 *   <li>Every valley value below {@code mean - standardDeviation} of all valley values closes
 *       the current segment at token offset {@code (i + 11) * 20}.</li>
 * </ol>
 *
 * <p>Documents too short to yield any valley produce a single segment holding all tokens.
 *
 * @param inDocument         the document to segment
 * @param inSentenceDetector sentence detector handed to {@code concatenateTokens}
 * @param inStringTokenizer  tokenizer handed to {@code concatenateTokens}
 * @return the segments as sub-lists of the document's token list, in document order
 */
public static List<List<String>> segment(Document<?> inDocument, SentenceDetector inSentenceDetector,
        StringTokenizer inStringTokenizer) {
    final int blockLength = 20; // size passed to splitIntoFixLengthLists
    final int windowSpan = 20;  // number of blocks covered by one pair of comparison windows

    List<String> allTokens = concatenateTokens(inDocument, inSentenceDetector, inStringTokenizer);
    List<String> stemmedTokens = TokenUtil.stemmAndFilterList(allTokens);
    List<List<String>> blocks = splitIntoFixLengthLists(stemmedTokens, blockLength);
    List<Counter<String>> counters = blocks.parallelStream()
            .map((List<String> curBlock) -> CounterUtils.count(curBlock)).collect(Collectors.toList());

    // Clamp to zero: a negative initial capacity makes the ArrayList constructor throw for
    // documents with fewer than windowSpan blocks.
    int similarityCount = Math.max(0, counters.size() - windowSpan);
    List<Double> cosineSimilarity = new ArrayList<>(similarityCount);
    for (int i = 0; i < similarityCount; i++) {
        // NOTE(review): block i + 10 belongs to neither window, and the right window is one
        // block shorter than the left. Confirm whether subList(i + 10, i + 20) was intended.
        cosineSimilarity.add(cosineSimilarityStemmedAndFiltered(Counter.join(counters.subList(i, i + 10)),
                Counter.join(counters.subList(i + 11, i + 20))));
    }

    // The loop bound must be the intended element count. The list starts empty, so bounding
    // the loop by valleys.size() would never run it and no boundary would ever be found.
    int valleyCount = Math.max(0, cosineSimilarity.size() - 2);
    List<Double> valleys = new ArrayList<>(valleyCount);
    for (int i = 0; i < valleyCount; i++) {
        double left = cosineSimilarity.get(i);
        double middle = cosineSimilarity.get(i + 1);
        double right = cosineSimilarity.get(i + 2);
        valleys.add((left - middle) + (right - middle));
    }

    SummaryStatistics valleyStatistics = valleys.parallelStream().collect(SummaryStatisticCollector.instance);
    double cutoffThreshold = valleyStatistics.getMean() - valleyStatistics.getStandardDeviation();

    int lastLocation = 0;
    List<Span> spans = new ArrayList<>(1);
    for (int i = 0; i < valleys.size(); i++) {
        double curValley = valleys.get(i);
        if (curValley < cutoffThreshold) {
            // NOTE(review): this offset is derived from block indices of the stemmed/filtered
            // list but is applied to the unfiltered token list below. Verify the two line up.
            int curLocation = (i + 11) * blockLength;
            spans.add(new Span(lastLocation, curLocation));
            lastLocation = curLocation;
        }
    }
    // The final segment runs from the last boundary to the end of the document.
    spans.add(new Span(lastLocation, allTokens.size()));

    return spans.parallelStream()
            .map((Span curSpan) -> allTokens.subList(curSpan.getStart(), curSpan.getEnd()))
            .collect(Collectors.toList());
}

From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpParser.java

/**
 * Parses every {@code Sentence} in the CAS and stores the result as constituent annotations,
 * plus a {@code PennTree} annotation per sentence when {@code createPennTreeString} is set.
 *
 * @param aJCas the CAS holding the sentences and tokens to parse
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    final CAS cas = aJCas.getCas();

    modelProvider.configure(cas);
    mappingProvider.configure(cas);

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        final List<Token> tokens = selectCovered(aJCas, Token.class, sentence);
        final String documentText = cas.getDocumentText();

        // Top-level parse node covering the character range of the whole sentence.
        final Span sentenceSpan = new Span(sentence.getBegin(), sentence.getEnd());
        final Parse parseInput = new Parse(documentText, sentenceSpan, AbstractBottomUpParser.INC_NODE, 0, 0);

        // One token node per covered token; the token's position is passed as the last argument.
        for (int idx = 0; idx < tokens.size(); idx++) {
            final Token token = tokens.get(idx);
            final Span tokenSpan = new Span(token.getBegin(), token.getEnd());
            parseInput.insert(new Parse(documentText, tokenSpan, AbstractBottomUpParser.TOK_NODE, 0, idx));
        }

        final Parse parseOutput = modelProvider.getResource().parse(parseInput);

        createConstituentAnnotationFromTree(aJCas, parseOutput, null, tokens);

        if (!createPennTreeString) {
            continue;
        }

        final StringBuffer treeText = new StringBuffer();
        parseOutput.setType("ROOT"); // in DKPro the root is ROOT, not TOP
        parseOutput.show(treeText);

        final PennTree pennTree = new PennTree(aJCas, sentence.getBegin(), sentence.getEnd());
        pennTree.setPennTree(treeText.toString());
        pennTree.addToIndexes();
    }
}

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#getStart()} returns the start index given at construction.
     */
    public void testGetStart() {
        Span span = new Span(5, 6);
        int actualStart = span.getStart();

        Assert.assertEquals(5, actualStart);
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#getEnd()} returns the end index given at construction.
     */
    public void testGetEnd() {
        Span span = new Span(5, 6);
        int actualEnd = span.getEnd();

        Assert.assertEquals(6, actualEnd);
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#length()} of a span from 10 to 21 is 11.
     */
    public void testLength() {
        Span span = new Span(10, 21);
        int actualLength = span.length();

        Assert.assertEquals(11, actualLength);
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#contains(Span)} is true for a span lying strictly inside another.
     */
    public void testContains() {
        Span outer = new Span(500, 900);
        Span inner = new Span(520, 600);

        boolean contained = outer.contains(inner);

        Assert.assertEquals(true, contained);
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#contains(Span)} is true when a span is compared with itself.
     */
    public void testContainsWithEqual() {
        Span span = new Span(500, 900);

        boolean containsItself = span.contains(span);

        Assert.assertEquals(true, containsItself);
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#contains(Span)} is false when the other span starts before and
     * ends after this one.
     */
    public void testContainsWithLowerIntersect() {
        Span inner = new Span(500, 900);
        Span wider = new Span(450, 1000);

        boolean contained = inner.contains(wider);

        Assert.assertEquals(false, contained);
    }