Example usage for the opennlp.tools.ngram.NGramModel constructor NGramModel()

List of usage examples for the opennlp.tools.ngram.NGramModel constructor NGramModel()

Introduction

On this page you can find example usages of the no-argument constructor NGramModel() of the class opennlp.tools.ngram.NGramModel.

Prototype

public NGramModel() 

Source Link

Document

Initializes an empty instance.

Usage

From source file: opennlp.tools.ngram.NGramModelTest.java

/**
 * Fills a fresh model via {@code add(tokens, 1, 3)} with four distinct tokens,
 * applies {@code cutoff(2, 4)} and expects the model to be empty afterwards.
 */
@Test
public void testCutoff1() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "brown", "fox", "jumped"), 1, 3);

    model.cutoff(2, 4);

    // Nothing is expected to survive this cutoff.
    Assert.assertEquals(0, model.size());
}

From source file: opennlp.tools.ngram.NGramModelTest.java

/**
 * Fills a fresh model via {@code add(tokens, 1, 3)} with four distinct tokens,
 * applies {@code cutoff(1, 3)} and expects all nine entries to remain.
 */
@Test
public void testCutoff2() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "brown", "fox", "jumped"), 1, 3);

    model.cutoff(1, 3);

    // Four tokens give 4 + 3 + 2 = 9 contiguous sequences of length 1..3.
    Assert.assertEquals(9, model.size());
}

From source file: opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds two token lists that differ only in the case of one token
 * ({@code "fox"} vs. {@code "Fox"}) and checks the dictionary produced by
 * the no-argument {@code toDictionary()}: it must be non-null, hold 9
 * entries, and report token counts between 1 and 3.
 */
@Test
public void testToDictionary() throws Exception {
    final StringList lowerCaseTokens = new StringList("the", "brown", "fox", "jumped");
    final StringList mixedCaseTokens = new StringList("the", "brown", "Fox", "jumped");

    final NGramModel model = new NGramModel();
    model.add(lowerCaseTokens, 1, 3);
    model.add(mixedCaseTokens, 1, 3);

    final Dictionary result = model.toDictionary();

    Assert.assertNotNull(result);
    // NOTE(review): 9 (rather than 14) suggests entries differing only in case
    // are merged by the default conversion - confirm against NGramModel.
    Assert.assertEquals(9, result.size());
    Assert.assertEquals(1, result.getMinTokenCount());
    Assert.assertEquals(3, result.getMaxTokenCount());
}

From source file: opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds two token lists that differ only in the case of one token
 * ({@code "fox"} vs. {@code "Fox"}) and checks the dictionary produced by
 * {@code toDictionary(true)}: it must be non-null, hold 14 entries, and
 * report token counts between 1 and 3.
 */
@Test
public void testToDictionary1() throws Exception {
    final StringList lowerCaseTokens = new StringList("the", "brown", "fox", "jumped");
    final StringList mixedCaseTokens = new StringList("the", "brown", "Fox", "jumped");

    final NGramModel model = new NGramModel();
    model.add(lowerCaseTokens, 1, 3);
    model.add(mixedCaseTokens, 1, 3);

    final Dictionary result = model.toDictionary(true);

    Assert.assertNotNull(result);
    // NOTE(review): 14 (rather than 9) suggests the boolean flag keeps entries
    // that differ only in case apart - confirm against NGramModel.
    Assert.assertEquals(14, result.size());
    Assert.assertEquals(1, result.getMinTokenCount());
    Assert.assertEquals(3, result.getMaxTokenCount());
}

From source file: opennlp.tools.ngram.NGramModelTest.java

/**
 * Serializes a model built from two token lists and compares the output,
 * with newlines, carriage returns, tabs and spaces removed, against the
 * reference resource {@code /opennlp/tools/ngram/ngram-model.xml}.
 *
 * <p>The first XML comment of the reference file (called the ASF header in
 * this test) is removed before the comparison. Both the reference file and
 * the serialized output are decoded as UTF-8 so the result does not depend
 * on the platform default charset.
 *
 * @throws Exception if serializing the model or reading the reference resource fails
 */
@Ignore
@Test
public void testSerialize() throws Exception {
    NGramModel ngramModel = new NGramModel();
    StringList tokens = new StringList("the", "brown", "fox", "jumped");
    ngramModel.add(tokens, 1, 3);
    tokens = new StringList("the", "brown", "Fox", "jumped");
    ngramModel.add(tokens, 1, 3);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ngramModel.serialize(out);

    // One encoding name for both sides of the comparison.
    final String encoding = Charset.forName("UTF-8").name();

    String modelString;
    // try-with-resources closes the classpath stream even if reading fails.
    try (InputStream nGramModelStream =
            getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml")) {
        // getResourceAsStream returns null for a missing resource; fail with a
        // clear message instead of a NullPointerException inside IOUtils.
        Assert.assertNotNull("reference resource /opennlp/tools/ngram/ngram-model.xml not found",
                nGramModelStream);
        // NOTE(review): assumes IOUtils is Apache Commons IO, whose
        // toString(InputStream, String) takes an explicit encoding - confirm.
        modelString = IOUtils.toString(nGramModelStream, encoding);
    }

    // Remove the ASF header comment, if present; a file without it is compared as-is.
    int start = modelString.indexOf("<!--");
    if (start >= 0) {
        int end = modelString.indexOf("-->", start);
        if (end >= 0) {
            String asfHeaderString = modelString.substring(start, end + 3);
            modelString = modelString.replace(asfHeaderString, "");
        }
    }

    String outputString = out.toString(encoding);

    // Compare while ignoring newlines, carriage returns, tabs and spaces.
    final String ignoredWhitespace = "[\\n\\r\\t ]";
    Assert.assertEquals(
            modelString.replaceAll(ignoredWhitespace, ""),
            outputString.replaceAll(ignoredWhitespace, ""));
}