List of usage examples for the opennlp.tools.ngram.NGramModel constructor NGramModel()
public NGramModel()
From source file:opennlp.tools.ngram.NGramModelTest.java
@Test public void testCutoff1() throws Exception { NGramModel ngramModel = new NGramModel(); StringList tokens = new StringList("the", "brown", "fox", "jumped"); ngramModel.add(tokens, 1, 3);//from w ww . j a v a 2 s. c o m ngramModel.cutoff(2, 4); Assert.assertEquals(0, ngramModel.size()); }
From source file:opennlp.tools.ngram.NGramModelTest.java
/**
 * Checks that {@code cutoff(1, 3)} keeps all entries of the model.
 *
 * <p>A single sequence of four distinct tokens is added with n-gram lengths
 * 1 through 3; after the cutoff the test expects {@code size()} to be 9.
 */
@Test
public void testCutoff2() throws Exception {
  NGramModel model = new NGramModel();
  model.add(new StringList("the", "brown", "fox", "jumped"), 1, 3);

  model.cutoff(1, 3);

  int remaining = model.size();
  Assert.assertEquals(9, remaining);
}
From source file:opennlp.tools.ngram.NGramModelTest.java
@Test public void testToDictionary() throws Exception { NGramModel ngramModel = new NGramModel(); StringList tokens = new StringList("the", "brown", "fox", "jumped"); ngramModel.add(tokens, 1, 3);//w ww. j av a 2 s . c om tokens = new StringList("the", "brown", "Fox", "jumped"); ngramModel.add(tokens, 1, 3); Dictionary dictionary = ngramModel.toDictionary(); Assert.assertNotNull(dictionary); Assert.assertEquals(9, dictionary.size()); Assert.assertEquals(1, dictionary.getMinTokenCount()); Assert.assertEquals(3, dictionary.getMaxTokenCount()); }
From source file:opennlp.tools.ngram.NGramModelTest.java
@Test public void testToDictionary1() throws Exception { NGramModel ngramModel = new NGramModel(); StringList tokens = new StringList("the", "brown", "fox", "jumped"); ngramModel.add(tokens, 1, 3);//w w w. java 2 s. c om tokens = new StringList("the", "brown", "Fox", "jumped"); ngramModel.add(tokens, 1, 3); Dictionary dictionary = ngramModel.toDictionary(true); Assert.assertNotNull(dictionary); Assert.assertEquals(14, dictionary.size()); Assert.assertEquals(1, dictionary.getMinTokenCount()); Assert.assertEquals(3, dictionary.getMaxTokenCount()); }
From source file:opennlp.tools.ngram.NGramModelTest.java
@Ignore @Test/*from www.j av a 2 s .com*/ public void testSerialize() throws Exception { NGramModel ngramModel = new NGramModel(); StringList tokens = new StringList("the", "brown", "fox", "jumped"); ngramModel.add(tokens, 1, 3); tokens = new StringList("the", "brown", "Fox", "jumped"); ngramModel.add(tokens, 1, 3); ByteArrayOutputStream out = new ByteArrayOutputStream(); ngramModel.serialize(out); Assert.assertNotNull(out); InputStream nGramModelStream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml"); String modelString = IOUtils.toString(nGramModelStream); // remove AL header int start = modelString.indexOf("<!--"); int end = modelString.indexOf("-->"); String asfHeaderString = modelString.substring(start, end + 3); modelString = modelString.replace(asfHeaderString, ""); String outputString = out.toString(Charset.forName("UTF-8").name()); Assert.assertEquals( modelString.replaceAll("\n", "").replaceAll("\r", "").replaceAll("\t", "").replaceAll(" ", ""), outputString.replaceAll("\n", "").replaceAll("\r", "").replaceAll("\t", "").replaceAll(" ", "")); }