Usage examples for opennlp.tools.ngram.NGramModel.add
public void add(CharSequence chars, int minLength, int maxLength)
From source file:opennlp.tools.ngram.NGramModelTest.java
@Test public void testAdd2() throws Exception { NGramModel ngramModel = new NGramModel(); ngramModel.add(new StringList("the", "bro", "wn"), 2, 3); int count = ngramModel.getCount(new StringList("the", "bro", "wn")); Assert.assertEquals(1, count);//from www . j av a2 s. c o m Assert.assertEquals(3, ngramModel.size()); }
From source file:opennlp.tools.ngram.NGramModelTest.java
@Test public void testAdd3() throws Exception { NGramModel ngramModel = new NGramModel(); ngramModel.add(new StringList("the", "brown", "fox"), 2, 3); int count = ngramModel.getCount(new StringList("the", "brown", "fox")); Assert.assertEquals(1, count);//from w w w.j a v a 2s . c o m count = ngramModel.getCount(new StringList("the", "brown")); Assert.assertEquals(1, count); count = ngramModel.getCount(new StringList("brown", "fox")); Assert.assertEquals(1, count); Assert.assertEquals(3, ngramModel.size()); }
From source file:opennlp.tools.ngram.NGramModelTest.java
/**
 * Populates a model from ("the", "bro", "wn") with the bounds 1 and 3 and
 * checks that the single-token n-gram "the" is reported as contained.
 */
@Test
public void testContains2() throws Exception {
    NGramModel model = new NGramModel();
    StringList input = new StringList("the", "bro", "wn");
    model.add(input, 1, 3);

    StringList unigram = new StringList("the");
    boolean present = model.contains(unigram);
    Assert.assertTrue(present);
}
From source file:opennlp.tools.ngram.NGramModelTest.java
/**
 * Populates a model from ("the", "bro", "wn") with the bounds 1 and 3 and
 * checks that {@code numberOfGrams()} reports 6.
 */
@Test
public void testNumberOfGrams() throws Exception {
    NGramModel model = new NGramModel();
    StringList input = new StringList("the", "bro", "wn");

    model.add(input, 1, 3);

    Assert.assertEquals(6, model.numberOfGrams());
}
From source file:opennlp.tools.ngram.NGramModelTest.java
@Test public void testCutoff1() throws Exception { NGramModel ngramModel = new NGramModel(); StringList tokens = new StringList("the", "brown", "fox", "jumped"); ngramModel.add(tokens, 1, 3); ngramModel.cutoff(2, 4);//w ww . ja va 2s . c o m Assert.assertEquals(0, ngramModel.size()); }
From source file:opennlp.tools.ngram.NGramModelTest.java
/**
 * Populates a model from a four-token list with the bounds 1 and 3, applies
 * {@code cutoff(1, 3)} and checks that the model still reports a size of 9.
 */
@Test
public void testCutoff2() throws Exception {
    NGramModel model = new NGramModel();
    StringList input = new StringList("the", "brown", "fox", "jumped");

    model.add(input, 1, 3);
    model.cutoff(1, 3);

    Assert.assertEquals(9, model.size());
}
From source file:opennlp.tools.ngram.NGramModelTest.java
@Test public void testToDictionary() throws Exception { NGramModel ngramModel = new NGramModel(); StringList tokens = new StringList("the", "brown", "fox", "jumped"); ngramModel.add(tokens, 1, 3); tokens = new StringList("the", "brown", "Fox", "jumped"); ngramModel.add(tokens, 1, 3);//from www .ja v a 2 s. co m Dictionary dictionary = ngramModel.toDictionary(); Assert.assertNotNull(dictionary); Assert.assertEquals(9, dictionary.size()); Assert.assertEquals(1, dictionary.getMinTokenCount()); Assert.assertEquals(3, dictionary.getMaxTokenCount()); }
From source file:opennlp.tools.ngram.NGramModelTest.java
@Test public void testToDictionary1() throws Exception { NGramModel ngramModel = new NGramModel(); StringList tokens = new StringList("the", "brown", "fox", "jumped"); ngramModel.add(tokens, 1, 3); tokens = new StringList("the", "brown", "Fox", "jumped"); ngramModel.add(tokens, 1, 3);//from w w w . j a v a2 s .c o m Dictionary dictionary = ngramModel.toDictionary(true); Assert.assertNotNull(dictionary); Assert.assertEquals(14, dictionary.size()); Assert.assertEquals(1, dictionary.getMinTokenCount()); Assert.assertEquals(3, dictionary.getMaxTokenCount()); }
From source file:opennlp.tools.ngram.NGramModelTest.java
@Ignore @Test//from w w w . j a v a 2 s . c om public void testSerialize() throws Exception { NGramModel ngramModel = new NGramModel(); StringList tokens = new StringList("the", "brown", "fox", "jumped"); ngramModel.add(tokens, 1, 3); tokens = new StringList("the", "brown", "Fox", "jumped"); ngramModel.add(tokens, 1, 3); ByteArrayOutputStream out = new ByteArrayOutputStream(); ngramModel.serialize(out); Assert.assertNotNull(out); InputStream nGramModelStream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml"); String modelString = IOUtils.toString(nGramModelStream); // remove AL header int start = modelString.indexOf("<!--"); int end = modelString.indexOf("-->"); String asfHeaderString = modelString.substring(start, end + 3); modelString = modelString.replace(asfHeaderString, ""); String outputString = out.toString(Charset.forName("UTF-8").name()); Assert.assertEquals( modelString.replaceAll("\n", "").replaceAll("\r", "").replaceAll("\t", "").replaceAll(" ", ""), outputString.replaceAll("\n", "").replaceAll("\r", "").replaceAll("\t", "").replaceAll(" ", "")); }