List of usage examples for the org.apache.hadoop.util.bloom.Key constructor Key()
public Key()
From source file:hivemall.sketch.bloom.BloomAndUDFTest.java
License:Apache License
/**
 * Builds a dynamic bloom filter and fills it with {@code size} pseudo-random keys.
 *
 * <p>Every key is the byte form (via the no-arg {@code String.getBytes()}) of the
 * hex-string rendering of a Gaussian sample drawn from a {@link Random} seeded with
 * {@code seed}. A single {@code Key} instance is reused and re-set with weight 1.0
 * for each sample.
 *
 * @param seed seed for the generator that produces the key material
 * @param size number of keys to add; non-positive values add nothing
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter filter = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Random generator = new Random(seed);
    final Key reusableKey = new Key();

    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(generator.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        filter.add(reusableKey);
    }
    return filter;
}
From source file:hivemall.sketch.bloom.BloomAndUDFTest.java
License:Apache License
private static void assertNotContains(@Nonnull Filter expected, @Nonnull Filter actual, long seed, int size) { final Key key = new Key(); final Random rnd1 = new Random(seed); for (int i = 0; i < size; i++) { double d = rnd1.nextGaussian(); String s = Double.toHexString(d); key.set(s.getBytes(), 1.0);//from w w w . j a v a 2 s .c o m Assert.assertEquals(expected.membershipTest(key), actual.membershipTest(key)); } }
From source file:hivemall.sketch.bloom.BloomContainsUDFTest.java
License:Apache License
@Nonnull private static DynamicBloomFilter createBloomFilter(long seed, int size) { DynamicBloomFilter dbf = BloomFilterUtils.newDynamicBloomFilter(30); final Key key = new Key(); final Random rnd1 = new Random(seed); for (int i = 0; i < size; i++) { double d = rnd1.nextGaussian(); String s = Double.toHexString(d); Text t = new Text(s); key.set(t.getBytes(), 1.0);//from w ww .j av a2s . c om dbf.add(key); } return dbf; }
From source file:hivemall.sketch.bloom.BloomFilterUtilsTest.java
License:Apache License
/**
 * Adds one million pseudo-random keys to a dynamic bloom filter, then regenerates the
 * same key sequence from an identically seeded {@link Random} and asserts that every
 * key is reported as a member.
 */
@Test
public void testDynamicBloomFilter() {
    final DynamicBloomFilter filter = BloomFilterUtils.newDynamicBloomFilter(300000);
    final Key reusableKey = new Key();

    // Population pass.
    final Random writer = new Random(43L);
    for (int remaining = 1000000; remaining > 0; remaining--) {
        final String hex = Double.toHexString(writer.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        filter.add(reusableKey);
    }

    // Verification pass: same seed, therefore the same sequence of keys.
    final Random reader = new Random(43L);
    for (int remaining = 1000000; remaining > 0; remaining--) {
        final String hex = Double.toHexString(reader.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        Assert.assertTrue(filter.membershipTest(reusableKey));
    }
}
From source file:hivemall.sketch.bloom.BloomFilterUtilsTest.java
License:Apache License
/**
 * Populates a dynamic bloom filter with one million pseudo-random keys, round-trips it
 * through {@code BloomFilterUtils.serialize} / {@code BloomFilterUtils.deserialize}, and
 * asserts that the restored filter reports every originally added key as a member.
 *
 * @throws IOException declared by the test; presumably raised by the serialize/deserialize
 *         helpers - confirm against their signatures
 */
@Test
public void testDynamicBloomFilterSerde() throws IOException {
    final Key reusableKey = new Key();

    // Population pass on the original filter.
    final DynamicBloomFilter original = BloomFilterUtils.newDynamicBloomFilter(300000);
    final Random writer = new Random(43L);
    for (int remaining = 1000000; remaining > 0; remaining--) {
        final String hex = Double.toHexString(writer.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        original.add(reusableKey);
    }

    // Serialize, then deserialize into a fresh filter instance.
    final DynamicBloomFilter restored = BloomFilterUtils.deserialize(
        BloomFilterUtils.serialize(original), new DynamicBloomFilter());

    // Verification pass: same seed, therefore the same sequence of keys.
    final Random reader = new Random(43L);
    for (int remaining = 1000000; remaining > 0; remaining--) {
        final String hex = Double.toHexString(reader.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        Assert.assertTrue(restored.membershipTest(reusableKey));
    }
}
From source file:hivemall.sketch.bloom.BloomOrUDFTest.java
License:Apache License
private static void assertEquals(@Nonnull Filter expected, @Nonnull Filter actual, long seed, int size) { final Key key = new Key(); final Random rnd1 = new Random(seed); for (int i = 0; i < size; i++) { double d = rnd1.nextGaussian(); String s = Double.toHexString(d); key.set(s.getBytes(), 1.0);//from w w w . ja va 2s . c o m Assert.assertEquals(expected.membershipTest(key), actual.membershipTest(key)); } }
From source file:org.apache.mahout.utils.nlp.collocations.llr.BloomTokenFilter.java
License:Apache License
/**
 * Creates a token filter over {@code in} that consults a bloom filter for each token.
 *
 * @param filter tokens will be checked for membership in this bloom filter
 * @param keepMembers keep members of the bloom filter? If true, works like a whitelist:
 *        members found in the filter are kept and all others are dropped. If false,
 *        works like a stoplist: members found in the filter are dropped and all others
 *        are kept.
 * @param in the token stream to read
 */
public BloomTokenFilter(Filter filter, boolean keepMembers, TokenStream in) {
    super(in);

    // Collaborators supplied by the caller.
    this.keepMembers = keepMembers;
    this.filter = filter;

    // Internally owned state.
    this.termAtt = addAttribute(CharTermAttribute.class);
    this.key = new Key();

    // UTF-8 encoder configured to report, rather than replace, malformed or unmappable input.
    this.encoder = Charsets.UTF_8.newEncoder();
    this.encoder.onMalformedInput(CodingErrorAction.REPORT);
    this.encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
}
From source file:org.apache.mahout.utils.nlp.collocations.llr.BloomTokenFilterTest.java
License:Apache License
/** test standalone filter without tokenfilter wrapping */ @Test//from ww w.j a v a 2 s . co m public void testFilter() throws IOException { Filter filter = getFilter(filterTokens); Key k = new Key(); for (String s : filterTokens) { setKey(k, s); assertTrue("Key for string " + s + " should be filter member", filter.membershipTest(k)); } for (String s : notFilterTokens) { setKey(k, s); assertFalse("Key for string " + s + " should not be filter member", filter.membershipTest(k)); } }
From source file:org.apache.mahout.utils.nlp.collocations.llr.BloomTokenFilterTest.java
License:Apache License
/**
 * Builds a {@code BloomFilter} (constructed with arguments 100, 50 and
 * {@code Hash.JENKINS_HASH}) and adds one key per entry of {@code tokens}, using
 * {@code setKey} to load each token into a single reused {@code Key}.
 *
 * @param tokens the strings to add to the filter
 * @return the populated filter
 * @throws IOException declared by the method; presumably propagated from {@code setKey} -
 *         confirm against its signature
 */
private static Filter getFilter(String[] tokens) throws IOException {
    final Filter bloom = new BloomFilter(100, 50, Hash.JENKINS_HASH);
    final Key reusableKey = new Key();

    for (int index = 0; index < tokens.length; index++) {
        setKey(reusableKey, tokens[index]);
        bloom.add(reusableKey);
    }
    return bloom;
}