Example usage for org.apache.hadoop.util.bloom Key Key

List of usage examples for the org.apache.hadoop.util.bloom.Key constructor Key()

Introduction

On this page you can find usage examples for the no-argument constructor Key() of the class org.apache.hadoop.util.bloom.Key.

Prototype

public Key() 

Source Link

Document

default constructor - use with readFields

Usage

From source file:hivemall.sketch.bloom.BloomAndUDFTest.java

License:Apache License

/**
 * Builds a dynamic bloom filter holding {@code size} keys derived from a seeded
 * pseudo-random Gaussian sequence.
 *
 * <p>Each key is the hex-string form of one {@link Random#nextGaussian()} draw, so a caller
 * that seeds another {@link Random} with the same {@code seed} can regenerate the same keys.
 *
 * @param seed seed for the generator that produces the key values
 * @param size number of keys to add; nothing is added when it is zero or negative
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    // 3000 is passed to the project factory unchanged; presumably a sizing hint -- confirm in
    // BloomFilterUtils.
    final DynamicBloomFilter filter = BloomFilterUtils.newDynamicBloomFilter(3000);
    // One Key instance is reused and re-set on every iteration.
    final Key reusableKey = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(gaussians.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        filter.add(reusableKey);
    }

    return filter;
}

From source file:hivemall.sketch.bloom.BloomAndUDFTest.java

License:Apache License

/**
 * Replays {@code size} seeded pseudo-random keys and asserts that both filters give the same
 * membership answer for each of them.
 *
 * <p>Despite the method name, no "not contained" condition is checked: the only assertion is
 * that {@code expected.membershipTest(key)} equals {@code actual.membershipTest(key)}.
 *
 * @param expected reference filter
 * @param actual filter under test
 * @param seed seed for the generator that produces the probe keys
 * @param size number of probe keys; no assertion runs when it is zero or negative
 */
private static void assertNotContains(@Nonnull Filter expected, @Nonnull Filter actual, long seed, int size) {
    final Random gaussians = new Random(seed);
    // One Key instance is reused and re-set for every probe.
    final Key probe = new Key();

    for (int remaining = size; remaining > 0; remaining--) {
        probe.set(Double.toHexString(gaussians.nextGaussian()).getBytes(), 1.0);

        final boolean inExpected = expected.membershipTest(probe);
        final boolean inActual = actual.membershipTest(probe);
        Assert.assertEquals(inExpected, inActual);
    }
}

From source file:hivemall.sketch.bloom.BloomContainsUDFTest.java

License:Apache License

/**
 * Builds a dynamic bloom filter holding {@code size} keys derived from a seeded
 * pseudo-random Gaussian sequence, with each key's bytes taken from a {@code Text} wrapper.
 *
 * @param seed seed for the generator that produces the key values
 * @param size number of keys to add; nothing is added when it is zero or negative
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    // 30 is passed to the project factory unchanged; presumably a sizing hint -- confirm in
    // BloomFilterUtils.
    final DynamicBloomFilter filter = BloomFilterUtils.newDynamicBloomFilter(30);
    // One Key instance is reused and re-set on every iteration.
    final Key reusableKey = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final Text text = new Text(Double.toHexString(gaussians.nextGaussian()));
        // NOTE(review): assuming this is Hadoop's Text, getBytes() returns the backing array,
        // which is only valid up to getLength(). Confirm the code under test derives its key
        // bytes the same way before changing this.
        reusableKey.set(text.getBytes(), 1.0);
        filter.add(reusableKey);
    }

    return filter;
}

From source file:hivemall.sketch.bloom.BloomFilterUtilsTest.java

License:Apache License

/**
 * Checks that every key added to a dynamic bloom filter is reported as a member afterwards.
 *
 * <p>Two generators with the same seed are used so that the lookup pass replays exactly the
 * keys produced during the insert pass.
 */
@Test
public void testDynamicBloomFilter() {
    final long seed = 43L;
    final int numKeys = 1000000;

    final DynamicBloomFilter filter = BloomFilterUtils.newDynamicBloomFilter(300000);
    // One Key instance is reused and re-set for every insert and lookup.
    final Key reusableKey = new Key();

    // Insert pass.
    final Random insertSource = new Random(seed);
    for (int remaining = numKeys; remaining > 0; remaining--) {
        final String hex = Double.toHexString(insertSource.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        filter.add(reusableKey);
    }

    // Lookup pass over the same key sequence.
    final Random lookupSource = new Random(seed);
    for (int remaining = numKeys; remaining > 0; remaining--) {
        final String hex = Double.toHexString(lookupSource.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        final boolean present = filter.membershipTest(reusableKey);
        Assert.assertTrue(present);
    }
}

From source file:hivemall.sketch.bloom.BloomFilterUtilsTest.java

License:Apache License

/**
 * Checks that a dynamic bloom filter still reports every inserted key as a member after a
 * serialize/deserialize round trip through {@code BloomFilterUtils}.
 *
 * <p>Two generators with the same seed are used so that the lookup pass replays exactly the
 * keys produced during the insert pass.
 *
 * @throws IOException declared for the serialize/deserialize calls
 */
@Test
public void testDynamicBloomFilterSerde() throws IOException {
    final long seed = 43L;
    final int numKeys = 1000000;

    // One Key instance is reused and re-set for every insert and lookup.
    final Key reusableKey = new Key();

    // Insert pass into the original filter.
    final DynamicBloomFilter original = BloomFilterUtils.newDynamicBloomFilter(300000);
    final Random insertSource = new Random(seed);
    for (int remaining = numKeys; remaining > 0; remaining--) {
        final String hex = Double.toHexString(insertSource.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        original.add(reusableKey);
    }

    // Round trip: serialize the populated filter and read it back into a fresh instance.
    final DynamicBloomFilter restored = BloomFilterUtils.deserialize(
            BloomFilterUtils.serialize(original), new DynamicBloomFilter());

    // Lookup pass against the restored filter, over the same key sequence.
    final Random lookupSource = new Random(seed);
    for (int remaining = numKeys; remaining > 0; remaining--) {
        final String hex = Double.toHexString(lookupSource.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        final boolean present = restored.membershipTest(reusableKey);
        Assert.assertTrue(present);
    }
}

From source file:hivemall.sketch.bloom.BloomOrUDFTest.java

License:Apache License

/**
 * Replays {@code size} seeded pseudo-random keys and asserts that both filters give the same
 * membership answer for each of them.
 *
 * @param expected reference filter
 * @param actual filter under test
 * @param seed seed for the generator that produces the probe keys
 * @param size number of probe keys; no assertion runs when it is zero or negative
 */
private static void assertEquals(@Nonnull Filter expected, @Nonnull Filter actual, long seed, int size) {
    final Random gaussians = new Random(seed);
    // One Key instance is reused and re-set for every probe.
    final Key probe = new Key();

    for (int remaining = size; remaining > 0; remaining--) {
        probe.set(Double.toHexString(gaussians.nextGaussian()).getBytes(), 1.0);

        final boolean inExpected = expected.membershipTest(probe);
        final boolean inActual = actual.membershipTest(probe);
        Assert.assertEquals(inExpected, inActual);
    }
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.BloomTokenFilter.java

License:Apache License

/**
 * Creates a token filter that keeps or drops tokens depending on their membership in a bloom
 * filter.
 *
 * @param filter tokens will be checked for membership in this bloom filter
 * @param keepMembers keep members of the bloom filter? If {@code true}, works like a
 *   whitelist: members found in the filter are kept and all others are dropped. If
 *   {@code false}, works like a stoplist: members found in the filter are dropped and all
 *   others are kept.
 * @param in the token stream to read
 */
public BloomTokenFilter(Filter filter, boolean keepMembers, TokenStream in) {
    super(in);
    this.termAtt = addAttribute(CharTermAttribute.class);
    this.key = new Key();
    this.keepMembers = keepMembers;
    this.filter = filter;
    // Both malformed input and unmappable characters are set to REPORT on the UTF-8 encoder.
    this.encoder = Charsets.UTF_8.newEncoder()
            .onUnmappableCharacter(CodingErrorAction.REPORT)
            .onMalformedInput(CodingErrorAction.REPORT);
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.BloomTokenFilterTest.java

License:Apache License

/**
 * Tests the standalone filter without token-filter wrapping: every entry of
 * {@code filterTokens} must be reported as a member, and every entry of
 * {@code notFilterTokens} must not.
 */
@Test
public void testFilter() throws IOException {
    final Filter bloom = getFilter(filterTokens);
    // One Key instance is reused and re-set for every probe.
    final Key probe = new Key();

    for (String member : filterTokens) {
        setKey(probe, member);
        final boolean found = bloom.membershipTest(probe);
        assertTrue("Key for string " + member + " should be filter member", found);
    }

    for (String outsider : notFilterTokens) {
        setKey(probe, outsider);
        final boolean found = bloom.membershipTest(probe);
        assertFalse("Key for string " + outsider + " should not be filter member", found);
    }
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.BloomTokenFilterTest.java

License:Apache License

/**
 * Builds a bloom filter containing one key per entry of {@code tokens}.
 *
 * @param tokens strings to add to the filter
 * @return the populated filter
 * @throws IOException presumably propagated from {@code setKey} -- confirm against its
 *   declaration
 */
private static Filter getFilter(String[] tokens) throws IOException {
    // NOTE(review): in Hadoop's BloomFilter constructor the first two arguments are the vector
    // size and the number of hash functions -- confirm that 100 / 50 are intended here.
    final Filter bloom = new BloomFilter(100, 50, Hash.JENKINS_HASH);
    // One Key instance is reused and re-set for every token.
    final Key scratch = new Key();
    for (int idx = 0; idx < tokens.length; idx++) {
        setKey(scratch, tokens[idx]);
        bloom.add(scratch);
    }
    return bloom;
}