Example usage for the org.apache.hadoop.util.bloom.Key constructor Key(byte[])

List of usage examples for the org.apache.hadoop.util.bloom.Key constructor Key(byte[])

Introduction

On this page you can find example usages of the org.apache.hadoop.util.bloom.Key constructor Key(byte[]).

Prototype

public Key(byte[] value) 

Source Link

Document

Constructor.

Usage

From source file:boa.aggregators.DistinctAggregator.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Collects {@code data} only if the bloom filter does not already report it as
 * present; a value the filter reports as present is silently dropped.
 *
 * @param data the value to aggregate; its UTF-8 bytes form the bloom filter key
 * @param metadata not used by this method
 */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
    // Encode with an explicit charset: with the platform default, characters that
    // the default charset cannot represent are replaced during encoding, so distinct
    // values could yield identical key bytes and be dropped as duplicates.
    final Key key = new Key(data.getBytes(java.nio.charset.StandardCharsets.UTF_8));

    // the filter already reports this key: treat the value as seen and skip it
    if (this.filter.membershipTest(key)) {
        return;
    }

    // first sighting: remember the key, then emit the value
    this.filter.add(key);
    this.collect(data);
}

From source file:boa.aggregators.UniqueAggregator.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Processes {@code data} only if the bloom filter does not already report it as
 * present. For a new value, the value is collected when {@code isCombining()} is
 * true; otherwise the running {@code total} is incremented.
 *
 * @param data the value to aggregate; its UTF-8 bytes form the bloom filter key
 * @param metadata not used by this method
 */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
    // Encode with an explicit charset: with the platform default, characters that
    // the default charset cannot represent are replaced during encoding, so distinct
    // values could yield identical key bytes and be dropped as duplicates.
    final Key key = new Key(data.getBytes(java.nio.charset.StandardCharsets.UTF_8));

    // the filter already reports this key: treat the value as seen and skip it
    if (this.filter.membershipTest(key)) {
        return;
    }

    // first sighting: remember the key
    this.filter.add(key);

    if (this.isCombining()) {
        // combining: pass the value itself along
        this.collect(data);
    } else {
        // not combining: only count the value
        this.total++;
    }
}

From source file:brickhouse.udf.bloom.BloomContainsUDF.java

License:Apache License

/**
 * Tests whether {@code key} is reported as a member of a bloom filter.
 *
 * @param key value to look up; its bytes in the platform-default charset form the filter key
 * @param bloomFilter string handed to {@code BloomFactory.GetBloomFilter} to obtain the filter
 * @return the result of the filter's membership test for {@code key}
 * @throws HiveException if {@code BloomFactory.GetBloomFilter} returns no filter
 */
public Boolean evaluate(String key, String bloomFilter) throws HiveException {
    final Filter resolved = BloomFactory.GetBloomFilter(bloomFilter);
    if (resolved == null) {
        throw new HiveException("Unable to find bloom " + bloomFilter);
    }
    final Key probe = new Key(key.getBytes());
    return resolved.membershipTest(probe);
}

From source file:com.cloudera.util.bloom.BloomSet.java

License:Apache License

/**
 * Adds an int to the bloom filter./* w w  w  .  j a v a2s. c  o  m*/
 */
public void addInt(int i) {
    ByteBuffer buf = ByteBuffer.allocate(4);
    buf.putInt(i);
    Key k = new Key(buf.array());
    bloom.add(k);
}

From source file:com.eincs.athens.db.data.BlockKey.java

License:Apache License

/**
 * Builds the bloom filter key for this object.
 *
 * <p>The key bytes are the concatenation, in this order, of {@code address},
 * {@code userId}, the bytes of {@code method} and the bytes of {@code path}.
 * A {@code null} address or user id contributes an empty byte array.
 *
 * @return a new {@link Key} wrapping the concatenated bytes
 */
public Key getBloomFilterKey() {
    final byte[] addressPart = (address != null) ? address : new byte[0];
    final byte[] userPart = (userId != null) ? userId : new byte[0];
    return new Key(Bytes.concat(addressPart, userPart, method.getBytes(), path.getBytes()));
}

From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.mapper.BloomFilterMapper.java

License:Apache License

/**
 * Adds the first delimited field of {@code value} to the bloom filter {@code _bFilter}.
 *
 * @param key not used by this method
 * @param value record text; split with {@code JPAHadoopConstants.DEFAULT_DELIM_REG_EXP}
 * @param context not used by this method
 */
protected void map(Text key, Text value, Mapper<Text, Text, Text, BloomFilter>.Context context)
        throws IOException, InterruptedException {
    final String firstField = value.toString().split(JPAHadoopConstants.DEFAULT_DELIM_REG_EXP)[0];
    final Key filterKey = new Key(firstField.getBytes());
    _bFilter.add(filterKey);
}

From source file:com.uber.hoodie.common.BloomFilter.java

License:Apache License

/**
 * Adds {@code key} to the underlying filter, keyed by its UTF-8 bytes.
 *
 * @param key the value to add; must not be {@code null}
 * @throws NullPointerException if {@code key} is {@code null}
 */
public void add(String key) {
    if (key == null) {
        // message previously read "cannot by null"
        throw new NullPointerException("Key cannot be null");
    }
    filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
}

From source file:com.uber.hoodie.common.BloomFilter.java

License:Apache License

/**
 * Asks the underlying filter whether it reports {@code key} as a member,
 * using the key's UTF-8 bytes.
 *
 * @param key the value to look up; must not be {@code null}
 * @return the result of the filter's membership test
 * @throws NullPointerException if {@code key} is {@code null}
 */
public boolean mightContain(String key) {
    if (key == null) {
        // message previously read "cannot by null"
        throw new NullPointerException("Key cannot be null");
    }
    return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}

From source file:edu.isi.karma.rdf.bloom.TestJSONRDFGeneratorWithBloomFilters.java

License:Apache License

/**
 * Test method for
 * {@link edu.isi.karma.rdf.JSONRDFGenerator#generateRDF(java.lang.String, java.lang.String, boolean, java.io.PrintWriter)}
 * .
 *
 * <p>Obtains bloom filters for the "people.json" and "schedule.csv" sources, checks
 * membership of individual person URIs, checks the estimated number of hashed values,
 * and checks the estimates after combining filters with {@code or} followed by {@code and}.
 */
@Test
public void testGenerateRDF1() {
    try {
        KR2RMLBloomFilterManager peopleManager = getBloomFilterManagerForSource("people.json",
                InputType.JSON, "people-model");
        KR2RMLBloomFilterManager scheduleManager = getBloomFilterManagerForSource("schedule.csv",
                InputType.CSV, "schedule-model");

        // people source: person URIs behind the PredicateObjectMap filter
        KR2RMLBloomFilter twitterPeopleFilter = peopleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#PredicateObjectMap_5fcf2d39-f62b-4cdd-863e-bde21493e1bd");
        assertFalse(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Slepicka>".getBytes())));
        assertTrue(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Taheriyan>".getBytes())));
        assertFalse(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Kozareva>".getBytes())));
        assertFalse(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Ambite>".getBytes())));
        assertTrue(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Szekely>".getBytes())));
        assertTrue(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Knoblock>".getBytes())));
        assertFalse(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Wu>".getBytes())));
        assertEquals(3, twitterPeopleFilter.estimateNumberOfHashedValues());

        // schedule source: person URIs behind the TriplesMap filter
        KR2RMLBloomFilter schedulePeopleFilter = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#TriplesMap_413a6176-d893-45aa-b1c2-6661b5c491ab");
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Slepicka>".getBytes())));
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Taheriyan>".getBytes())));
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Ambite>".getBytes())));
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Szekely>".getBytes())));
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Knoblock>".getBytes())));
        assertEquals(5, schedulePeopleFilter.estimateNumberOfHashedValues());

        // fresh filter: or-in the people filter, then and with the schedule filter
        KR2RMLBloomFilter peopleAndSchedule = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        peopleAndSchedule.or(twitterPeopleFilter);
        peopleAndSchedule.and(schedulePeopleFilter);
        assertEquals(3, peopleAndSchedule.estimateNumberOfHashedValues());

        // schedule source: filter behind the RefObjectMap
        KR2RMLBloomFilter instructorFilter = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#RefObjectMap_bb82f923-2953-4bd4-bc7b-d1196e05dbf6");
        assertTrue(instructorFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Szekely>".getBytes())));

        // fresh filter: or-in the instructor filter, then and with the people filter
        KR2RMLBloomFilter instructorAndPeople = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        instructorAndPeople.or(instructorFilter);
        instructorAndPeople.and(twitterPeopleFilter);
        assertEquals(3, instructorAndPeople.estimateNumberOfHashedValues());
    } catch (Exception e) {
        logger.error("testGenerateRDF1 failed:", e);
        fail("Exception: " + e.getMessage());
    }
}

From source file:edu.isi.karma.rdf.TestJSONRDFGeneratorWithBloomFilters.java

License:Apache License

/**
 * Test method for
 * {@link edu.isi.karma.rdf.JSONRDFGenerator#generateRDF(java.lang.String, java.lang.String, boolean, java.io.PrintWriter)}
 * .
 *
 * <p>Obtains bloom filters for the "people.json" and "schedule.csv" sources, checks
 * membership of individual person URIs, checks the estimated number of hashed values,
 * and checks the estimates after combining filters with {@code or} followed by {@code and}.
 */
@Test
public void testGenerateRDF1() {
    try {
        KR2RMLBloomFilterManager peopleManager = getBloomFilterManagerForSource("people.json",
                InputType.JSON, "people-model");
        KR2RMLBloomFilterManager scheduleManager = getBloomFilterManagerForSource("schedule.csv",
                InputType.CSV, "schedule-model");

        // people source: person URIs behind the PredicateObjectMap filter
        KR2RMLBloomFilter twitterPeopleFilter = peopleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#PredicateObjectMap_1941470a-1dfb-4716-803b-5f07a4af90fd");
        assertFalse(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Slepicka>".getBytes())));
        assertTrue(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Taheriyan>".getBytes())));
        assertFalse(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Kozareva>".getBytes())));
        assertFalse(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Ambite>".getBytes())));
        assertTrue(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Szekely>".getBytes())));
        assertTrue(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Knoblock>".getBytes())));
        assertFalse(twitterPeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Wu>".getBytes())));
        assertEquals(3, twitterPeopleFilter.estimateNumberOfHashedValues());

        // schedule source: person URIs behind the TriplesMap filter
        KR2RMLBloomFilter schedulePeopleFilter = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#TriplesMap_413a6176-d893-45aa-b1c2-6661b5c491ab");
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Slepicka>".getBytes())));
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Taheriyan>".getBytes())));
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Ambite>".getBytes())));
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Szekely>".getBytes())));
        assertTrue(schedulePeopleFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Knoblock>".getBytes())));
        assertEquals(5, schedulePeopleFilter.estimateNumberOfHashedValues());

        // fresh filter: or-in the people filter, then and with the schedule filter
        KR2RMLBloomFilter peopleAndSchedule = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        peopleAndSchedule.or(twitterPeopleFilter);
        peopleAndSchedule.and(schedulePeopleFilter);
        assertEquals(3, peopleAndSchedule.estimateNumberOfHashedValues());

        // schedule source: filter behind the RefObjectMap
        KR2RMLBloomFilter instructorFilter = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#RefObjectMap_bb82f923-2953-4bd4-bc7b-d1196e05dbf6");
        assertTrue(instructorFilter
                .membershipTest(new Key("<http://lod.isi.edu/cs548/person/Szekely>".getBytes())));

        // fresh filter: or-in the instructor filter, then and with the people filter
        KR2RMLBloomFilter instructorAndPeople = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        instructorAndPeople.or(instructorFilter);
        instructorAndPeople.and(twitterPeopleFilter);
        assertEquals(3, instructorAndPeople.estimateNumberOfHashedValues());
    } catch (Exception e) {
        logger.error("testGenerateRDF1 failed:", e);
        fail("Exception: " + e.getMessage());
    }
}