List of usage examples for the org.apache.hadoop.util.bloom.Key constructor Key(byte[])
public Key(byte[] value)
From source file:boa.aggregators.DistinctAggregator.java
License:Apache License
/** {@inheritDoc} */ @Override//from w w w.ja va 2 s . com public void aggregate(final String data, final String metadata) throws IOException, InterruptedException { // instantiate a bloom filter input key initialized by the data Key key = new Key(data.getBytes()); // if the key is already in the filter, forget it if (this.filter.membershipTest(key)) return; // add the key to the bloom filter this.filter.add(key); // and collect it this.collect(data); }
From source file:boa.aggregators.UniqueAggregator.java
License:Apache License
/** {@inheritDoc} */ @Override//w w w. ja va2 s .c om public void aggregate(final String data, final String metadata) throws IOException, InterruptedException { // instantiate a bloom filter input key initialized by the data final Key key = new Key(data.getBytes()); // if the key is already in the filter, forget about it if (this.filter.membershipTest(key)) return; // add the key to the bloom filter this.filter.add(key); if (this.isCombining()) this.collect(data); else this.total++; }
From source file:brickhouse.udf.bloom.BloomContainsUDF.java
License:Apache License
/**
 * Tests whether {@code key} is reported as a member of the named bloom filter.
 *
 * @param key the value to look up; may be null
 * @param bloomFilter the name handed to {@code BloomFactory.GetBloomFilter}
 * @return the result of the filter's membership test, or {@code null} when
 *         {@code key} is null
 * @throws HiveException if no bloom filter is found for {@code bloomFilter}
 */
public Boolean evaluate(String key, String bloomFilter) throws HiveException {
    Filter bloom = BloomFactory.GetBloomFilter(bloomFilter);
    if (bloom == null) {
        throw new HiveException("Unable to find bloom " + bloomFilter);
    }
    if (key == null) {
        // A null input previously failed with a NullPointerException on
        // key.getBytes(); answer null for null instead.
        return null;
    }
    // NOTE(review): getBytes() uses the platform default charset; it presumably
    // has to match whatever encoding was used when the filter was built - confirm.
    return bloom.membershipTest(new Key(key.getBytes()));
}
From source file:com.cloudera.util.bloom.BloomSet.java
License:Apache License
/** * Adds an int to the bloom filter./* w w w . j a v a2s. c o m*/ */ public void addInt(int i) { ByteBuffer buf = ByteBuffer.allocate(4); buf.putInt(i); Key k = new Key(buf.array()); bloom.add(k); }
From source file:com.eincs.athens.db.data.BlockKey.java
License:Apache License
public Key getBloomFilterKey() { byte[] useridBytes = userId; ;//from w w w .j av a 2 s .c om byte[] addressBytes = address; if (addressBytes == null) { addressBytes = new byte[0]; } if (useridBytes == null) { useridBytes = new byte[0]; } byte[] value = Bytes.concat(addressBytes, useridBytes, method.getBytes(), path.getBytes()); Key key = new Key(value); return key; }
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.mapper.BloomFilterMapper.java
License:Apache License
/**
 * Adds the first delimited field of {@code value} to the bloom filter.
 *
 * @param key the input key; not used
 * @param value the input record, split on
 *        {@code JPAHadoopConstants.DEFAULT_DELIM_REG_EXP}
 * @param context the mapper context; nothing is written to it here
 */
protected void map(Text key, Text value, Mapper<Text, Text, Text, BloomFilter>.Context context)
        throws IOException, InterruptedException {
    // Only the first field is needed. A limit of 2 stops splitting after the
    // first delimiter and keeps trailing empty strings, so a value made up
    // solely of delimiters yields "" as its first field instead of an empty
    // array (which made valueSplit[0] throw ArrayIndexOutOfBoundsException).
    String[] valueSplit = value.toString().split(JPAHadoopConstants.DEFAULT_DELIM_REG_EXP, 2);

    _bFilter.add(new Key(valueSplit[0].getBytes()));
}
From source file:com.uber.hoodie.common.BloomFilter.java
License:Apache License
/**
 * Inserts {@code key} into the underlying filter, using its UTF-8 bytes.
 *
 * @param key the value to insert; must not be null
 * @throws NullPointerException if {@code key} is null
 */
public void add(String key) {
    if (null == key) {
        throw new NullPointerException("Key cannot by null");
    }
    final byte[] utf8 = key.getBytes(StandardCharsets.UTF_8);
    filter.add(new Key(utf8));
}
From source file:com.uber.hoodie.common.BloomFilter.java
License:Apache License
public boolean mightContain(String key) { if (key == null) { throw new NullPointerException("Key cannot by null"); }// w w w .j ava 2 s . c o m return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8))); }
From source file:edu.isi.karma.rdf.bloom.TestJSONRDFGeneratorWithBloomFilters.java
License:Apache License
/**
 * Test method for
 * {@link edu.isi.karma.rdf.JSONRDFGenerator#generateRDF(java.lang.String, java.lang.String, boolean, java.io.PrintWriter)}
 * .
 *
 * <p>Loads the bloom filter managers for the people (JSON) and schedule (CSV)
 * sources, then checks membership of individual person URIs, the estimated
 * number of hashed values per filter, and the estimates for two combined
 * filters.
 */
@Test
public void testGenerateRDF1() {
    // Person URIs probed against the bloom filters below.
    final String slepicka = "<http://lod.isi.edu/cs548/person/Slepicka>";
    final String taheriyan = "<http://lod.isi.edu/cs548/person/Taheriyan>";
    final String kozareva = "<http://lod.isi.edu/cs548/person/Kozareva>";
    final String ambite = "<http://lod.isi.edu/cs548/person/Ambite>";
    final String szekely = "<http://lod.isi.edu/cs548/person/Szekely>";
    final String knoblock = "<http://lod.isi.edu/cs548/person/Knoblock>";
    final String wu = "<http://lod.isi.edu/cs548/person/Wu>";

    try {
        KR2RMLBloomFilterManager peopleManager = getBloomFilterManagerForSource("people.json",
                InputType.JSON, "people-model");
        KR2RMLBloomFilterManager scheduleManager = getBloomFilterManagerForSource("schedule.csv",
                InputType.CSV, "schedule-model");

        // People source: filter for the PredicateObjectMap below.
        KR2RMLBloomFilter peopleWithTwitterId = peopleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#PredicateObjectMap_5fcf2d39-f62b-4cdd-863e-bde21493e1bd");
        assertFalse(peopleWithTwitterId.membershipTest(new Key(slepicka.getBytes())));
        assertTrue(peopleWithTwitterId.membershipTest(new Key(taheriyan.getBytes())));
        assertFalse(peopleWithTwitterId.membershipTest(new Key(kozareva.getBytes())));
        assertFalse(peopleWithTwitterId.membershipTest(new Key(ambite.getBytes())));
        assertTrue(peopleWithTwitterId.membershipTest(new Key(szekely.getBytes())));
        assertTrue(peopleWithTwitterId.membershipTest(new Key(knoblock.getBytes())));
        assertFalse(peopleWithTwitterId.membershipTest(new Key(wu.getBytes())));
        assertEquals(3, peopleWithTwitterId.estimateNumberOfHashedValues());

        // Schedule source: filter for the TriplesMap below.
        KR2RMLBloomFilter schedulePersons = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#TriplesMap_413a6176-d893-45aa-b1c2-6661b5c491ab");
        assertTrue(schedulePersons.membershipTest(new Key(slepicka.getBytes())));
        assertTrue(schedulePersons.membershipTest(new Key(taheriyan.getBytes())));
        assertTrue(schedulePersons.membershipTest(new Key(ambite.getBytes())));
        assertTrue(schedulePersons.membershipTest(new Key(szekely.getBytes())));
        assertTrue(schedulePersons.membershipTest(new Key(knoblock.getBytes())));
        assertEquals(5, schedulePersons.estimateNumberOfHashedValues());

        // Fresh filter: or() in the people filter, then and() with the schedule filter.
        KR2RMLBloomFilter peopleAndSchedule = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        peopleAndSchedule.or(peopleWithTwitterId);
        peopleAndSchedule.and(schedulePersons);
        assertEquals(3, peopleAndSchedule.estimateNumberOfHashedValues());

        // Schedule source: filter for the RefObjectMap below.
        KR2RMLBloomFilter hasInstructor = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#RefObjectMap_bb82f923-2953-4bd4-bc7b-d1196e05dbf6");
        assertTrue(hasInstructor.membershipTest(new Key(szekely.getBytes())));

        // Fresh filter: or() in the instructor filter, then and() with the people filter.
        KR2RMLBloomFilter instructorAndPeople = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        instructorAndPeople.or(hasInstructor);
        instructorAndPeople.and(peopleWithTwitterId);
        assertEquals(3, instructorAndPeople.estimateNumberOfHashedValues());
    } catch (Exception e) {
        logger.error("testGenerateRDF1 failed:", e);
        fail("Exception: " + e.getMessage());
    }
}
From source file:edu.isi.karma.rdf.TestJSONRDFGeneratorWithBloomFilters.java
License:Apache License
/**
 * Test method for
 * {@link edu.isi.karma.rdf.JSONRDFGenerator#generateRDF(java.lang.String, java.lang.String, boolean, java.io.PrintWriter)}
 * .
 *
 * <p>Loads the bloom filter managers for the people (JSON) and schedule (CSV)
 * sources, then checks membership of individual person URIs, the estimated
 * number of hashed values per filter, and the estimates for two combined
 * filters.
 */
@Test
public void testGenerateRDF1() {
    // Person URIs probed against the bloom filters below.
    final String slepicka = "<http://lod.isi.edu/cs548/person/Slepicka>";
    final String taheriyan = "<http://lod.isi.edu/cs548/person/Taheriyan>";
    final String kozareva = "<http://lod.isi.edu/cs548/person/Kozareva>";
    final String ambite = "<http://lod.isi.edu/cs548/person/Ambite>";
    final String szekely = "<http://lod.isi.edu/cs548/person/Szekely>";
    final String knoblock = "<http://lod.isi.edu/cs548/person/Knoblock>";
    final String wu = "<http://lod.isi.edu/cs548/person/Wu>";

    try {
        KR2RMLBloomFilterManager peopleManager = getBloomFilterManagerForSource("people.json",
                InputType.JSON, "people-model");
        KR2RMLBloomFilterManager scheduleManager = getBloomFilterManagerForSource("schedule.csv",
                InputType.CSV, "schedule-model");

        // People source: filter for the PredicateObjectMap below.
        KR2RMLBloomFilter peopleWithTwitterId = peopleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#PredicateObjectMap_1941470a-1dfb-4716-803b-5f07a4af90fd");
        assertFalse(peopleWithTwitterId.membershipTest(new Key(slepicka.getBytes())));
        assertTrue(peopleWithTwitterId.membershipTest(new Key(taheriyan.getBytes())));
        assertFalse(peopleWithTwitterId.membershipTest(new Key(kozareva.getBytes())));
        assertFalse(peopleWithTwitterId.membershipTest(new Key(ambite.getBytes())));
        assertTrue(peopleWithTwitterId.membershipTest(new Key(szekely.getBytes())));
        assertTrue(peopleWithTwitterId.membershipTest(new Key(knoblock.getBytes())));
        assertFalse(peopleWithTwitterId.membershipTest(new Key(wu.getBytes())));
        assertEquals(3, peopleWithTwitterId.estimateNumberOfHashedValues());

        // Schedule source: filter for the TriplesMap below.
        KR2RMLBloomFilter schedulePersons = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#TriplesMap_413a6176-d893-45aa-b1c2-6661b5c491ab");
        assertTrue(schedulePersons.membershipTest(new Key(slepicka.getBytes())));
        assertTrue(schedulePersons.membershipTest(new Key(taheriyan.getBytes())));
        assertTrue(schedulePersons.membershipTest(new Key(ambite.getBytes())));
        assertTrue(schedulePersons.membershipTest(new Key(szekely.getBytes())));
        assertTrue(schedulePersons.membershipTest(new Key(knoblock.getBytes())));
        assertEquals(5, schedulePersons.estimateNumberOfHashedValues());

        // Fresh filter: or() in the people filter, then and() with the schedule filter.
        KR2RMLBloomFilter peopleAndSchedule = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        peopleAndSchedule.or(peopleWithTwitterId);
        peopleAndSchedule.and(schedulePersons);
        assertEquals(3, peopleAndSchedule.estimateNumberOfHashedValues());

        // Schedule source: filter for the RefObjectMap below.
        KR2RMLBloomFilter hasInstructor = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#RefObjectMap_bb82f923-2953-4bd4-bc7b-d1196e05dbf6");
        assertTrue(hasInstructor.membershipTest(new Key(szekely.getBytes())));

        // Fresh filter: or() in the instructor filter, then and() with the people filter.
        KR2RMLBloomFilter instructorAndPeople = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        instructorAndPeople.or(hasInstructor);
        instructorAndPeople.and(peopleWithTwitterId);
        assertEquals(3, instructorAndPeople.estimateNumberOfHashedValues());
    } catch (Exception e) {
        logger.error("testGenerateRDF1 failed:", e);
        fail("Exception: " + e.getMessage());
    }
}