List of usage examples for the org.apache.hadoop.util.bloom.Key constructor
public Key(byte[] value)
From source file:org.wso2.siddhi.extension.eventtable.rdbms.RDBMSOperator.java
License:Open Source License
@Override public boolean contains(ComplexEvent matchingEvent, Object candidateEvents) { Object[] obj;//from w w w . java2s.c o m if (expressionExecutorList != null) { obj = new Object[expressionExecutorList.size()]; int count = 0; for (ExpressionExecutor expressionExecutor : expressionExecutorList) { Object value = expressionExecutor.execute(matchingEvent); obj[count] = value; if (isBloomEnabled) { boolean mightContain = dbHandler.getBloomFilters()[attributeIndexArray[count]] .membershipTest(new Key(value.toString().getBytes())); if (!mightContain) { return false; } } count++; } } else { obj = new Object[] {}; } return dbHandler.checkExistence(obj, executionInfo); }
From source file:org.wso2.siddhi.extension.table.rdbms.RDBMSOperator.java
License:Open Source License
/**
 * Looks up the stored event(s) matching the given state event.
 *
 * <p>Each executor in {@code expressionExecutorList} is evaluated against
 * {@code matchingEvent} to build the lookup values. When {@code isBloomEnabled}
 * is set, every non-null value is first tested against the Bloom filter selected by
 * {@code attributeIndexArray}; a negative answer returns {@code null} immediately
 * without calling {@code dbHandler.selectEvent}.
 *
 * @param matchingEvent    event the expression executors are evaluated against
 * @param storeEvents      not used by this implementation
 * @param storeEventCloner not used by this implementation
 * @return {@code null} if a Bloom filter rules the value out, otherwise the result of
 *         {@code dbHandler.selectEvent}
 */
@Override
public StreamEvent find(StateEvent matchingEvent, Object storeEvents, StreamEventCloner storeEventCloner) {
    Object[] obj;
    if (expressionExecutorList != null) {
        obj = new Object[expressionExecutorList.size()];
        int count = 0;
        for (ExpressionExecutor expressionExecutor : expressionExecutorList) {
            Object value = expressionExecutor.execute(matchingEvent);
            obj[count] = value;
            // A null value cannot be converted into a Bloom key (value.toString() would throw
            // a NullPointerException), so skip the pre-check and let dbHandler decide.
            if (isBloomEnabled && value != null) {
                // NOTE(review): getBytes() uses the platform default charset; it has to match
                // the encoding used when the filter was populated - confirm before changing.
                boolean mightContain = dbHandler.getBloomFilters()[attributeIndexArray[count]]
                        .membershipTest(new Key(value.toString().getBytes()));
                if (!mightContain) {
                    return null;
                }
            }
            count++;
        }
    } else {
        obj = new Object[] {};
    }
    return dbHandler.selectEvent(obj, executionInfo);
}
From source file:org.wso2.siddhi.extension.table.rdbms.RDBMSOperator.java
License:Open Source License
/**
 * Checks whether a record matching the given state event exists.
 *
 * <p>Each executor in {@code expressionExecutorList} is evaluated against
 * {@code matchingEvent} to build the lookup values. When {@code isBloomEnabled}
 * is set, every non-null value is first tested against the Bloom filter selected by
 * {@code attributeIndexArray}; a negative answer returns {@code false} immediately
 * without calling {@code dbHandler.checkExistence}.
 *
 * @param matchingEvent event the expression executors are evaluated against
 * @param storeEvents   not used by this implementation
 * @return {@code false} if a Bloom filter rules the value out, otherwise the result of
 *         {@code dbHandler.checkExistence}
 */
@Override
public boolean contains(StateEvent matchingEvent, Object storeEvents) {
    Object[] obj;
    if (expressionExecutorList != null) {
        obj = new Object[expressionExecutorList.size()];
        int count = 0;
        for (ExpressionExecutor expressionExecutor : expressionExecutorList) {
            Object value = expressionExecutor.execute(matchingEvent);
            obj[count] = value;
            // A null value cannot be converted into a Bloom key (value.toString() would throw
            // a NullPointerException), so skip the pre-check and let dbHandler decide.
            if (isBloomEnabled && value != null) {
                // NOTE(review): getBytes() uses the platform default charset; it has to match
                // the encoding used when the filter was populated - confirm before changing.
                boolean mightContain = dbHandler.getBloomFilters()[attributeIndexArray[count]]
                        .membershipTest(new Key(value.toString().getBytes()));
                if (!mightContain) {
                    return false;
                }
            }
            count++;
        }
    } else {
        obj = new Object[] {};
    }
    return dbHandler.checkExistence(obj, executionInfo);
}
From source file:uk.gov.gchq.gaffer.accumulostore.retriever.impl.AccumuloIDBetweenSetsRetrieverTest.java
License:Apache License
private void shouldDealWithFalsePositives(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException, AccumuloElementConversionException { final Set<EntitySeed> seeds = new HashSet<>(); seeds.add(AccumuloTestData.SEED_A0); seeds.add(AccumuloTestData.SEED_A23); // Add a bunch of items that are not in the data to make the probability of being able to find a false // positive sensible. for (int i = 0; i < 10; i++) { seeds.add(new EntitySeed("abc" + i)); }/*from w w w . jav a2s . c om*/ // Need to make sure that the Bloom filter we create has the same size and the same number of hashes as the // one that GraphElementsWithStatisticsWithinSetRetriever creates. final int numItemsToBeAdded = loadIntoMemory ? seeds.size() : 20; if (!loadIntoMemory) { store.getProperties().setMaxEntriesForBatchScanner("20"); } // Find something that will give a false positive // Need to repeat the logic used in the getGraphElementsWithStatisticsWithinSet() method. // Calculate sensible size of filter, aiming for false positive rate of 1 in 10000, with a maximum size of // maxBloomFilterToPassToAnIterator bytes. int size = (int) (-numItemsToBeAdded * Math.log(0.0001) / (Math.pow(Math.log(2.0), 2.0))); size = Math.min(size, store.getProperties().getMaxBloomFilterToPassToAnIterator()); // Work out optimal number of hashes to use in Bloom filter based on size of set - optimal number of hashes is // (m/n)ln 2 where m is the size of the filter in bits and n is the number of items that will be added to the set. 
final int numHashes = Math.max(1, (int) ((size / numItemsToBeAdded) * Math.log(2))); // Create Bloom filter and add seeds to it final BloomFilter filter = new BloomFilter(size, numHashes, Hash.MURMUR_HASH); for (final EntitySeed seed : seeds) { filter.add(new Key(store.getKeyPackage().getKeyConverter().serialiseVertex(seed.getVertex()))); } // Test random items against it - should only have to shouldRetieveElementsInRangeBetweenSeeds MAX_SIZE_BLOOM_FILTER / 2 on average before find a // false positive (but impose an arbitrary limit to avoid an infinite loop if there's a problem). int count = 0; int maxNumberOfTries = 50 * store.getProperties().getMaxBloomFilterToPassToAnIterator(); while (count < maxNumberOfTries) { count++; if (filter.membershipTest(new Key(("" + count).getBytes()))) { break; } } if (count == maxNumberOfTries) { fail("Didn't find a false positive"); } // False positive is "" + count so create an edge from seeds to that final Edge edge = new Edge(TestGroups.EDGE, "A0", "" + count, true); edge.putProperty(AccumuloPropertyNames.COUNT, 1000000); Set<Element> data = new HashSet<>(); data.add(edge); final User user = new User(); addElements(data, store, user); // Now query for all edges in set - shouldn't get the false positive AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> op = new GetElementsBetweenSets<>( AccumuloTestData.SEED_A0_SET, seeds, defaultView); final Set<Element> results = returnElementsFromOperation(store, op, new User(), loadIntoMemory); // Check results are as expected assertEquals(2, results.size()); assertThat(results, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY)); }
From source file:uk.gov.gchq.gaffer.accumulostore.retriever.impl.AccumuloIDWithinSetRetrieverTest.java
License:Apache License
private void shouldDealWithFalsePositives(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException, AccumuloElementConversionException { // Query for all edges in set {A0, A23} final Set<EntitySeed> seeds = new HashSet<>(); seeds.add(AccumuloTestData.SEED_A0); seeds.add(AccumuloTestData.SEED_A23); // Add a bunch of items that are not in the data to make the probability of being able to find a false // positive sensible. for (int i = 0; i < 10; i++) { seeds.add(new EntitySeed("abc" + i)); }/*from ww w .j a v a 2s.c o m*/ // Need to make sure that the Bloom filter we create has the same size and the same number of hashes as the // one that GraphElementsWithStatisticsWithinSetRetriever creates. final int numItemsToBeAdded = loadIntoMemory ? seeds.size() : 20; if (!loadIntoMemory) { store.getProperties().setMaxEntriesForBatchScanner("20"); } // Find something that will give a false positive // Need to repeat the logic used in the getGraphElementsWithStatisticsWithinSet() method. // Calculate sensible size of filter, aiming for false positive rate of 1 in 10000, with a maximum size of // maxBloomFilterToPassToAnIterator bytes. int size = (int) (-numItemsToBeAdded * Math.log(0.0001) / (Math.pow(Math.log(2.0), 2.0))); size = Math.min(size, store.getProperties().getMaxBloomFilterToPassToAnIterator()); // Work out optimal number of hashes to use in Bloom filter based on size of set - optimal number of hashes is // (m/n)ln 2 where m is the size of the filter in bits and n is the number of items that will be added to the set. 
final int numHashes = Math.max(1, (int) ((size / numItemsToBeAdded) * Math.log(2))); // Create Bloom filter and add seeds to it final BloomFilter filter = new BloomFilter(size, numHashes, Hash.MURMUR_HASH); for (final EntitySeed seed : seeds) { filter.add(new Key(store.getKeyPackage().getKeyConverter().serialiseVertex(seed.getVertex()))); } // Test random items against it - should only have to shouldRetieveElementsInRangeBetweenSeeds MAX_SIZE_BLOOM_FILTER / 2 on average before find a // false positive (but impose an arbitrary limit to avoid an infinite loop if there's a problem). int count = 0; int maxNumberOfTries = 50 * store.getProperties().getMaxBloomFilterToPassToAnIterator(); while (count < maxNumberOfTries) { count++; if (filter.membershipTest(new Key(("" + count).getBytes()))) { break; } } if (count == maxNumberOfTries) { fail("Didn't find a false positive"); } // False positive is "" + count so create an edge from seeds to that final GetElements<EntitySeed, ?> op = new GetElements<>(defaultView, seeds); // Now query for all edges in set - shouldn't get the false positive final Set<Element> results = returnElementsFromOperation(store, op, new User(), loadIntoMemory); // Check results are as expected assertThat(results, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY, AccumuloTestData.A23_ENTITY)); }