Example usage for the org.apache.hadoop.util.bloom.Key constructor Key(byte[])

List of usage examples for org.apache.hadoop.util.bloom Key Key

Introduction

On this page you can find example usages of the org.apache.hadoop.util.bloom.Key constructor Key(byte[] value).

Prototype

public Key(byte[] value) 

Source Link

Document

Constructor.

Usage

From source file:org.wso2.siddhi.extension.eventtable.rdbms.RDBMSOperator.java

License:Open Source License

/**
 * Reports whether the backing store contains a record matching the given event.
 *
 * <p>Every configured expression executor is evaluated against {@code matchingEvent} and the
 * results are collected, in iteration order, as the lookup parameters passed to
 * {@code dbHandler.checkExistence}. When {@code isBloomEnabled} is set, each value is first
 * tested against the Bloom filter selected by {@code attributeIndexArray}; a negative
 * membership test returns {@code false} immediately without consulting the handler.
 *
 * @param matchingEvent   event the expression executors are evaluated against
 * @param candidateEvents not read by this method
 * @return {@code false} if any Bloom filter rules the value out, otherwise the result of
 *         {@code dbHandler.checkExistence}
 */
@Override
public boolean contains(ComplexEvent matchingEvent, Object candidateEvents) {
    // No executors configured: query with an empty parameter list.
    if (expressionExecutorList == null) {
        return dbHandler.checkExistence(new Object[0], executionInfo);
    }

    final Object[] parameters = new Object[expressionExecutorList.size()];
    int position = 0;
    for (ExpressionExecutor executor : expressionExecutorList) {
        final Object evaluated = executor.execute(matchingEvent);
        parameters[position] = evaluated;
        // A Bloom filter never yields false negatives, so a miss here means the value is absent.
        if (isBloomEnabled && !dbHandler.getBloomFilters()[attributeIndexArray[position]]
                .membershipTest(new Key(evaluated.toString().getBytes()))) {
            return false;
        }
        position++;
    }
    return dbHandler.checkExistence(parameters, executionInfo);
}

From source file:org.wso2.siddhi.extension.table.rdbms.RDBMSOperator.java

License:Open Source License

/**
 * Looks up the stored event(s) matching the given state event.
 *
 * <p>Every configured expression executor is evaluated against {@code matchingEvent} and the
 * results are collected, in iteration order, as the lookup parameters passed to
 * {@code dbHandler.selectEvent}. When {@code isBloomEnabled} is set, each value is first tested
 * against the Bloom filter selected by {@code attributeIndexArray}; a negative membership test
 * returns {@code null} immediately without consulting the handler.
 *
 * @param matchingEvent     event the expression executors are evaluated against
 * @param storeEvents       not read by this method
 * @param storeEventCloner  not read by this method
 * @return {@code null} if any Bloom filter rules the value out, otherwise the result of
 *         {@code dbHandler.selectEvent}
 */
@Override
public StreamEvent find(StateEvent matchingEvent, Object storeEvents, StreamEventCloner storeEventCloner) {
    // No executors configured: query with an empty parameter list.
    if (expressionExecutorList == null) {
        return dbHandler.selectEvent(new Object[0], executionInfo);
    }

    final Object[] parameters = new Object[expressionExecutorList.size()];
    int position = 0;
    for (ExpressionExecutor executor : expressionExecutorList) {
        final Object evaluated = executor.execute(matchingEvent);
        parameters[position] = evaluated;
        // A Bloom filter never yields false negatives, so a miss here means the value is absent.
        if (isBloomEnabled && !dbHandler.getBloomFilters()[attributeIndexArray[position]]
                .membershipTest(new Key(evaluated.toString().getBytes()))) {
            return null;
        }
        position++;
    }
    return dbHandler.selectEvent(parameters, executionInfo);
}

From source file:org.wso2.siddhi.extension.table.rdbms.RDBMSOperator.java

License:Open Source License

/**
 * Reports whether the backing store contains a record matching the given state event.
 *
 * <p>Every configured expression executor is evaluated against {@code matchingEvent} and the
 * results are collected, in iteration order, as the lookup parameters passed to
 * {@code dbHandler.checkExistence}. When {@code isBloomEnabled} is set, each value is first
 * tested against the Bloom filter selected by {@code attributeIndexArray}; a negative
 * membership test returns {@code false} immediately without consulting the handler.
 *
 * @param matchingEvent event the expression executors are evaluated against
 * @param storeEvents   not read by this method
 * @return {@code false} if any Bloom filter rules the value out, otherwise the result of
 *         {@code dbHandler.checkExistence}
 */
@Override
public boolean contains(StateEvent matchingEvent, Object storeEvents) {
    // No executors configured: query with an empty parameter list.
    if (expressionExecutorList == null) {
        return dbHandler.checkExistence(new Object[0], executionInfo);
    }

    final Object[] parameters = new Object[expressionExecutorList.size()];
    int position = 0;
    for (ExpressionExecutor executor : expressionExecutorList) {
        final Object evaluated = executor.execute(matchingEvent);
        parameters[position] = evaluated;
        // A Bloom filter never yields false negatives, so a miss here means the value is absent.
        if (isBloomEnabled && !dbHandler.getBloomFilters()[attributeIndexArray[position]]
                .membershipTest(new Key(evaluated.toString().getBytes()))) {
            return false;
        }
        position++;
    }
    return dbHandler.checkExistence(parameters, executionInfo);
}

From source file:uk.gov.gchq.gaffer.accumulostore.retriever.impl.AccumuloIDBetweenSetsRetrieverTest.java

License:Apache License

/**
 * Verifies that a Bloom-filter false positive does not appear in the results of a
 * {@code GetElementsBetweenSets} query.
 *
 * <p>The test builds its own Bloom filter over the seeds, searches the strings
 * {@code "1", "2", ...} for one the filter wrongly reports as present, stores an edge from
 * {@code "A0"} to that vertex, and then asserts that the query returns exactly the two
 * expected elements.
 *
 * @param loadIntoMemory passed through to {@code returnElementsFromOperation}; when
 *                       {@code false} the batch-scanner entry limit is set to 20 and the
 *                       filter is sized for 20 items instead of {@code seeds.size()}
 * @param store          store whose properties size the filter and which receives the extra edge
 * @throws StoreException                     declared for the store helpers used here
 * @throws AccumuloElementConversionException declared for vertex serialisation
 */
private void shouldDealWithFalsePositives(final boolean loadIntoMemory, final AccumuloStore store)
        throws StoreException, AccumuloElementConversionException {
    final Set<EntitySeed> seeds = new HashSet<>();
    seeds.add(AccumuloTestData.SEED_A0);
    seeds.add(AccumuloTestData.SEED_A23);
    // Add a bunch of items that are not in the data to make the probability of being able to find a false
    // positive sensible.
    for (int i = 0; i < 10; i++) {
        seeds.add(new EntitySeed("abc" + i));
    }

    // Need to make sure that the Bloom filter we create has the same size and the same number of hashes as the
    // one that GraphElementsWithStatisticsWithinSetRetriever creates.
    final int numItemsToBeAdded = loadIntoMemory ? seeds.size() : 20;
    if (!loadIntoMemory) {
        store.getProperties().setMaxEntriesForBatchScanner("20");
    }

    // Find something that will give a false positive.
    // Need to repeat the logic used in the getGraphElementsWithStatisticsWithinSet() method.
    // Calculate sensible size of filter, aiming for false positive rate of 1 in 10000, with a maximum size of
    // maxBloomFilterToPassToAnIterator bytes.
    int size = (int) (-numItemsToBeAdded * Math.log(0.0001) / (Math.pow(Math.log(2.0), 2.0)));
    size = Math.min(size, store.getProperties().getMaxBloomFilterToPassToAnIterator());

    // Work out optimal number of hashes to use in Bloom filter based on size of set - optimal number of hashes is
    // (m/n)ln 2 where m is the size of the filter in bits and n is the number of items that will be added to the set.
    // The integer division is kept as-is so the value mirrors the logic this test says it is repeating.
    final int numHashes = Math.max(1, (int) ((size / numItemsToBeAdded) * Math.log(2)));
    // Create Bloom filter and add seeds to it
    final BloomFilter filter = new BloomFilter(size, numHashes, Hash.MURMUR_HASH);
    for (final EntitySeed seed : seeds) {
        filter.add(new Key(store.getKeyPackage().getKeyConverter().serialiseVertex(seed.getVertex())));
    }

    // Test successive integers against it - the original author expected to need about
    // MAX_SIZE_BLOOM_FILTER / 2 tries on average before finding a false positive (an arbitrary limit is
    // imposed to avoid an infinite loop if there's a problem).
    final int maxNumberOfTries = 50 * store.getProperties().getMaxBloomFilterToPassToAnIterator();
    // Track success explicitly: comparing count with the limit would wrongly report failure when the
    // false positive is found on the very last permitted attempt.
    boolean foundFalsePositive = false;
    int count = 0;
    while (count < maxNumberOfTries) {
        count++;
        if (filter.membershipTest(new Key(("" + count).getBytes()))) {
            foundFalsePositive = true;
            break;
        }
    }
    if (!foundFalsePositive) {
        fail("Didn't find a false positive");
    }

    // False positive is "" + count so create an edge from seeds to that
    final Edge edge = new Edge(TestGroups.EDGE, "A0", "" + count, true);
    edge.putProperty(AccumuloPropertyNames.COUNT, 1000000);
    Set<Element> data = new HashSet<>();
    data.add(edge);
    final User user = new User();
    addElements(data, store, user);
    // Now query for all edges in set - shouldn't get the false positive
    AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> op = new GetElementsBetweenSets<>(
            AccumuloTestData.SEED_A0_SET, seeds, defaultView);
    final Set<Element> results = returnElementsFromOperation(store, op, new User(), loadIntoMemory);

    // Check results are as expected: only the two genuine elements, not the edge to the false positive.
    assertEquals(2, results.size());
    assertThat(results,
            IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY));
}

From source file:uk.gov.gchq.gaffer.accumulostore.retriever.impl.AccumuloIDWithinSetRetrieverTest.java

License:Apache License

/**
 * Exercises a {@code GetElements} query over a seed set for which a Bloom-filter false
 * positive is known to exist.
 *
 * <p>The test builds its own Bloom filter over the seeds, searches the strings
 * {@code "1", "2", ...} for one the filter wrongly reports as present (failing if none is found
 * within the attempt limit), then runs the query and asserts that the expected elements are
 * among the results.
 *
 * @param loadIntoMemory passed through to {@code returnElementsFromOperation}; when
 *                       {@code false} the batch-scanner entry limit is set to 20 and the
 *                       filter is sized for 20 items instead of {@code seeds.size()}
 * @param store          store whose properties size the filter and which is queried
 * @throws StoreException                     declared for the store helpers used here
 * @throws AccumuloElementConversionException declared for vertex serialisation
 */
private void shouldDealWithFalsePositives(final boolean loadIntoMemory, final AccumuloStore store)
        throws StoreException, AccumuloElementConversionException {
    // Query for all edges in set {A0, A23}
    final Set<EntitySeed> seeds = new HashSet<>();
    seeds.add(AccumuloTestData.SEED_A0);
    seeds.add(AccumuloTestData.SEED_A23);
    // Add a bunch of items that are not in the data to make the probability of being able to find a false
    // positive sensible.
    for (int i = 0; i < 10; i++) {
        seeds.add(new EntitySeed("abc" + i));
    }

    // Need to make sure that the Bloom filter we create has the same size and the same number of hashes as the
    // one that GraphElementsWithStatisticsWithinSetRetriever creates.
    final int numItemsToBeAdded = loadIntoMemory ? seeds.size() : 20;
    if (!loadIntoMemory) {
        store.getProperties().setMaxEntriesForBatchScanner("20");
    }

    // Find something that will give a false positive.
    // Need to repeat the logic used in the getGraphElementsWithStatisticsWithinSet() method.
    // Calculate sensible size of filter, aiming for false positive rate of 1 in 10000, with a maximum size of
    // maxBloomFilterToPassToAnIterator bytes.
    int size = (int) (-numItemsToBeAdded * Math.log(0.0001) / (Math.pow(Math.log(2.0), 2.0)));
    size = Math.min(size, store.getProperties().getMaxBloomFilterToPassToAnIterator());

    // Work out optimal number of hashes to use in Bloom filter based on size of set - optimal number of hashes is
    // (m/n)ln 2 where m is the size of the filter in bits and n is the number of items that will be added to the set.
    // The integer division is kept as-is so the value mirrors the logic this test says it is repeating.
    final int numHashes = Math.max(1, (int) ((size / numItemsToBeAdded) * Math.log(2)));
    // Create Bloom filter and add seeds to it
    final BloomFilter filter = new BloomFilter(size, numHashes, Hash.MURMUR_HASH);
    for (final EntitySeed seed : seeds) {
        filter.add(new Key(store.getKeyPackage().getKeyConverter().serialiseVertex(seed.getVertex())));
    }

    // Test successive integers against it - the original author expected to need about
    // MAX_SIZE_BLOOM_FILTER / 2 tries on average before finding a false positive (an arbitrary limit is
    // imposed to avoid an infinite loop if there's a problem).
    final int maxNumberOfTries = 50 * store.getProperties().getMaxBloomFilterToPassToAnIterator();
    // Track success explicitly: comparing count with the limit would wrongly report failure when the
    // false positive is found on the very last permitted attempt.
    boolean foundFalsePositive = false;
    int count = 0;
    while (count < maxNumberOfTries) {
        count++;
        if (filter.membershipTest(new Key(("" + count).getBytes()))) {
            foundFalsePositive = true;
            break;
        }
    }
    if (!foundFalsePositive) {
        fail("Didn't find a false positive");
    }

    // The false positive is "" + count.
    // NOTE(review): no edge to that vertex is added to the store before querying here - confirm
    // whether that is intended.
    final GetElements<EntitySeed, ?> op = new GetElements<>(defaultView, seeds);
    // Now query for all edges in set - shouldn't get the false positive
    final Set<Element> results = returnElementsFromOperation(store, op, new User(), loadIntoMemory);

    // Check results are as expected
    assertThat(results, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23,
            AccumuloTestData.A0_ENTITY, AccumuloTestData.A23_ENTITY));
}