Example usage for org.apache.commons.math3.random RandomGenerator nextLong

List of usage examples for org.apache.commons.math3.random RandomGenerator nextLong

Introduction

On this page you can find example usages of org.apache.commons.math3.random RandomGenerator nextLong.

Prototype

long nextLong();

Source Link

Document

Returns the next pseudorandom, uniformly distributed long value from this random number generator's sequence.

Usage

From source file:it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction.java

/**
 * Creates a new minimal perfect hash function for the given keys.
 *
 * <p>If no chunked hash store is supplied, one is built from <code>keys</code> using a randomly
 * generated seed. Each chunk of the store is then handed to a {@code Linear3SystemSolver}, trying
 * successive local seeds until a solution is found. If the store reports duplicate triples it is
 * reseeded and refilled from <code>keys</code>; construction gives up the fifth time duplicates
 * are reported.
 *
 * @param keys the keys to hash, or {@code null}; they are required when no chunked hash store is provided.
 * @param transform a transformation strategy for the keys.
 * @param signatureWidth a signature width, or 0 for no signature.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}.
 * @throws IllegalArgumentException if both <code>keys</code> and <code>chunkedHashStore</code> are
 * {@code null}, or if duplicates are still reported after repeated reseeding.
 * @throws IllegalStateException if the store reports duplicates but no keys were provided to rebuild it.
 * @throws IOException declared by the signature; presumably raised by the chunked hash store operations.
 */
protected GOVMinimalPerfectHashFunction(final Iterable<? extends T> keys,
        final TransformationStrategy<? super T> transform, final int signatureWidth, final File tempDir,
        ChunkedHashStore<T> chunkedHashStore) throws IOException {
    this.transform = transform;

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        // Without a store the keys are the only source of data: fail with a clear message
        // instead of the NullPointerException that keys.iterator() would raise below.
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, pl);
        chunkedHashStore.reset(r.nextLong());
        chunkedHashStore.addAll(keys.iterator());
    }
    n = chunkedHashStore.size();

    defRetValue = -1; // For the very few cases in which we can decide

    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);

    // One entry per chunk plus a final sentinel; each entry later also carries the chunk's local seed.
    edgeOffsetAndSeed = new long[numChunks + 1];

    bitVector = LongArrayBitVector.getInstance();
    // The values are stored as a list of 2-bit entries backed by bitVector.
    (values = bitVector.asLongBigList(2)).size(n * C_TIMES_256 >> 8);
    array = bitVector.bits();

    // Number of times the store has reported duplicate triples so far.
    int duplicates = 0;

    for (;;) {
        LOGGER.debug("Generating minimal perfect hash function...");

        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int q = 0;
            long unorientable = 0, unsolvable = 0;
            for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {

                // Cumulative number of edges (keys) up to and including this chunk.
                edgeOffsetAndSeed[q + 1] = edgeOffsetAndSeed[q] + chunk.size();

                long seed = 0;
                final long off = vertexOffset(edgeOffsetAndSeed[q]);
                final Linear3SystemSolver<BitVector> solver = new Linear3SystemSolver<BitVector>(
                        (int) (vertexOffset(edgeOffsetAndSeed[q + 1]) - off), chunk.size());

                // Try local seeds 0, SEED_STEP, 2 * SEED_STEP, ... until the solver succeeds.
                for (;;) {
                    final boolean solved = solver.generateAndSolve(chunk, seed, null);
                    unorientable += solver.unorientable;
                    unsolvable += solver.unsolvable;
                    if (solved)
                        break;
                    seed += SEED_STEP;
                    // The seed wrapped around to its starting value: nothing is left to try.
                    if (seed == 0)
                        throw new AssertionError("Exhausted local seeds");
                }

                // Store the local seed in the same long that holds the chunk's edge offset.
                this.edgeOffsetAndSeed[q] |= seed;
                final long[] solution = solver.solution;
                for (int i = 0; i < solution.length; i++)
                    values.set(i + off, solution[i]);
                q++;

                pl.update();

                if (ASSERTS) {
                    // Every triple of the chunk must select a distinct vertex.
                    final IntOpenHashSet pos = new IntOpenHashSet();
                    final int[] e = new int[3];
                    for (long[] triple : chunk) {
                        Linear3SystemSolver.tripleToEquation(triple, seed,
                                (int) (vertexOffset(edgeOffsetAndSeed[q]) - off), e);

                        assert pos
                                .add(e[(int) (values.getLong(off + e[0]) + values.getLong(off + e[1])
                                        + values.getLong(off + e[2])) % 3]) : "<" + e[0] + "," + e[1] + ","
                                                + e[2] + ">: "
                                                + e[(int) (values.getLong(off + e[0])
                                                        + values.getLong(off + e[1])
                                                        + values.getLong(off + e[2])) % 3];
                    }
                }
            }

            LOGGER.info("Unorientable graphs: " + unorientable + "/" + numChunks + " ("
                    + Util.format(100.0 * unorientable / numChunks) + "%)");
            LOGGER.info("Unsolvable systems: " + unsolvable + "/" + numChunks + " ("
                    + Util.format(100.0 * unsolvable / numChunks) + "%)");

            pl.done();
            break;
        } catch (ChunkedHashStore.DuplicateException e) {
            // Duplicate triples: reseed the store and refill it from the keys, which requires the keys.
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates");
            LOGGER.warn("Found duplicate. Recomputing triples...");
            chunkedHashStore.reset(r.nextLong());
            chunkedHashStore.addAll(keys.iterator());
        }
    }

    globalSeed = chunkedHashStore.seed();

    LOGGER.info("Completed.");
    LOGGER.debug("Forecast bit cost per key: " + 2 * C + 64. / (1 << LOG2_CHUNK_SIZE));
    LOGGER.info("Actual bit cost per key: " + (double) numBits() / n);

    if (signatureWidth != 0) {
        // Mask selecting the signatureWidth lowest bits of a long.
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        (signatures = LongArrayBitVector.getInstance().asLongBigList(signatureWidth)).size(n);
        pl.expectedUpdates = n;
        pl.itemsName = "signatures";
        pl.start("Signing...");
        for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
            Iterator<long[]> iterator = chunk.iterator();
            for (int i = chunk.size(); i-- != 0;) {
                final long[] triple = iterator.next();
                final int[] e = new int[3];
                // The signature of a key is the masked first element of its triple, stored at the key's hash.
                signatures.set(getLongByTripleNoCheck(triple, e), signatureMask & triple[0]);
                pl.lightUpdate();
            }
        }
        pl.done();
    } else {
        signatureMask = 0;
        signatures = null;
    }

    // Only close a store that was created here; a caller-provided store stays open.
    if (!givenChunkedHashStore)
        chunkedHashStore.close();
}

From source file:it.unimi.dsi.sux4j.mph.TwoStepsGOV3Function.java

/** Creates a new two-step function for the given keys and values.
 * /*from   www . ja  v a  2  s.c o m*/
 * @param keys the keys in the domain of the function.
 * @param transform a transformation strategy for the keys.
 * @param values values to be assigned to each key, in the same order of the iterator returned by <code>keys</code>; if {@code null}, the
 * assigned value will the the ordinal number of each key.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys associated with their rank, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}. 
 */
protected TwoStepsGOV3Function(final Iterable<? extends T> keys,
        final TransformationStrategy<? super T> transform, final LongBigList values, final File tempDir,
        ChunkedHashStore<T> chunkedHashStore) throws IOException {
    this.transform = transform;
    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator random = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, pl);
        chunkedHashStore.reset(random.nextLong());
        chunkedHashStore.addAll(keys.iterator());
    }
    n = chunkedHashStore.size();
    defRetValue = -1; // For the very few cases in which we can decide

    if (n == 0) {
        rankMean = escape = width = 0;
        firstFunction = secondFunction = null;
        remap = null;
        if (!givenChunkedHashStore)
            chunkedHashStore.close();
        return;
    }

    // Compute distribution of values and maximum number of bits.
    int w = 0, size;
    long v;
    final Long2LongOpenHashMap counts = new Long2LongOpenHashMap();
    counts.defaultReturnValue(-1);
    for (LongIterator i = values.iterator(); i.hasNext();) {
        v = i.nextLong();
        counts.put(v, counts.get(v) + 1);
        size = Fast.length(v);
        if (size > w)
            w = size;
    }

    this.width = w;
    final int m = counts.size();

    LOGGER.debug("Generating two-steps GOV3 function with " + w + " output bits...");

    // Sort keys by reverse frequency
    final long[] keysArray = counts.keySet().toLongArray(new long[m]);
    LongArrays.quickSort(keysArray, 0, keysArray.length, new AbstractLongComparator() {
        private static final long serialVersionUID = 1L;

        public int compare(final long a, final long b) {
            return Long.signum(counts.get(b) - counts.get(a));
        }
    });

    long mean = 0;
    for (int i = 0; i < keysArray.length; i++)
        mean += i * counts.get(keysArray[i]);
    rankMean = (double) mean / n;

    // Analyze data and choose a threshold
    long post = n, bestCost = Long.MAX_VALUE;
    int pos = 0, best = -1;

    // Examine every possible choice for r. Note that r = 0 implies one function, so we do not need to test the case r == w.
    for (int r = 0; r < w && pos < m; r++) {

        /* This cost function is dependent on the implementation of GOV3Function. 
         * Note that for r = 0 we are actually computing the cost of a single function (the first one). */
        final long cost = (long) Math.min(GOV3Function.C * n * 1.126 + n * r, GOV3Function.C * n * r)
                + (long) Math.min(GOV3Function.C * post * 1.126 + post * w, GOV3Function.C * post * w)
                + pos * Long.SIZE;

        if (cost < bestCost) {
            best = r;
            bestCost = cost;
        }

        /* We add to pre and subtract from post the counts of keys from position (1<<r)-1 to position (1<<r+1)-1. */
        for (int j = 0; j < (1 << r) && pos < m; j++) {
            final long c = counts.get(keysArray[pos++]);
            post -= c;
        }
    }

    if (ASSERTS)
        assert pos == m;

    counts.clear();
    counts.trim();

    // We must keep the remap array small.
    if (best >= Integer.SIZE)
        best = Integer.SIZE - 1;

    LOGGER.debug("Best threshold: " + best);
    escape = (1 << best) - 1;
    System.arraycopy(keysArray, 0, remap = new long[escape], 0, remap.length);
    final Long2LongOpenHashMap map = new Long2LongOpenHashMap();
    map.defaultReturnValue(-1);
    for (int i = 0; i < escape; i++)
        map.put(remap[i], i);

    if (best != 0) {
        firstFunction = new GOV3Function.Builder<T>().keys(keys).transform(transform).store(chunkedHashStore)
                .values(new AbstractLongBigList() {
                    public long getLong(long index) {
                        long value = map.get(values.getLong(index));
                        return value == -1 ? escape : value;
                    }

                    public long size64() {
                        return n;
                    }
                }, best).indirect().build();

        LOGGER.debug("Actual bit cost per key of first function: " + (double) firstFunction.numBits() / n);
    } else
        firstFunction = null;

    chunkedHashStore.filter(new Predicate() {
        public boolean evaluate(Object triple) {
            return firstFunction == null || firstFunction.getLongByTriple((long[]) triple) == escape;
        }
    });

    secondFunction = new GOV3Function.Builder<T>().store(chunkedHashStore).values(values, w).indirect().build();

    this.seed = chunkedHashStore.seed();
    if (!givenChunkedHashStore)
        chunkedHashStore.close();

    LOGGER.debug("Actual bit cost per key of second function: " + (double) secondFunction.numBits() / n);

    LOGGER.info("Actual bit cost per key: " + (double) numBits() / n);
    LOGGER.info("Completed.");

}

From source file:it.unimi.dsi.sux4j.mph.TwoStepsMWHCFunction.java

/** Creates a new two-step function for the given keys and values.
 * //from w  w  w .j  a  v a  2s  .c  om
 * @param keys the keys in the domain of the function.
 * @param transform a transformation strategy for the keys.
 * @param values values to be assigned to each key, in the same order of the iterator returned by <code>keys</code>; if {@code null}, the
 * assigned value will the the ordinal number of each key.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys associated with their rank, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}. 
 */
protected TwoStepsMWHCFunction(final Iterable<? extends T> keys,
        final TransformationStrategy<? super T> transform, final LongBigList values, final File tempDir,
        ChunkedHashStore<T> chunkedHashStore) throws IOException {
    this.transform = transform;
    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator random = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, pl);
        chunkedHashStore.reset(random.nextLong());
        chunkedHashStore.addAll(keys.iterator());
    }
    n = chunkedHashStore.size();
    defRetValue = -1; // For the very few cases in which we can decide

    if (n == 0) {
        rankMean = escape = width = 0;
        firstFunction = secondFunction = null;
        remap = null;
        if (!givenChunkedHashStore)
            chunkedHashStore.close();
        return;
    }

    // Compute distribution of values and maximum number of bits.
    int w = 0, size;
    long v;
    final Long2LongOpenHashMap counts = new Long2LongOpenHashMap();
    counts.defaultReturnValue(-1);
    for (LongIterator i = values.iterator(); i.hasNext();) {
        v = i.nextLong();
        counts.put(v, counts.get(v) + 1);
        size = Fast.length(v);
        if (size > w)
            w = size;
    }

    this.width = w;
    final int m = counts.size();

    LOGGER.debug("Generating two-steps MWHC function with " + w + " output bits...");

    // Sort keys by reverse frequency
    final long[] keysArray = counts.keySet().toLongArray(new long[m]);
    LongArrays.quickSort(keysArray, 0, keysArray.length, new AbstractLongComparator() {
        private static final long serialVersionUID = 1L;

        public int compare(final long a, final long b) {
            return Long.signum(counts.get(b) - counts.get(a));
        }
    });

    long mean = 0;
    for (int i = 0; i < keysArray.length; i++)
        mean += i * counts.get(keysArray[i]);
    rankMean = (double) mean / n;

    // Analyze data and choose a threshold
    long post = n, bestCost = Long.MAX_VALUE;
    int pos = 0, best = -1;

    // Examine every possible choice for r. Note that r = 0 implies one function, so we do not need to test the case r == w.
    for (int r = 0; r < w && pos < m; r++) {

        /* This cost function is dependent on the implementation of MWHCFunction. 
         * Note that for r = 0 we are actually computing the cost of a single function (the first one). */
        final long cost = (long) Math.min(HypergraphSorter.GAMMA * n * 1.126 + n * r,
                HypergraphSorter.GAMMA * n * r)
                + (long) Math.min(HypergraphSorter.GAMMA * post * 1.126 + post * w,
                        HypergraphSorter.GAMMA * post * w)
                + pos * Long.SIZE;

        if (cost < bestCost) {
            best = r;
            bestCost = cost;
        }

        /* We add to pre and subtract from post the counts of keys from position (1<<r)-1 to position (1<<r+1)-1. */
        for (int j = 0; j < (1 << r) && pos < m; j++) {
            final long c = counts.get(keysArray[pos++]);
            post -= c;
        }
    }

    if (ASSERTS)
        assert pos == m;

    counts.clear();
    counts.trim();

    // We must keep the remap array small.
    if (best >= Integer.SIZE)
        best = Integer.SIZE - 1;

    LOGGER.debug("Best threshold: " + best);
    escape = (1 << best) - 1;
    System.arraycopy(keysArray, 0, remap = new long[escape], 0, remap.length);
    final Long2LongOpenHashMap map = new Long2LongOpenHashMap();
    map.defaultReturnValue(-1);
    for (int i = 0; i < escape; i++)
        map.put(remap[i], i);

    if (best != 0) {
        firstFunction = new MWHCFunction.Builder<T>().keys(keys).transform(transform).store(chunkedHashStore)
                .values(new AbstractLongBigList() {
                    public long getLong(long index) {
                        long value = map.get(values.getLong(index));
                        return value == -1 ? escape : value;
                    }

                    public long size64() {
                        return n;
                    }
                }, best).indirect().build();

        LOGGER.debug("Actual bit cost per key of first function: " + (double) firstFunction.numBits() / n);
    } else
        firstFunction = null;

    chunkedHashStore.filter(new Predicate() {
        public boolean evaluate(Object triple) {
            return firstFunction == null || firstFunction.getLongByTriple((long[]) triple) == escape;
        }
    });

    secondFunction = new MWHCFunction.Builder<T>().store(chunkedHashStore).values(values, w).indirect().build();

    this.seed = chunkedHashStore.seed();
    if (!givenChunkedHashStore)
        chunkedHashStore.close();

    LOGGER.debug("Actual bit cost per key of second function: " + (double) secondFunction.numBits() / n);

    LOGGER.info("Actual bit cost per key: " + (double) numBits() / n);
    LOGGER.info("Completed.");

}

From source file:it.unimi.dsi.sux4j.mph.MinimalPerfectHashFunction.java

/**
 * Creates a new minimal perfect hash function for the given keys.
 *
 * <p>If no chunked hash store is supplied, one is built from <code>keys</code> using a randomly
 * generated seed. For each chunk a {@code HypergraphSorter} is run with fresh random seeds until
 * {@code generateAndSort} succeeds, and a 2-bit value is then assigned to each vertex popped from
 * the sorter's stack. If the store reports duplicate triples it is reseeded and refilled from
 * <code>keys</code>; construction gives up the fifth time duplicates are reported. Finally the
 * counting structure used for ranking is built over the value bit vector.
 *
 * @param keys the keys to hash, or {@code null}; they are required when no chunked hash store is provided.
 * @param transform a transformation strategy for the keys.
 * @param signatureWidth a signature width, or 0 for no signature.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}.
 * @throws IllegalArgumentException if both <code>keys</code> and <code>chunkedHashStore</code> are
 * {@code null}, or if duplicates are still reported after repeated reseeding.
 * @throws IllegalStateException if the store reports duplicates but no keys were provided to rebuild it.
 * @throws IOException declared by the signature; presumably raised by the chunked hash store operations.
 */
protected MinimalPerfectHashFunction(final Iterable<? extends T> keys,
        final TransformationStrategy<? super T> transform, final int signatureWidth, final File tempDir,
        ChunkedHashStore<T> chunkedHashStore) throws IOException {
    this.transform = transform;

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        // Without a store the keys are the only source of data: fail with a clear message
        // instead of the NullPointerException that keys.iterator() would raise below.
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, pl);
        chunkedHashStore.reset(r.nextLong());
        chunkedHashStore.addAll(keys.iterator());
    }
    n = chunkedHashStore.size();

    defRetValue = -1; // For the very few cases in which we can decide

    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);

    // Per-chunk seeds, and cumulative vertex offsets with one extra final entry.
    seed = new long[numChunks];
    offset = new long[numChunks + 1];

    bitVector = LongArrayBitVector.getInstance();
    // The values are stored as a list of 2-bit entries backed by bitVector.
    (values = bitVector.asLongBigList(2)).size(((long) Math.ceil(n * HypergraphSorter.GAMMA) + 4 * numChunks));
    array = bitVector.bits();

    // Number of times the store has reported duplicate triples so far.
    int duplicates = 0;

    for (;;) {
        LOGGER.debug("Generating minimal perfect hash function...");

        // Local seed of the chunk being processed; shadows the seed field, hence this.seed below.
        long seed = 0;
        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int q = 0;
            for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
                final HypergraphSorter<BitVector> sorter = new HypergraphSorter<BitVector>(chunk.size(), false);
                // Draw random seeds until the sorter succeeds on this chunk.
                do {
                    seed = r.nextLong();
                } while (!sorter.generateAndSort(chunk.iterator(), seed));

                this.seed[q] = seed;
                offset[q + 1] = offset[q] + sorter.numVertices;

                /* We assign values. */
                int top = chunk.size(), k, v = 0;
                final int[] stack = sorter.stack;
                final int[] vertex1 = sorter.vertex1;
                final int[] vertex2 = sorter.vertex2;
                final long off = offset[q];

                while (top > 0) {
                    v = stack[--top];
                    // k is the number of the other two vertices of the edge that are smaller than v.
                    k = (v > vertex1[v] ? 1 : 0) + (v > vertex2[v] ? 1 : 0);
                    assert k >= 0 && k < 3 : Integer.toString(k);
                    final long s = values.getLong(off + vertex1[v]) + values.getLong(off + vertex2[v]);
                    // Choose the value of v so that the three values of the edge sum to k modulo 3.
                    final long value = (k - s + 9) % 3;
                    assert values.getLong(off + v) == 0;
                    // 3 is stored in place of 0, presumably so that assigned vertices are nonzero
                    // when nonzero pairs are counted below.
                    values.set(off + v, value == 0 ? 3 : value);
                }

                q++;
                pl.update();

                if (ASSERTS) {
                    // Every triple of the chunk must select a distinct vertex.
                    final IntOpenHashSet pos = new IntOpenHashSet();
                    final int[] e = new int[3];
                    for (long[] triple : chunk) {
                        HypergraphSorter.tripleToEdge(triple, seed, sorter.numVertices, sorter.partSize, e);
                        assert pos.add(e[(int) (values.getLong(off + e[0]) + values.getLong(off + e[1])
                                + values.getLong(off + e[2])) % 3]);
                    }
                }
            }

            pl.done();
            break;
        } catch (ChunkedHashStore.DuplicateException e) {
            // Duplicate triples: reseed the store and refill it from the keys, which requires the keys.
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates");
            LOGGER.warn("Found duplicate. Recomputing triples...");
            chunkedHashStore.reset(r.nextLong());
            chunkedHashStore.addAll(keys.iterator());
        }
    }

    globalSeed = chunkedHashStore.seed();

    if (n > 0) {
        long m = values.size64();

        final long length = bitVector.length();

        final int numWords = (int) ((length + Long.SIZE - 1) / Long.SIZE);

        // Two longs for every block of 32 words.
        final int numCounts = (int) ((length + 32 * Long.SIZE - 1) / (32 * Long.SIZE)) * 2;
        // Init rank/select structure
        count = new long[numCounts + 1];

        // Running number of nonzero pairs seen so far.
        long c = 0;
        int pos = 0;
        for (int i = 0; i < numWords; i += WORDS_PER_SUPERBLOCK, pos += 2) {
            // Absolute count at the start of the superblock.
            count[pos] = c;

            for (int j = 0; j < WORDS_PER_SUPERBLOCK; j++) {
                // Every six words, pack a relative count into a 12-bit field of the second long.
                if (j != 0 && j % 6 == 0)
                    count[pos + 1] |= (i + j <= numWords ? c - count[pos] : 0x7FFL) << 12 * (j / 6 - 1);
                if (i + j < numWords)
                    c += countNonzeroPairs(array[i + j]);
            }
        }

        count[numCounts] = c;

        if (ASSERTS) {
            int k = 0;
            for (long i = 0; i < m; i++) {
                assert rank(i) == k : "(" + i + ") " + k + " != " + rank(i);
                if (values.getLong(i) != 0)
                    k++;
                assert k <= n;
            }

            if (keys != null) {
                final Iterator<? extends T> iterator = keys.iterator();
                for (long i = 0; i < n; i++)
                    assert getLong(iterator.next()) < n;
            }
        }
    } else
        count = LongArrays.EMPTY_ARRAY;

    LOGGER.info("Completed.");
    LOGGER.debug(
            "Forecast bit cost per key: " + (2 * HypergraphSorter.GAMMA + 2. * Long.SIZE / BITS_PER_BLOCK));
    LOGGER.info("Actual bit cost per key: " + (double) numBits() / n);

    if (signatureWidth != 0) {
        // Mask selecting the signatureWidth lowest bits of a long.
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        (signatures = LongArrayBitVector.getInstance().asLongBigList(signatureWidth)).size(n);
        pl.expectedUpdates = n;
        pl.itemsName = "signatures";
        pl.start("Signing...");
        for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
            Iterator<long[]> iterator = chunk.iterator();
            for (int i = chunk.size(); i-- != 0;) {
                final long[] triple = iterator.next();
                final int[] e = new int[3];
                // The signature of a key is the masked first element of its triple, stored at the key's hash.
                signatures.set(getLongByTripleNoCheck(triple, e), signatureMask & triple[0]);
                pl.lightUpdate();
            }
        }
        pl.done();
    } else {
        signatureMask = 0;
        signatures = null;
    }

    // Only close a store that was created here; a caller-provided store stays open.
    if (!givenChunkedHashStore)
        chunkedHashStore.close();
}

From source file:it.unimi.dsi.sux4j.mph.CHDMinimalPerfectHashFunction.java

/**
 * Creates a new CHD minimal perfect hash function for the given keys.
 *
 * <p>Every chunk of the store is processed on its own: its keys are distributed into
 * buckets using a per-chunk seed, buckets are visited by decreasing size, and for each
 * bucket a pair of coefficients (c0, c1) is searched such that all keys of the bucket fall
 * on bins of the chunk that are still free. The pair is stored as the single number
 * {@code c0 + c1 * p}, where {@code p} is the (prime) number of bins of the chunk.
 *
 * @param keys the keys to hash, or {@code null}.
 * @param transform a transformation strategy for the keys.
 * @param lambda the average bucket size.
 * @param loadFactor the load factor.
 * @param signatureWidth a signature width, or 0 for no signature.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}.
 * @throws IOException if an I/O error is raised by the store or by the offline coefficient list.
 * @throws IllegalArgumentException if neither a store nor the keys are provided, or if the
 * store still contains duplicates after a few rebuilds.
 * @throws IllegalStateException if an unchecked store turns out to contain duplicates and no keys were provided.
 */
protected CHDMinimalPerfectHashFunction(final Iterable<? extends T> keys,
        final TransformationStrategy<? super T> transform, final int lambda, double loadFactor,
        final int signatureWidth, final File tempDir, ChunkedHashStore<T> chunkedHashStore) throws IOException {
    this.transform = transform;

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        // Fail with an explicit message rather than with a NullPointerException on keys.iterator().
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, pl);
        chunkedHashStore.reset(r.nextLong());
        chunkedHashStore.addAll(keys.iterator());
    }
    n = chunkedHashStore.size();

    defRetValue = -1; // For the very few cases in which we can decide

    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);
    LOGGER.debug("Average chunk size: " + (double) n / numChunks);

    offsetNumBucketsSeed = new long[(numChunks + 1) * 3 + 2];

    int duplicates = 0;
    final LongArrayList holes = new LongArrayList();

    // Coefficients are accumulated offline; each value is written as a variable-length
    // sequence of 7-bit groups, least significant group first, with bit 7 set on every
    // byte except the last one.
    @SuppressWarnings("resource")
    final OfflineIterable<MutableLong, MutableLong> coefficients = new OfflineIterable<MutableLong, MutableLong>(
            new Serializer<MutableLong, MutableLong>() {

                @Override
                public void write(final MutableLong a, final DataOutput dos) throws IOException {
                    long x = a.longValue();
                    while ((x & ~0x7FL) != 0) {
                        dos.writeByte((int) (x | 0x80));
                        x >>>= 7;
                    }
                    dos.writeByte((int) x);
                }

                @Override
                public void read(final DataInput dis, final MutableLong x) throws IOException {
                    byte b = dis.readByte();
                    long t = b & 0x7F;
                    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
                        b = dis.readByte();
                        t |= (b & 0x7FL) << shift;
                    }
                    x.setValue(t);
                }
            }, new MutableLong());

    for (;;) {
        LOGGER.debug("Generating minimal perfect hash function...");

        // Each attempt starts from scratch: drop whatever a previous, aborted attempt left behind.
        holes.clear();
        coefficients.clear();
        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int chunkNumber = 0;

            for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
                /* We treat a chunk as a single hash function. The number of bins is thus
                 * the first prime larger than the chunk size divided by the load factor. */
                final int p = Primes.nextPrime((int) Math.ceil(chunk.size() / loadFactor) + 1);
                final boolean used[] = new boolean[p];

                final int numBuckets = (chunk.size() + lambda - 1) / lambda;
                numBuckets(chunkNumber + 1, numBuckets(chunkNumber) + numBuckets);
                final int[] cc0 = new int[numBuckets];
                final int[] cc1 = new int[numBuckets];
                @SuppressWarnings("unchecked")
                final ArrayList<long[]>[] bucket = new ArrayList[numBuckets];
                for (int i = bucket.length; i-- != 0;)
                    bucket[i] = new ArrayList<long[]>();

                tryChunk: for (;;) {
                    for (ArrayList<long[]> b : bucket)
                        b.clear();
                    Arrays.fill(used, false);

                    /* At each try, the allocation to keys to bucket is randomized differently. */
                    final long seed = r.nextLong();
                    // System.err.println( "Number of keys: " + chunk.size()  + " Number of bins: " + p + " seed: " + seed );
                    /* We distribute the keys in this chunks in the buckets. */
                    for (Iterator<long[]> iterator = chunk.iterator(); iterator.hasNext();) {
                        final long[] triple = iterator.next();
                        final long[] h = new long[3];
                        Hashes.spooky4(triple, seed, h);
                        final ArrayList<long[]> b = bucket[(int) ((h[0] >>> 1) % numBuckets)];
                        h[1] = (int) ((h[1] >>> 1) % p);
                        h[2] = (int) ((h[2] >>> 1) % (p - 1)) + 1;

                        // All elements in a bucket must have either different h[ 1 ] or different h[ 2 ]
                        for (long[] t : b)
                            if (t[1] == h[1] && t[2] == h[2]) {
                                LOGGER.info("Duplicate index" + Arrays.toString(t));
                                continue tryChunk;
                            }
                        b.add(h);
                    }

                    // Visit buckets by decreasing size (the comparator swaps its arguments).
                    final int[] perm = Util.identity(bucket.length);
                    IntArrays.quickSort(perm, new AbstractIntComparator() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public int compare(int a0, int a1) {
                            return Integer.compare(bucket[a1].size(), bucket[a0].size());
                        }
                    });

                    for (int i = 0; i < perm.length;) {
                        final LinkedList<Integer> bucketsToDo = new LinkedList<Integer>();
                        final int size = bucket[perm[i]].size();
                        //System.err.println( "Bucket size: " + size );
                        int j;
                        // Gather indices of all buckets with the same size
                        for (j = i; j < perm.length && bucket[perm[j]].size() == size; j++)
                            bucketsToDo.add(Integer.valueOf(perm[j]));

                        // Examine for each pair (c0,c1) the buckets still to do
                        ext: for (int c1 = 0; c1 < p; c1++)
                            for (int c0 = 0; c0 < p; c0++) {
                                //System.err.println( "Testing " + c0 + ", " + c1 + " (to do: " + bucketsToDo.size() + ")" );
                                for (Iterator<Integer> iterator = bucketsToDo.iterator(); iterator.hasNext();) {
                                    final int k = iterator.next().intValue();
                                    final ArrayList<long[]> b = bucket[k];
                                    boolean completed = true;
                                    final IntArrayList done = new IntArrayList();
                                    // Try to see whether the necessary entries are not used
                                    for (long[] h : b) {
                                        //assert k == h[ 0 ];

                                        int pos = (int) ((h[1] + c0 * h[2] + c1) % p);
                                        //System.err.println( "Testing pos " + pos + " for " + Arrays.toString( e  ));
                                        if (used[pos]) {
                                            completed = false;
                                            break;
                                        } else {
                                            used[pos] = true;
                                            done.add(pos);
                                        }
                                    }

                                    if (completed) {
                                        // All positions were free
                                        cc0[k] = c0;
                                        cc1[k] = c1;
                                        iterator.remove();
                                    } else
                                        // Roll back the bins tentatively marked for this bucket.
                                        for (int d : done)
                                            used[d] = false;
                                }
                                if (bucketsToDo.isEmpty())
                                    break ext;
                            }
                        // No coefficient pair works for some bucket: retry the chunk with a new seed.
                        if (!bucketsToDo.isEmpty())
                            continue tryChunk;

                        seed(chunkNumber, seed);
                        i = j;
                    }
                    break;
                }

                // System.err.println("DONE!");

                if (ASSERTS) {
                    // Recompute every position from scratch and check that no two keys collide.
                    final IntOpenHashSet pos = new IntOpenHashSet();
                    final long h[] = new long[3];
                    for (Iterator<long[]> iterator = chunk.iterator(); iterator.hasNext();) {
                        final long[] triple = iterator.next();
                        Hashes.spooky4(triple, seed(chunkNumber), h);
                        h[0] = (h[0] >>> 1) % numBuckets;
                        h[1] = (int) ((h[1] >>> 1) % p);
                        h[2] = (int) ((h[2] >>> 1) % (p - 1)) + 1;
                        //System.err.println( Arrays.toString(  e  ) );
                        assert pos.add((int) ((h[1] + cc0[(int) (h[0])] * h[2] + cc1[(int) (h[0])]) % p));
                    }
                }

                final MutableLong l = new MutableLong();
                for (int i = 0; i < numBuckets; i++) {
                    // The product is computed in long arithmetic: cc1[i] and p are both ints
                    // smaller than or equal to p, and their product overflows an int as soon
                    // as it exceeds Integer.MAX_VALUE.
                    l.setValue(cc0[i] + (long) cc1[i] * p);
                    coefficients.add(l);
                }

                // Bins left unused in this chunk are recorded as holes, in global coordinates.
                for (int i = 0; i < p; i++)
                    if (!used[i])
                        holes.add(offset(chunkNumber) + i);

                offset(chunkNumber + 1, offset(chunkNumber) + p);
                chunkNumber++;
                pl.update();
            }

            pl.done();
            break;
        } catch (ChunkedHashStore.DuplicateException e) {
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            // The post-increment lets a few rebuilds through before giving up.
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates", e);
            LOGGER.warn("Found duplicate. Recomputing triples...");
            chunkedHashStore.reset(r.nextLong());
            chunkedHashStore.addAll(keys.iterator());
        }
    }

    rank = new SparseRank(offset(offsetNumBucketsSeed.length / 3 - 1), holes.size(), holes.iterator());

    globalSeed = chunkedHashStore.seed();

    // Move the coefficients from the offline list into their final in-memory representation.
    this.coefficients = new EliasFanoLongBigList(new AbstractLongIterator() {
        final OfflineIterator<MutableLong, MutableLong> iterator = coefficients.iterator();

        @Override
        public boolean hasNext() {
            return iterator.hasNext();
        }

        public long nextLong() {
            return iterator.next().longValue();
        }
    }, 0, true);

    coefficients.close();

    LOGGER.info("Completed.");
    LOGGER.info("Actual bit cost per key: " + (double) numBits() / n);

    if (signatureWidth != 0) {
        // Keep the signatureWidth lowest bits of the first element of each triple as signature.
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        (signatures = LongArrayBitVector.getInstance().asLongBigList(signatureWidth)).size(n);
        pl.expectedUpdates = n;
        pl.itemsName = "signatures";
        pl.start("Signing...");
        for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
            Iterator<long[]> iterator = chunk.iterator();
            for (int i = chunk.size(); i-- != 0;) {
                final long[] triple = iterator.next();
                long t = getLongByTripleNoCheck(triple);
                signatures.set(t, signatureMask & triple[0]);
                pl.lightUpdate();
            }
        }
        pl.done();
    } else {
        signatureMask = 0;
        signatures = null;
    }

    // Only close a store this constructor created; a caller-provided store stays open.
    if (!givenChunkedHashStore)
        chunkedHashStore.close();
}

From source file:it.unimi.dsi.sux4j.io.ChunkedHashStore.java

/** Checks that this store has no duplicate triples, and try to rebuild if this fails to happen.
 * /*from  ww w. j a  v  a  2 s .  c  o  m*/
 * @param iterable the elements with which the store will be refilled if there are duplicate triples.
 * @param values the values that will be associated with the elements returned by <code>iterable</code>. 
 * @throws IllegalArgumentException if after a few trials the store still contains duplicate triples.
 */
public void checkAndRetry(final Iterable<? extends T> iterable, final LongIterable values) throws IOException {
    final RandomGenerator random = new XorShift1024StarRandomGenerator();
    int duplicates = 0;

    for (;;)
        try {
            check();
            break;
        } catch (DuplicateException e) {
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates");
            LOGGER.warn("Found duplicate. Recomputing triples...");
            reset(random.nextLong());
            addAll(iterable.iterator(), values.iterator());
        }

    checkedForDuplicates = true;
}

From source file:it.unimi.dsi.sux4j.mph.GOV4Function.java

/** Creates a new function for the given keys and values.
 * //from   w ww . j  ava2s .  co m
 * @param keys the keys in the domain of the function, or {@code null}.
 * @param transform a transformation strategy for the keys.
 * @param signatureWidth a positive number for a signature width, 0 for no signature, a negative value for a self-signed function; if nonzero, {@code values} must be {@code null} and {@code width} must be -1.
 * @param values values to be assigned to each element, in the same order of the iterator returned by <code>keys</code>; if {@code null}, the
 * assigned value will the the ordinal number of each element.
 * @param dataWidth the bit width of the <code>values</code>, or -1 if <code>values</code> is {@code null}.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys associated with their ranks (if there are no values, or {@code indirect} is true)
 * or values, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}. 
 * @param indirect if true, <code>chunkedHashStore</code> contains ordinal positions, and <code>values</code> is a {@link LongIterable} that
 * must be accessed to retrieve the actual values. 
 */
protected GOV4Function(final Iterable<? extends T> keys, final TransformationStrategy<? super T> transform,
        int signatureWidth, final LongIterable values, final int dataWidth, final File tempDir,
        ChunkedHashStore<T> chunkedHashStore, final boolean indirect) throws IOException {
    this.transform = transform;

    if (signatureWidth != 0 && values != null)
        throw new IllegalArgumentException("You cannot sign a function if you specify its values");
    if (signatureWidth != 0 && dataWidth != -1)
        throw new IllegalArgumentException("You cannot specify a signature width and a data width");

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, -Math.min(signatureWidth, 0), pl);
        chunkedHashStore.reset(r.nextLong());
        if (values == null || indirect)
            chunkedHashStore.addAll(keys.iterator());
        else
            chunkedHashStore.addAll(keys.iterator(), values != null ? values.iterator() : null);
    }
    n = chunkedHashStore.size();
    defRetValue = signatureWidth < 0 ? 0 : -1; // Self-signed maps get zero as default resturn value.

    if (n == 0) {
        m = this.globalSeed = chunkShift = this.width = 0;
        data = null;
        offsetAndSeed = null;
        signatureMask = 0;
        signatures = null;
        return;
    }

    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);

    offsetAndSeed = new long[numChunks + 1];

    this.width = signatureWidth < 0 ? -signatureWidth : dataWidth == -1 ? Fast.ceilLog2(n) : dataWidth;

    // Candidate data; might be discarded for compaction.
    @SuppressWarnings("resource")
    final OfflineIterable<BitVector, LongArrayBitVector> offlineData = new OfflineIterable<BitVector, LongArrayBitVector>(
            BitVectors.OFFLINE_SERIALIZER, LongArrayBitVector.getInstance());

    int duplicates = 0;

    for (;;) {
        LOGGER.debug("Generating GOV function with " + this.width + " output bits...");

        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int q = 0;
            final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance();
            final LongBigList data = dataBitVector.asLongBigList(this.width);
            long unsolvable = 0;
            for (final ChunkedHashStore.Chunk chunk : chunkedHashStore) {

                offsetAndSeed[q + 1] = offsetAndSeed[q]
                        + Math.max((C_TIMES_256 * chunk.size() >>> 8), chunk.size() + 1);

                long seed = 0;
                final int v = (int) (offsetAndSeed[q + 1] - offsetAndSeed[q]);
                final Linear4SystemSolver<BitVector> solver = new Linear4SystemSolver<BitVector>(v,
                        chunk.size());

                for (;;) {
                    final boolean solved = solver.generateAndSolve(chunk, seed, new AbstractLongBigList() {
                        private final LongBigList valueList = indirect
                                ? (values instanceof LongList ? LongBigLists.asBigList((LongList) values)
                                        : (LongBigList) values)
                                : null;

                        @Override
                        public long size64() {
                            return chunk.size();
                        }

                        @Override
                        public long getLong(final long index) {
                            return indirect ? valueList.getLong(chunk.data(index)) : chunk.data(index);
                        }
                    });
                    unsolvable += solver.unsolvable;
                    if (solved)
                        break;
                    seed += SEED_STEP;
                    if (seed == 0)
                        throw new AssertionError("Exhausted local seeds");
                }

                this.offsetAndSeed[q] |= seed;

                dataBitVector.fill(false);
                data.size(v);
                q++;

                /* We assign values. */
                final long[] solution = solver.solution;
                for (int i = 0; i < solution.length; i++)
                    data.set(i, solution[i]);

                offlineData.add(dataBitVector);
                pl.update();
            }

            LOGGER.info("Unsolvable systems: " + unsolvable + "/" + numChunks + " ("
                    + Util.format(100.0 * unsolvable / numChunks) + "%)");

            pl.done();
            break;
        } catch (ChunkedHashStore.DuplicateException e) {
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates");
            LOGGER.warn("Found duplicate. Recomputing triples...");
            chunkedHashStore.reset(r.nextLong());
            pl.itemsName = "keys";
            if (values == null || indirect)
                chunkedHashStore.addAll(keys.iterator());
            else
                chunkedHashStore.addAll(keys.iterator(), values != null ? values.iterator() : null);
        }
    }

    if (DEBUG)
        System.out.println("Offsets: " + Arrays.toString(offsetAndSeed));

    globalSeed = chunkedHashStore.seed();
    m = offsetAndSeed[offsetAndSeed.length - 1];
    final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance(m * this.width);
    this.data = dataBitVector.asLongBigList(this.width);

    OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
    while (iterator.hasNext())
        dataBitVector.append(iterator.next());
    iterator.close();

    offlineData.close();

    LOGGER.info("Completed.");
    LOGGER.info("Forecast bit cost per element: " + C * this.width);
    LOGGER.info("Actual bit cost per element: " + (double) numBits() / n);

    if (signatureWidth > 0) {
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        signatures = chunkedHashStore.signatures(signatureWidth, pl);
    } else if (signatureWidth < 0) {
        signatureMask = -1L >>> Long.SIZE + signatureWidth;
        signatures = null;
    } else {
        signatureMask = 0;
        signatures = null;
    }

    if (!givenChunkedHashStore)
        chunkedHashStore.close();
}

From source file:it.unimi.dsi.sux4j.mph.MWHCFunction.java

/**
 * Creates a new function for the given keys and values.
 *
 * <p>The keys are split into chunks; for each chunk a hypergraph is generated and sorted
 * (drawing new seeds until it succeeds), values are assigned to the vertices by unwinding
 * the sorter's stack, and the result is appended to an offline list. At the end the data is
 * either stored as is or compacted by keeping only nonzero entries plus a ranked marker vector,
 * whichever is estimated to be smaller.
 *
 * @param keys the keys in the domain of the function, or {@code null}.
 * @param transform a transformation strategy for the keys.
 * @param signatureWidth a positive number for a signature width, 0 for no signature, a negative value for a self-signed function; if nonzero, {@code values} must be {@code null} and {@code dataWidth} must be -1.
 * @param values values to be assigned to each element, in the same order of the iterator returned by <code>keys</code>; if {@code null}, the
 * assigned value will be the ordinal number of each element.
 * @param dataWidth the bit width of the <code>values</code>, or -1 if <code>values</code> is {@code null}.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys associated with their ranks (if there are no values, or {@code indirect} is true)
 * or values, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}.
 * @param indirect if true, <code>chunkedHashStore</code> contains ordinal positions, and <code>values</code> is a {@link LongIterable} that
 * must be accessed to retrieve the actual values.
 * @throws IOException if an I/O error is raised by the store or by the offline data list.
 * @throws IllegalArgumentException if a signature width is combined with values or with a data width,
 * if neither a store nor the keys are provided, or if the store still contains duplicates after a few rebuilds.
 * @throws IllegalStateException if an unchecked store turns out to contain duplicates and no keys were provided.
 */
protected MWHCFunction(final Iterable<? extends T> keys, final TransformationStrategy<? super T> transform,
        int signatureWidth, final LongIterable values, final int dataWidth, final File tempDir,
        ChunkedHashStore<T> chunkedHashStore, boolean indirect) throws IOException {
    this.transform = transform;

    if (signatureWidth != 0 && values != null)
        throw new IllegalArgumentException("You cannot sign a function if you specify its values");
    if (signatureWidth != 0 && dataWidth != -1)
        throw new IllegalArgumentException("You cannot specify a signature width and a data width");

    // In indirect mode values must be a random-access list of longs: a LongList is wrapped,
    // anything else is cast to LongBigList.
    final LongBigList valueList = indirect
            ? (values instanceof LongList ? LongBigLists.asBigList((LongList) values) : (LongBigList) values)
            : null;

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, -Math.min(signatureWidth, 0), pl);
        chunkedHashStore.reset(r.nextLong());
        if (values == null || indirect)
            chunkedHashStore.addAll(keys.iterator());
        else
            chunkedHashStore.addAll(keys.iterator(), values != null ? values.iterator() : null);
    }
    n = chunkedHashStore.size();
    defRetValue = signatureWidth < 0 ? 0 : -1; // Self-signed maps get zero as default return value.

    if (n == 0) {
        m = this.globalSeed = chunkShift = this.width = 0;
        data = null;
        marker = null;
        rank = null;
        seed = null;
        offset = null;
        signatureMask = 0;
        signatures = null;
        // A store created by this constructor must be released on the early exit, too.
        if (!givenChunkedHashStore)
            chunkedHashStore.close();
        return;
    }

    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);

    seed = new long[numChunks];
    offset = new long[numChunks + 1];

    this.width = signatureWidth < 0 ? -signatureWidth : dataWidth == -1 ? Fast.ceilLog2(n) : dataWidth;

    // Candidate data; might be discarded for compaction.
    @SuppressWarnings("resource")
    final OfflineIterable<BitVector, LongArrayBitVector> offlineData = new OfflineIterable<BitVector, LongArrayBitVector>(
            BitVectors.OFFLINE_SERIALIZER, LongArrayBitVector.getInstance());

    int duplicates = 0;

    for (;;) {
        LOGGER.debug("Generating MWHC function with " + this.width + " output bits...");

        // A previous attempt may have been aborted by a duplicate after some chunks had
        // already been processed: discard the data it produced before starting over.
        offlineData.clear();

        long seed = 0;
        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int q = 0;
            final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance();
            final LongBigList data = dataBitVector.asLongBigList(this.width);
            for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
                HypergraphSorter<BitVector> sorter = new HypergraphSorter<BitVector>(chunk.size());
                // Draw seeds until the hypergraph of this chunk can be sorted.
                do {
                    seed = r.nextLong();
                } while (!sorter.generateAndSort(chunk.iterator(), seed));

                this.seed[q] = seed;
                dataBitVector.fill(false);
                data.size(sorter.numVertices);
                offset[q + 1] = offset[q] + sorter.numVertices;

                /* We assign values. */

                int top = chunk.size(), x, k;
                final int[] stack = sorter.stack;
                final int[] vertex1 = sorter.vertex1;
                final int[] vertex2 = sorter.vertex2;
                final int[] edge = sorter.edge;

                // Unwind the stack: vertex x is set so that the XOR of x and its two
                // companion vertices equals the value associated with edge k.
                while (top > 0) {
                    x = stack[--top];
                    k = edge[x];
                    final long s = data.getLong(vertex1[x]) ^ data.getLong(vertex2[x]);
                    final long value = indirect ? valueList.getLong(chunk.data(k)) : chunk.data(k);
                    data.set(x, value ^ s);

                    if (ASSERTS)
                        assert (value == (data.getLong(x) ^ data.getLong(vertex1[x])
                                ^ data.getLong(vertex2[x]))) : "<" + x + "," + vertex1[x] + "," + vertex2[x]
                                        + ">: " + value + " != " + (data.getLong(x) ^ data.getLong(vertex1[x])
                                                ^ data.getLong(vertex2[x]));
                }

                q++;
                offlineData.add(dataBitVector);
                pl.update();
            }

            pl.done();
            break;
        } catch (ChunkedHashStore.DuplicateException e) {
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            // The post-increment lets a few rebuilds through before giving up.
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates", e);
            LOGGER.warn("Found duplicate. Recomputing triples...");
            chunkedHashStore.reset(r.nextLong());
            pl.itemsName = "keys";
            if (values == null || indirect)
                chunkedHashStore.addAll(keys.iterator());
            else
                chunkedHashStore.addAll(keys.iterator(), values != null ? values.iterator() : null);
        }
    }

    if (DEBUG)
        System.out.println("Offsets: " + Arrays.toString(offset));

    globalSeed = chunkedHashStore.seed();

    // Check for compaction
    long nonZero = 0;
    m = offset[offset.length - 1];

    {
        // First pass over the candidate data: count the nonzero entries.
        final OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        while (iterator.hasNext()) {
            final LongBigList data = iterator.next().asLongBigList(this.width);
            for (long i = 0; i < data.size64(); i++)
                if (data.getLong(i) != 0)
                    nonZero++;
        }
        iterator.close();
    }
    // We estimate size using Rank16
    if (nonZero * this.width + m * 1.126 < m * this.width) {
        LOGGER.info("Compacting...");
        marker = LongArrayBitVector.ofLength(m);
        final LongBigList newData = LongArrayBitVector.getInstance().asLongBigList(this.width);
        newData.size(nonZero);
        nonZero = 0;

        // Second pass: mark the position of each nonzero entry and pack the entries contiguously.
        final OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        long j = 0;
        while (iterator.hasNext()) {
            final LongBigList data = iterator.next().asLongBigList(this.width);
            for (long i = 0; i < data.size64(); i++, j++) {
                final long value = data.getLong(i);
                if (value != 0) {
                    marker.set(j);
                    newData.set(nonZero++, value);
                }
            }
        }
        iterator.close();

        rank = new Rank16(marker);

        if (ASSERTS) {
            // Verify that marker and rank recover every original entry from the packed data.
            final OfflineIterator<BitVector, LongArrayBitVector> iterator2 = offlineData.iterator();
            long k = 0;
            while (iterator2.hasNext()) {
                final LongBigList data = iterator2.next().asLongBigList(this.width);
                for (long i = 0; i < data.size64(); i++, k++) {
                    final long value = data.getLong(i);
                    assert (value != 0) == marker.getBoolean(k);
                    if (value != 0)
                        assert value == newData.getLong(rank.rank(k)) : value + " != "
                                + newData.getLong(rank.rank(k));
                }
            }
            iterator2.close();
        }
        this.data = newData;
    } else {
        // No gain from compaction: concatenate the per-chunk data as is.
        final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance(m * this.width);
        this.data = dataBitVector.asLongBigList(this.width);

        OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        while (iterator.hasNext())
            dataBitVector.append(iterator.next());
        iterator.close();

        marker = null;
        rank = null;
    }

    offlineData.close();

    LOGGER.info("Completed.");
    LOGGER.debug("Forecast bit cost per element: " + (marker == null ? HypergraphSorter.GAMMA * this.width
            : HypergraphSorter.GAMMA + this.width + 0.126));
    LOGGER.info("Actual bit cost per element: " + (double) numBits() / n);

    if (signatureWidth > 0) {
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        signatures = chunkedHashStore.signatures(signatureWidth, pl);
    } else if (signatureWidth < 0) {
        // Self-signed function: the mask has -signatureWidth bits and no separate signature list is kept.
        signatureMask = -1L >>> Long.SIZE + signatureWidth;
        signatures = null;
    } else {
        signatureMask = 0;
        signatures = null;
    }

    // Only close a store this constructor created; a caller-provided store stays open.
    if (!givenChunkedHashStore)
        chunkedHashStore.close();
}

From source file:it.unimi.dsi.sux4j.mph.GOV3Function.java

/** Creates a new function for the given keys and values.
 * /* ww  w .  jav a2s.c  om*/
 * @param keys the keys in the domain of the function, or {@code null}.
 * @param transform a transformation strategy for the keys.
 * @param signatureWidth a positive number for a signature width, 0 for no signature, a negative value for a self-signed function; if nonzero, {@code values} must be {@code null} and {@code width} must be -1.
 * @param values values to be assigned to each element, in the same order of the iterator returned by <code>keys</code>; if {@code null}, the
 * assigned value will the the ordinal number of each element.
 * @param dataWidth the bit width of the <code>values</code>, or -1 if <code>values</code> is {@code null}.
 * @param indirect if true, <code>chunkedHashStore</code> contains ordinal positions, and <code>values</code> is a {@link LongIterable} that
 * must be accessed to retrieve the actual values.
 * @param compacted if true, the coefficients will be compacted. 
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys associated with their ranks (if there are no values, or {@code indirect} is true)
 * or values, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}. 
 */
protected GOV3Function(final Iterable<? extends T> keys, final TransformationStrategy<? super T> transform,
        int signatureWidth, final LongIterable values, final int dataWidth, final boolean indirect,
        final boolean compacted, final File tempDir, ChunkedHashStore<T> chunkedHashStore) throws IOException {
    this.transform = transform;

    if (signatureWidth != 0 && values != null)
        throw new IllegalArgumentException("You cannot sign a function if you specify its values");
    if (signatureWidth != 0 && dataWidth != -1)
        throw new IllegalArgumentException("You cannot specify a signature width and a data width");

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, -Math.min(signatureWidth, 0), pl);
        chunkedHashStore.reset(r.nextLong());
        if (values == null || indirect)
            chunkedHashStore.addAll(keys.iterator());
        else
            chunkedHashStore.addAll(keys.iterator(), values != null ? values.iterator() : null);
    }
    n = chunkedHashStore.size();
    defRetValue = signatureWidth < 0 ? 0 : -1; // Self-signed maps get zero as default resturn value.

    if (n == 0) {
        m = this.globalSeed = chunkShift = this.width = 0;
        data = null;
        marker = null;
        rank = null;
        offsetAndSeed = null;
        signatureMask = 0;
        signatures = null;
        if (!givenChunkedHashStore)
            chunkedHashStore.close();
        return;
    }

    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);

    offsetAndSeed = new long[numChunks + 1];

    this.width = signatureWidth < 0 ? -signatureWidth : dataWidth == -1 ? Fast.ceilLog2(n) : dataWidth;

    // Candidate data; might be discarded for compaction.
    @SuppressWarnings("resource")
    final OfflineIterable<BitVector, LongArrayBitVector> offlineData = new OfflineIterable<BitVector, LongArrayBitVector>(
            BitVectors.OFFLINE_SERIALIZER, LongArrayBitVector.getInstance());

    int duplicates = 0;

    for (;;) {
        LOGGER.debug("Generating GOV function with " + this.width + " output bits...");

        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int q = 0;
            final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance();
            final LongBigList data = dataBitVector.asLongBigList(this.width);
            long unsolvable = 0;
            for (final ChunkedHashStore.Chunk chunk : chunkedHashStore) {

                offsetAndSeed[q + 1] = offsetAndSeed[q] + (C_TIMES_256 * chunk.size() >>> 8);

                long seed = 0;
                final int v = (int) (offsetAndSeed[q + 1] - offsetAndSeed[q]);
                final Linear3SystemSolver<BitVector> solver = new Linear3SystemSolver<BitVector>(v,
                        chunk.size());

                for (;;) {
                    final boolean solved = solver.generateAndSolve(chunk, seed, new AbstractLongBigList() {
                        private final LongBigList valueList = indirect
                                ? (values instanceof LongList ? LongBigLists.asBigList((LongList) values)
                                        : (LongBigList) values)
                                : null;

                        @Override
                        public long size64() {
                            return chunk.size();
                        }

                        @Override
                        public long getLong(final long index) {
                            return indirect ? valueList.getLong(chunk.data(index)) : chunk.data(index);
                        }
                    });
                    unsolvable += solver.unsolvable;
                    if (solved)
                        break;
                    seed += SEED_STEP;
                    if (seed == 0)
                        throw new AssertionError("Exhausted local seeds");
                }

                this.offsetAndSeed[q] |= seed;

                dataBitVector.fill(false);
                data.size(v);
                q++;

                /* We assign values. */
                final long[] solution = solver.solution;
                for (int i = 0; i < solution.length; i++)
                    data.set(i, solution[i]);

                offlineData.add(dataBitVector);
                pl.update();
            }

            LOGGER.info("Unsolvable systems: " + unsolvable + "/" + numChunks + " ("
                    + Util.format(100.0 * unsolvable / numChunks) + "%)");

            pl.done();
            break;
        } catch (ChunkedHashStore.DuplicateException e) {
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates");
            LOGGER.warn("Found duplicate. Recomputing triples...");
            chunkedHashStore.reset(r.nextLong());
            pl.itemsName = "keys";
            if (values == null || indirect)
                chunkedHashStore.addAll(keys.iterator());
            else
                chunkedHashStore.addAll(keys.iterator(), values != null ? values.iterator() : null);
        }
    }

    if (DEBUG)
        System.out.println("Offsets: " + Arrays.toString(offsetAndSeed));

    globalSeed = chunkedHashStore.seed();

    // Check for compaction
    long nonZero = 0;
    m = offsetAndSeed[offsetAndSeed.length - 1];

    {
        final OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        while (iterator.hasNext()) {
            final LongBigList data = iterator.next().asLongBigList(this.width);
            for (long i = 0; i < data.size64(); i++)
                if (data.getLong(i) != 0)
                    nonZero++;
        }
        iterator.close();
    }

    if (compacted) {
        LOGGER.info("Compacting...");
        marker = LongArrayBitVector.ofLength(m);
        final LongBigList newData = LongArrayBitVector.getInstance().asLongBigList(this.width);
        newData.size(nonZero);
        nonZero = 0;

        final OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        long j = 0;
        while (iterator.hasNext()) {
            final LongBigList data = iterator.next().asLongBigList(this.width);
            for (long i = 0; i < data.size64(); i++, j++) {
                final long value = data.getLong(i);
                if (value != 0) {
                    marker.set(j);
                    newData.set(nonZero++, value);
                }
            }
        }
        iterator.close();

        rank = new Rank16(marker);

        if (ASSERTS) {
            final OfflineIterator<BitVector, LongArrayBitVector> iterator2 = offlineData.iterator();
            long k = 0;
            while (iterator2.hasNext()) {
                final LongBigList data = iterator2.next().asLongBigList(this.width);
                for (long i = 0; i < data.size64(); i++, k++) {
                    final long value = data.getLong(i);
                    assert (value != 0) == marker.getBoolean(k);
                    if (value != 0)
                        assert value == newData.getLong(rank.rank(k)) : value + " != "
                                + newData.getLong(rank.rank(k));
                }
            }
            iterator2.close();
        }
        this.data = newData;
    } else {
        final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance(m * this.width);
        this.data = dataBitVector.asLongBigList(this.width);

        OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        while (iterator.hasNext())
            dataBitVector.append(iterator.next());
        iterator.close();

        marker = null;
        rank = null;
    }

    offlineData.close();

    LOGGER.info("Completed.");
    LOGGER.debug(
            "Forecast bit cost per element: " + (marker == null ? C * this.width : C + this.width + 0.126));
    LOGGER.info("Actual bit cost per element: " + (double) numBits() / n);

    if (signatureWidth > 0) {
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        signatures = chunkedHashStore.signatures(signatureWidth, pl);
    } else if (signatureWidth < 0) {
        signatureMask = -1L >>> Long.SIZE + signatureWidth;
        signatures = null;
    } else {
        signatureMask = 0;
        signatures = null;
    }

    if (!givenChunkedHashStore)
        chunkedHashStore.close();
}

From source file:it.unimi.dsi.sux4j.mph.VLLcpMonotoneMinimalPerfectHashFunction.java

/**
 * Creates a new monotone minimal perfect hash function for the given keys.
 *
 * <p>The keys are scanned in iteration order and grouped into buckets of consecutive keys; for each
 * bucket the longest common prefix of its keys is recorded. The constructor then builds a function
 * mapping each prefix to its bucket, a minimal perfect hash on the keys, and per-key tables
 * holding the offset inside the bucket and the length of the bucket prefix.
 *
 * @param iterable the keys to hash.
 * @param numElements the number of keys returned by {@code iterable}, or -1 to have it computed
 * (from {@code Size64}, from {@link Collection#size()}, or by iterating over the keys).
 * @param transform a transformation strategy turning keys into bit vectors.
 * @throws IOException if an I/O error occurs while accessing the store or the offline data.
 * @throws IllegalArgumentException if two consecutive keys in a bucket are equal, one is a prefix of
 * the other, or they are not in lexicographical order.
 */
@SuppressWarnings("unused")
public VLLcpMonotoneMinimalPerfectHashFunction(final Iterable<? extends T> iterable, final int numElements,
        final TransformationStrategy<? super T> transform) throws IOException {

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    this.transform = transform;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();

    // Determine the number of keys, counting them only when no cheaper source is available.
    if (numElements == -1) {
        if (iterable instanceof Size64)
            n = ((Size64) iterable).size64();
        else if (iterable instanceof Collection)
            n = ((Collection<?>) iterable).size();
        else {
            long c = 0;
            for (T dummy : iterable)
                c++;
            n = c;
        }
    } else
        n = numElements;

    // Empty key set: initialise every field to its trivial value and stop.
    if (n == 0) {
        bucketSize = bucketSizeMask = log2BucketSize = 0;
        lcp2Bucket = null;
        offsets = null;
        lcpLengths = null;
        mph = null;
        return;
    }

    defRetValue = -1; // For the very few cases in which we can decide

    // The bucket size is the theoretical estimate rounded up to a power of two, so that a position
    // can be split into bucket and offset with a shift and a mask.
    int theoreticalBucketSize = (int) Math
            .ceil(1 + GOV3Function.C * Math.log(2) + Math.log(n) - Math.log(1 + Math.log(n)));
    log2BucketSize = Fast.ceilLog2(theoreticalBucketSize);
    bucketSize = 1 << log2BucketSize;
    bucketSizeMask = bucketSize - 1;

    final long numBuckets = (n + bucketSize - 1) / bucketSize;

    LongArrayBitVector prev = LongArrayBitVector.getInstance();
    LongArrayBitVector curr = LongArrayBitVector.getInstance();
    int currLcp = 0;
    int maxLcp = 0, minLcp = Integer.MAX_VALUE;
    long maxLength = 0, totalLength = 0;

    @SuppressWarnings("resource")
    final ChunkedHashStore<BitVector> chunkedHashStore = new ChunkedHashStore<BitVector>(
            TransformationStrategies.identity(), pl);
    chunkedHashStore.reset(r.nextLong());
    @SuppressWarnings("resource")
    OfflineIterable<BitVector, LongArrayBitVector> lcps = new OfflineIterable<BitVector, LongArrayBitVector>(
            BitVectors.OFFLINE_SERIALIZER, LongArrayBitVector.getInstance());
    pl.expectedUpdates = n;
    pl.start("Scanning collection...");

    // Single pass over the keys: add each one to the store and compute one prefix per bucket.
    Iterator<? extends T> iterator = iterable.iterator();
    for (long b = 0; b < numBuckets; b++) {
        // First key of the bucket: the candidate prefix starts as the whole key.
        prev.replace(transform.toBitVector(iterator.next()));
        chunkedHashStore.add(prev);
        pl.lightUpdate();
        maxLength = Math.max(maxLength, prev.length());
        totalLength += Fast.length(1 + prev.length());
        currLcp = (int) prev.length();
        // The last bucket may hold fewer than bucketSize keys.
        final int currBucketSize = (int) Math.min(bucketSize, n - b * bucketSize);

        for (int i = 0; i < currBucketSize - 1; i++) {
            curr.replace(transform.toBitVector(iterator.next()));
            chunkedHashStore.add(curr);
            pl.lightUpdate();
            final int prefix = (int) curr.longestCommonPrefixLength(prev);
            if (prefix == prev.length() && prefix == curr.length())
                throw new IllegalArgumentException("The input bit vectors are not distinct");
            if (prefix == prev.length() || prefix == curr.length())
                throw new IllegalArgumentException("The input bit vectors are not prefix-free");
            // The keys differ at position prefix; a one bit in the previous key means it is the larger one.
            if (prev.getBoolean(prefix))
                throw new IllegalArgumentException("The input bit vectors are not lexicographically sorted");

            currLcp = Math.min(prefix, currLcp);
            prev.replace(curr);

            maxLength = Math.max(maxLength, prev.length());
            totalLength += Fast.length(1 + prev.length());
        }

        lcps.add(prev.subVector(0, currLcp));
        maxLcp = Math.max(maxLcp, currLcp);
        minLcp = Math.min(minLcp, currLcp);
    }

    pl.done();

    // Build function assigning each lcp to its bucket.
    lcp2Bucket = new GOV3Function.Builder<BitVector>().keys(lcps).transform(TransformationStrategies.identity())
            .build();
    // Length of the prefix of each bucket, indexed by bucket.
    final int[][] lcpLength = IntBigArrays.newBigArray(lcps.size64());
    long p = 0;
    for (LongArrayBitVector bv : lcps)
        IntBigArrays.set(lcpLength, p++, (int) bv.length());

    if (DEBUG) {
        for (BitVector v : lcps)
            System.err.println(v + " " + v.length());
        // p was advanced to the number of buckets by the loop above: restart from zero so that
        // each prefix is compared with its own ordinal.
        p = 0;
        for (BitVector v : lcps) {
            final long value = lcp2Bucket.getLong(v);
            if (p++ != value) {
                System.err.println("p: " + (p - 1) + "  value: " + value + " key:" + v);
                throw new AssertionError();
            }
        }
    }

    lcps.close();

    final Iterable<BitVector> bitVectors = TransformationStrategies.wrap(iterable, transform);
    // Build mph on elements.
    mph = new GOVMinimalPerfectHashFunction.Builder<BitVector>().keys(bitVectors)
            .transform(TransformationStrategies.identity()).store(chunkedHashStore).build();
    this.seed = chunkedHashStore.seed();

    // Build function assigning the lcp length and the bucketing data to each element.
    (offsets = LongArrayBitVector.getInstance().asLongBigList(log2BucketSize)).size(n);
    LongBigList lcpLengthsTemp = LongArrayBitVector.getInstance().asLongBigList(Fast.length(maxLcp));
    lcpLengthsTemp.size(n);

    LOGGER.info("Generating data tables...");

    for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
        for (long[] quadruple : chunk) {
            final long index = mph.getLongByTriple(quadruple);
            // quadruple[3] is split into a bucket number (upper bits) and an offset in the bucket (lower bits).
            offsets.set(index, quadruple[3] & bucketSizeMask);
            // The bucket number is used as a long index: no narrowing to int.
            lcpLengthsTemp.set(index, IntBigArrays.get(lcpLength, quadruple[3] >> log2BucketSize));
        }
    }

    chunkedHashStore.close();

    lcpLengths = new EliasFanoLongBigList(lcpLengthsTemp.iterator(), minLcp, true);

    if (DEBUG) {
        // Check that each key is mapped back to its position in iteration order.
        p = 0;
        for (T key : iterable) {
            BitVector bv = transform.toBitVector(key);
            long index = mph.getLong(bv);
            if (p++ != lcp2Bucket.getLong(bv.subVector(0, lcpLengths.getLong(index))) * bucketSize
                    + offsets.getLong(index)) {
                System.err.println("p: " + (p - 1) + "  Key: " + key + " bucket size: " + bucketSize + " lcp "
                        + transform.toBitVector(key).subVector(0, lcpLengths.getLong(index)) + " lcp length: "
                        + lcpLengths.getLong(index) + " bucket "
                        + lcp2Bucket.getLong(transform.toBitVector(key).subVector(0, lcpLengths.getLong(index)))
                        + " offset: " + offsets.getLong(index));
                throw new AssertionError();
            }
        }
    }

    LOGGER.debug("Bucket size: " + bucketSize);
    final double avgLength = (double) totalLength / n;
    LOGGER.debug("Forecast bit cost per element: " + (2 * GOV3Function.C + 2 + avgLength + Fast.log2(avgLength)
            + Fast.log2(Math.E) - Fast.log2(Fast.log2(Math.E)) + Fast.log2(1 + Fast.log2(n))));
    LOGGER.info("Actual bit cost per element: " + (double) numBits() / n);
}