Example usage for java.lang Long SIZE

List of usage examples for java.lang Long SIZE

Introduction

On this page you can find example usages of java.lang.Long.SIZE.

Prototype

int SIZE

To view the source code for java.lang.Long.SIZE, click the Source Link.

Document

The number of bits used to represent a long value in two's complement binary form.

Usage

From source file:it.unimi.dsi.sux4j.mph.CHDMinimalPerfectHashFunction.java

/**
 * Computes the output associated with a triple of hashes, skipping the signature test.
 *
 * <p>This is a stripped-down replica of {@link #getLongByTriple(long[])} used in the constructor.
 * The two methods <strong>must</strong> be kept in sync.
 *
 * @param triple a triple of hashes; only the first element is used to select the chunk.
 * @return the computed position minus its rank.
 */
private long getLongByTripleNoCheck(final long[] triple) {
    // In Java a long shifted by 64 bits is left unchanged, so a shift of Long.SIZE is
    // special-cased to select chunk 0.
    final int chunk = chunkShift == Long.SIZE ? 0 : (int) (triple[0] >>> chunkShift);
    // Each chunk owns three consecutive slots; base + 3 and base + 4 belong to the next chunk.
    final int base = chunk * 3;
    final long[] table = this.offsetNumBucketsSeed;
    final long chunkOffset = table[base];
    final int p = (int) (table[base + 3] - chunkOffset);

    final long[] hash = new long[3];
    Hashes.spooky4(triple, table[base + 2], hash);
    hash[1] = (int) ((hash[1] >>> 1) % p);
    hash[2] = (int) ((hash[2] >>> 1) % (p - 1)) + 1;

    final long firstBucket = table[base + 1];
    final long bucketsInChunk = table[base + 4] - firstBucket;
    final long c = coefficients.getLong(firstBucket + (hash[0] >>> 1) % bucketsInChunk);

    final long position = chunkOffset + (int) ((hash[1] + (c % p) * hash[2] + c / p) % p);
    return position - rank.rank(position);
}

From source file:it.unimi.dsi.sux4j.mph.MWHCFunction.java

/** Creates a new function for the given keys and values.
 *
 * <p>If no chunked hash store is provided, one is created from {@code keys} and closed before
 * this constructor returns normally (including when the key set turns out to be empty).
 *
 * @param keys the keys in the domain of the function, or {@code null}.
 * @param transform a transformation strategy for the keys.
 * @param signatureWidth a positive number for a signature width, 0 for no signature, a negative value for a self-signed function; if nonzero, {@code values} must be {@code null} and {@code dataWidth} must be -1.
 * @param values values to be assigned to each element, in the same order of the iterator returned by <code>keys</code>; if {@code null}, the
 * assigned value will be the ordinal number of each element.
 * @param dataWidth the bit width of the <code>values</code>, or -1 if <code>values</code> is {@code null}.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys associated with their ranks (if there are no values, or {@code indirect} is true)
 * or values, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}.
 * @param indirect if true, <code>chunkedHashStore</code> contains ordinal positions, and <code>values</code> is a {@link LongIterable} that
 * must be accessed to retrieve the actual values.
 * @throws IOException if an I/O error is reported by the underlying stores.
 * @throws IllegalArgumentException if a signature width is combined with values or a data width, if neither
 * keys nor a chunked hash store are provided, or if the keys keep producing duplicates.
 * @throws IllegalStateException if a duplicate is found in a provided store and no keys are available to rebuild it.
 */
protected MWHCFunction(final Iterable<? extends T> keys, final TransformationStrategy<? super T> transform,
        int signatureWidth, final LongIterable values, final int dataWidth, final File tempDir,
        ChunkedHashStore<T> chunkedHashStore, boolean indirect) throws IOException {
    this.transform = transform;

    if (signatureWidth != 0 && values != null)
        throw new IllegalArgumentException("You cannot sign a function if you specify its values");
    if (signatureWidth != 0 && dataWidth != -1)
        throw new IllegalArgumentException("You cannot specify a signature width and a data width");

    // In indirect mode the values are accessed by position, so they must be a random-access list of longs.
    final LongBigList valueList = indirect
            ? (values instanceof LongList ? LongBigLists.asBigList((LongList) values) : (LongBigList) values)
            : null;

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, -Math.min(signatureWidth, 0), pl);
        chunkedHashStore.reset(r.nextLong());
        if (values == null || indirect)
            chunkedHashStore.addAll(keys.iterator());
        else
            // Here values is necessarily non-null.
            chunkedHashStore.addAll(keys.iterator(), values.iterator());
    }
    n = chunkedHashStore.size();
    defRetValue = signatureWidth < 0 ? 0 : -1; // Self-signed maps get zero as default return value.

    if (n == 0) {
        m = this.globalSeed = chunkShift = this.width = 0;
        data = null;
        marker = null;
        rank = null;
        seed = null;
        offset = null;
        signatureMask = 0;
        signatures = null;
        // A store created by this constructor must be released on the early exit, too.
        if (!givenChunkedHashStore)
            chunkedHashStore.close();
        return;
    }

    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);

    seed = new long[numChunks];
    offset = new long[numChunks + 1];

    // Self-signed functions store -signatureWidth bits; otherwise use the given width, or enough bits for a rank.
    this.width = signatureWidth < 0 ? -signatureWidth : dataWidth == -1 ? Fast.ceilLog2(n) : dataWidth;

    // Candidate data; might be discarded for compaction.
    @SuppressWarnings("resource")
    final OfflineIterable<BitVector, LongArrayBitVector> offlineData = new OfflineIterable<BitVector, LongArrayBitVector>(
            BitVectors.OFFLINE_SERIALIZER, LongArrayBitVector.getInstance());

    int duplicates = 0;

    // Retry loop: on a duplicate triple the store is reseeded and refilled, then all chunks are reprocessed.
    for (;;) {
        LOGGER.debug("Generating MWHC function with " + this.width + " output bits...");

        long seed = 0;
        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int q = 0;
            final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance();
            final LongBigList data = dataBitVector.asLongBigList(this.width);
            for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
                HypergraphSorter<BitVector> sorter = new HypergraphSorter<BitVector>(chunk.size());
                // Try local seeds until the sorter succeeds on this chunk.
                do {
                    seed = r.nextLong();
                } while (!sorter.generateAndSort(chunk.iterator(), seed));

                this.seed[q] = seed;
                dataBitVector.fill(false);
                data.size(sorter.numVertices);
                offset[q + 1] = offset[q] + sorter.numVertices;

                /* We assign values. */

                int top = chunk.size(), x, k;
                final int[] stack = sorter.stack;
                final int[] vertex1 = sorter.vertex1;
                final int[] vertex2 = sorter.vertex2;
                final int[] edge = sorter.edge;

                // Unwind the stack: each popped vertex is set so that the XOR of the three vertices gives the value.
                while (top > 0) {
                    x = stack[--top];
                    k = edge[x];
                    final long s = data.getLong(vertex1[x]) ^ data.getLong(vertex2[x]);
                    final long value = indirect ? valueList.getLong(chunk.data(k)) : chunk.data(k);
                    data.set(x, value ^ s);

                    if (ASSERTS)
                        assert (value == (data.getLong(x) ^ data.getLong(vertex1[x])
                                ^ data.getLong(vertex2[x]))) : "<" + x + "," + vertex1[x] + "," + vertex2[x]
                                        + ">: " + value + " != " + (data.getLong(x) ^ data.getLong(vertex1[x])
                                                ^ data.getLong(vertex2[x]));
                }

                q++;
                offlineData.add(dataBitVector);
                pl.update();
            }

            pl.done();
            break;
        } catch (ChunkedHashStore.DuplicateException e) {
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates");
            LOGGER.warn("Found duplicate. Recomputing triples...");
            chunkedHashStore.reset(r.nextLong());
            pl.itemsName = "keys";
            if (values == null || indirect)
                chunkedHashStore.addAll(keys.iterator());
            else
                // Here values is necessarily non-null.
                chunkedHashStore.addAll(keys.iterator(), values.iterator());
        }
    }

    if (DEBUG)
        System.out.println("Offsets: " + Arrays.toString(offset));

    globalSeed = chunkedHashStore.seed();

    // Check for compaction
    long nonZero = 0;
    m = offset[offset.length - 1];

    {
        final OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        while (iterator.hasNext()) {
            final LongBigList data = iterator.next().asLongBigList(this.width);
            for (long i = 0; i < data.size64(); i++)
                if (data.getLong(i) != 0)
                    nonZero++;
        }
        iterator.close();
    }
    // We estimate size using Rank16
    if (nonZero * this.width + m * 1.126 < m * this.width) {
        LOGGER.info("Compacting...");
        marker = LongArrayBitVector.ofLength(m);
        final LongBigList newData = LongArrayBitVector.getInstance().asLongBigList(this.width);
        newData.size(nonZero);
        nonZero = 0;

        // Copy only nonzero entries, marking their original positions.
        final OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        long j = 0;
        while (iterator.hasNext()) {
            final LongBigList data = iterator.next().asLongBigList(this.width);
            for (long i = 0; i < data.size64(); i++, j++) {
                final long value = data.getLong(i);
                if (value != 0) {
                    marker.set(j);
                    newData.set(nonZero++, value);
                }
            }
        }
        iterator.close();

        rank = new Rank16(marker);

        if (ASSERTS) {
            final OfflineIterator<BitVector, LongArrayBitVector> iterator2 = offlineData.iterator();
            long k = 0;
            while (iterator2.hasNext()) {
                final LongBigList data = iterator2.next().asLongBigList(this.width);
                for (long i = 0; i < data.size64(); i++, k++) {
                    final long value = data.getLong(i);
                    assert (value != 0) == marker.getBoolean(k);
                    if (value != 0)
                        assert value == newData.getLong(rank.rank(k)) : value + " != "
                                + newData.getLong(rank.rank(k));
                }
            }
            iterator2.close();
        }
        this.data = newData;
    } else {
        // No compaction: concatenate the per-chunk bit vectors as they are.
        final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance(m * this.width);
        this.data = dataBitVector.asLongBigList(this.width);

        OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        while (iterator.hasNext())
            dataBitVector.append(iterator.next());
        iterator.close();

        marker = null;
        rank = null;
    }

    offlineData.close();

    LOGGER.info("Completed.");
    LOGGER.debug("Forecast bit cost per element: " + (marker == null ? HypergraphSorter.GAMMA * this.width
            : HypergraphSorter.GAMMA + this.width + 0.126));
    LOGGER.info("Actual bit cost per element: " + (double) numBits() / n);

    if (signatureWidth > 0) {
        // Mask keeping the lowest signatureWidth bits.
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        signatures = chunkedHashStore.signatures(signatureWidth, pl);
    } else if (signatureWidth < 0) {
        // Self-signed: mask keeping the lowest -signatureWidth bits, with no separate signature list.
        signatureMask = -1L >>> Long.SIZE + signatureWidth;
        signatures = null;
    } else {
        signatureMask = 0;
        signatures = null;
    }

    if (!givenChunkedHashStore)
        chunkedHashStore.close();
}

From source file:it.unimi.dsi.webgraph.algo.HyperBall.java

/** Creates a new HyperBall instance.
 * /*w ww  .  j a v  a2 s  .  c o m*/
 * @param g the graph whose neighbourhood function you want to compute.
 * @param gt the transpose of <code>g</code>, or <code>null</code>.
 * @param log2m the logarithm of the number of registers per counter.
 * @param pl a progress logger, or <code>null</code>.
 * @param numberOfThreads the number of threads to be used (0 for automatic sizing).
 * @param bufferSize the size of an I/O buffer in bytes (0 for {@link #DEFAULT_BUFFER_SIZE}).
 * @param granularity the number of node per task in a multicore environment (it will be rounded to the next multiple of 64), or 0 for {@link #DEFAULT_GRANULARITY}.
 * @param external if true, results of an iteration will be stored on disk.
 * @param doSumOfDistances whether the sum of distances from each node should be computed.
 * @param doSumOfInverseDistances whether the sum of inverse distances from each node should be computed.
 * @param discountFunction an array (possibly <code>null</code>) of discount functions. 
 * @param seed the random seed passed to {@link HyperLogLogCounterArray#HyperLogLogCounterArray(long, long, int, long)}.
 */
public HyperBall(final ImmutableGraph g, final ImmutableGraph gt, final int log2m, final ProgressLogger pl,
        final int numberOfThreads, final int bufferSize, final int granularity, final boolean external,
        final boolean doSumOfDistances, final boolean doSumOfInverseDistances,
        final Int2DoubleFunction[] discountFunction, final long seed) throws IOException {
    super(g.numNodes(), g.numNodes(), ensureRegisters(log2m), seed);

    info("Seed : " + Long.toHexString(seed));

    gotTranspose = gt != null;
    localNextMustBeChecked = gotTranspose
            ? IntSets.synchronize(new IntOpenHashSet(Hash.DEFAULT_INITIAL_SIZE, Hash.VERY_FAST_LOAD_FACTOR))
            : null;

    numNodes = g.numNodes();
    try {
        numArcs = g.numArcs();
    } catch (UnsupportedOperationException e) {
        // No number of arcs. We have to enumerate.
        long a = 0;
        final NodeIterator nodeIterator = g.nodeIterator();
        for (int i = g.numNodes(); i-- != 0;) {
            nodeIterator.nextInt();
            a += nodeIterator.outdegree();
        }
        numArcs = a;
    }
    squareNumNodes = (double) numNodes * numNodes;

    cumulativeOutdegrees = new EliasFanoCumulativeOutdegreeList(g, numArcs, Math.max(0, 64 / m - 1));

    modifiedCounter = new boolean[numNodes];
    modifiedResultCounter = external ? null : new boolean[numNodes];
    if (gt != null) {
        mustBeChecked = new boolean[numNodes];
        nextMustBeChecked = new boolean[numNodes];
        if (gt.numNodes() != g.numNodes())
            throw new IllegalArgumentException("The graph and its transpose have a different number of nodes");
        if (gt.numArcs() != g.numArcs())
            throw new IllegalArgumentException("The graph and its transpose have a different number of arcs");
    }

    this.pl = pl;
    this.external = external;
    this.doSumOfDistances = doSumOfDistances;
    this.doSumOfInverseDistances = doSumOfInverseDistances;
    this.discountFunction = discountFunction == null ? new Int2DoubleFunction[0] : discountFunction;
    this.numberOfThreads = numberOfThreads(numberOfThreads);
    this.granularity = numberOfThreads == 1 ? numNodes
            : granularity == 0 ? DEFAULT_GRANULARITY : ((granularity + Long.SIZE - 1) & ~(Long.SIZE - 1));
    this.bufferSize = Math.max(1, (bufferSize == 0 ? DEFAULT_BUFFER_SIZE : bufferSize)
            / ((Long.SIZE / Byte.SIZE) * (counterLongwords + 1)));

    info("Relative standard deviation: "
            + Util.format(100 * HyperLogLogCounterArray.relativeStandardDeviation(log2m)) + "% (" + m
            + " registers/counter, " + registerSize + " bits/register, " + Util.format(m * registerSize / 8.)
            + " bytes/counter)");
    if (external)
        info("Running " + this.numberOfThreads + " threads with a buffer of " + Util.formatSize(this.bufferSize)
                + " counters");
    else
        info("Running " + this.numberOfThreads + " threads");

    thread = new IterationThread[this.numberOfThreads];

    if (external) {
        info("Creating update list...");
        updateFile = File.createTempFile(HyperBall.class.getName(), "-temp");
        updateFile.deleteOnExit();
        fileChannel = (randomAccessFile = new RandomAccessFile(updateFile, "rw")).getChannel();
    } else {
        updateFile = null;
        fileChannel = null;
    }

    nodes = new AtomicInteger();
    arcs = new AtomicLong();
    modified = new AtomicInteger();
    unwritten = new AtomicInteger();

    neighbourhoodFunction = new DoubleArrayList();
    sumOfDistances = doSumOfDistances ? new float[numNodes] : null;
    sumOfInverseDistances = doSumOfInverseDistances ? new float[numNodes] : null;
    discountedCentrality = new float[this.discountFunction.length][];
    for (int i = 0; i < this.discountFunction.length; i++)
        discountedCentrality[i] = new float[numNodes];

    info("HyperBall memory usage: " + Util.formatSize2(usedMemory()) + " [not counting graph(s)]");

    if (!external) {
        info("Allocating result bit vectors...");
        // Allocate vectors that will store the result.
        resultBits = new long[bits.length][];
        resultRegisters = new LongBigList[bits.length];
        for (int i = bits.length; i-- != 0;)
            resultRegisters[i] = (LongArrayBitVector.wrap(resultBits[i] = new long[bits[i].length]))
                    .asLongBigList(registerSize);
    } else {
        resultBits = null;
        resultRegisters = null;
    }

    lock = new ReentrantLock();
    allWaiting = lock.newCondition();
    start = lock.newCondition();
    aliveThreads = this.numberOfThreads;

    if (this.numberOfThreads == 1)
        (thread[0] = new IterationThread(g, gt, 0)).start();
    else
        for (int i = 0; i < this.numberOfThreads; i++)
            (thread[i] = new IterationThread(g.copy(), gt != null ? gt.copy() : null, i)).start();

    // We wait for all threads being read to start.
    lock.lock();
    try {
        if (aliveThreads != 0)
            allWaiting.await();
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    } finally {
        lock.unlock();
    }
}

From source file:it.unimi.di.big.mg4j.tool.PartitionLexically.java

/**
 * Copies {@code length} bits from one bit stream to another, in pieces of at most
 * {@link Long#SIZE} bits.
 *
 * @param from the stream bits are extracted from.
 * @param to the stream bits are appended to.
 * @param length the number of bits to copy; nothing is copied if it is not positive.
 * @throws IOException if one of the streams reports an I/O error.
 */
private static void copy(final LongWordInputBitStream from, final LongWordOutputBitStream to, long length)
        throws IOException {
    for (long remaining = length; remaining > 0;) {
        // Each step moves a full word, or whatever is left for the final piece.
        final int width = (int) Math.min(Long.SIZE, remaining);
        to.append(from.extract(width), width);
        remaining -= width;
    }
}

From source file:it.unimi.dsi.sux4j.mph.MWHCFunction.java

/**
 * Returns the value associated with the given key.
 *
 * @param o a key; it is cast to the key type and turned into a bit vector by the transformation strategy.
 * @return the stored value, or the default return value if the function is empty, the key maps
 * to no edge, or a signature check fails; note that self-signed functions return 1 on a match.
 */
@SuppressWarnings("unchecked")
public long getLong(final Object o) {
    if (n == 0)
        return defRetValue;

    final long[] triple = new long[3];
    Hashes.spooky4(transform.toBitVector((T) o), globalSeed, triple);

    // In Java a long shifted by 64 bits is left unchanged, so that case is mapped to chunk 0 explicitly.
    final int chunk = chunkShift == Long.SIZE ? 0 : (int) (triple[0] >>> chunkShift);
    final long chunkOffset = offset[chunk];
    final int[] edge = new int[3];
    HypergraphSorter.tripleToEdge(triple, seed[chunk], (int) (offset[chunk + 1] - chunkOffset), edge);
    if (edge[0] == -1)
        return defRetValue;

    final long v0 = edge[0] + chunkOffset;
    final long v1 = edge[1] + chunkOffset;
    final long v2 = edge[2] + chunkOffset;

    long value;
    if (rank == null) {
        value = data.getLong(v0) ^ data.getLong(v1) ^ data.getLong(v2);
    } else {
        // Compacted representation: unmarked positions hold an implicit zero.
        value = 0;
        if (marker.getBoolean(v0))
            value ^= data.getLong(rank.rank(v0));
        if (marker.getBoolean(v1))
            value ^= data.getLong(rank.rank(v1));
        if (marker.getBoolean(v2))
            value ^= data.getLong(rank.rank(v2));
    }

    if (signatureMask == 0)
        return value;

    // Out-of-set strings can generate bizarre 3-hyperedges.
    if (signatures != null) {
        if (value >= n || ((signatures.getLong(value) ^ triple[0]) & signatureMask) != 0)
            return defRetValue;
        return value;
    }

    // Self-signed function: the stored value itself is compared with the masked hash.
    if (((value ^ triple[0]) & signatureMask) != 0)
        return defRetValue;
    return 1;
}

From source file:it.unimi.dsi.sux4j.mph.GOV4Function.java

/** Low-level access to the output of this function.
 *
 * <p>This method makes it possible to build several kind of functions on the same {@link ChunkedHashStore} and
 * then retrieve the resulting values by generating a single triple of hashes. The method
 * {@link TwoStepsGOV3Function#getLong(Object)} is a good example of this technique.
 *
 * @param triple a triple generated as documented in {@link ChunkedHashStore}.
 * @return the output of the function.
 */
public long getLongByTriple(final long[] triple) {
    if (n == 0)
        return defRetValue;

    // In Java a long shifted by 64 bits is left unchanged, so that case is mapped to chunk 0 explicitly.
    final int chunk = chunkShift == Long.SIZE ? 0 : (int) (triple[0] >>> chunkShift);
    // Each entry packs an offset (bits selected by OFFSET_MASK) and a seed (the remaining bits).
    final long chunkOffset = offsetAndSeed[chunk] & OFFSET_MASK;
    final int[] equation = new int[4];
    Linear4SystemSolver.tripleToEquation(triple, offsetAndSeed[chunk] & ~OFFSET_MASK,
            (int) ((offsetAndSeed[chunk + 1] & OFFSET_MASK) - chunkOffset), equation);

    final long v0 = equation[0] + chunkOffset;
    final long v1 = equation[1] + chunkOffset;
    final long v2 = equation[2] + chunkOffset;
    final long v3 = equation[3] + chunkOffset;

    final long value = data.getLong(v0) ^ data.getLong(v1) ^ data.getLong(v2) ^ data.getLong(v3);
    if (signatureMask == 0)
        return value;

    if (signatures != null) {
        if (value >= n || ((signatures.getLong(value) ^ triple[0]) & signatureMask) != 0)
            return defRetValue;
        return value;
    }

    // Self-signed function: the stored value itself is compared with the masked hash.
    if (((value ^ triple[0]) & signatureMask) != 0)
        return defRetValue;
    return 1;
}

From source file:it.unimi.dsi.sux4j.mph.MWHCFunction.java

/** Low-level access to the output of this function.
 *
 * <p>This method makes it possible to build several kind of functions on the same {@link ChunkedHashStore} and
 * then retrieve the resulting values by generating a single triple of hashes. The method 
 * {@link TwoStepsMWHCFunction#getLong(Object)} is a good example of this technique.
 *
 * @param triple a triple generated as documented in {@link ChunkedHashStore}.
 * @return the output of the function./*w ww  . ja  va 2s . com*/
 */
public long getLongByTriple(final long[] triple) {
    if (n == 0)
        return defRetValue;
    final int[] e = new int[3];
    final int chunk = chunkShift == Long.SIZE ? 0 : (int) (triple[0] >>> chunkShift);
    final long chunkOffset = offset[chunk];
    HypergraphSorter.tripleToEdge(triple, seed[chunk], (int) (offset[chunk + 1] - chunkOffset), e);
    final long e0 = e[0] + chunkOffset, e1 = e[1] + chunkOffset, e2 = e[2] + chunkOffset;
    if (e0 == -1)
        return defRetValue;
    final long result = rank == null ? data.getLong(e0) ^ data.getLong(e1) ^ data.getLong(e2)
            : (marker.getBoolean(e0) ? data.getLong(rank.rank(e0)) : 0)
                    ^ (marker.getBoolean(e1) ? data.getLong(rank.rank(e1)) : 0)
                    ^ (marker.getBoolean(e2) ? data.getLong(rank.rank(e2)) : 0);
    if (signatureMask == 0)
        return result;
    // Out-of-set strings can generate bizarre 3-hyperedges.
    if (signatures != null)
        return result >= n || signatures.getLong(result) != (triple[0] & signatureMask) ? defRetValue : result;
    else
        return ((result ^ triple[0]) & signatureMask) != 0 ? defRetValue : 1;
}

From source file:it.unimi.dsi.sux4j.mph.GOV4Function.java

/** Returns the number of bits used by this structure.
 * /*from w  w  w. j av  a 2s  .  c  o m*/
 * @return the number of bits used by this structure.
 */
public long numBits() {
    if (n == 0)
        return 0;
    return (data != null ? data.size64() : 0) * width + offsetAndSeed.length * (long) Long.SIZE;
}

From source file:it.unimi.dsi.sux4j.mph.GOV3Function.java

/** Creates a new function for the given keys and values.
 * /* w  ww  . java 2  s  .  co m*/
 * @param keys the keys in the domain of the function, or {@code null}.
 * @param transform a transformation strategy for the keys.
 * @param signatureWidth a positive number for a signature width, 0 for no signature, a negative value for a self-signed function; if nonzero, {@code values} must be {@code null} and {@code width} must be -1.
 * @param values values to be assigned to each element, in the same order of the iterator returned by <code>keys</code>; if {@code null}, the
 * assigned value will the the ordinal number of each element.
 * @param dataWidth the bit width of the <code>values</code>, or -1 if <code>values</code> is {@code null}.
 * @param indirect if true, <code>chunkedHashStore</code> contains ordinal positions, and <code>values</code> is a {@link LongIterable} that
 * must be accessed to retrieve the actual values.
 * @param compacted if true, the coefficients will be compacted. 
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys associated with their ranks (if there are no values, or {@code indirect} is true)
 * or values, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}. 
 */
protected GOV3Function(final Iterable<? extends T> keys, final TransformationStrategy<? super T> transform,
        int signatureWidth, final LongIterable values, final int dataWidth, final boolean indirect,
        final boolean compacted, final File tempDir, ChunkedHashStore<T> chunkedHashStore) throws IOException {
    this.transform = transform;

    if (signatureWidth != 0 && values != null)
        throw new IllegalArgumentException("You cannot sign a function if you specify its values");
    if (signatureWidth != 0 && dataWidth != -1)
        throw new IllegalArgumentException("You cannot specify a signature width and a data width");

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        if (keys == null)
            throw new IllegalArgumentException(
                    "If you do not provide a chunked hash store, you must provide the keys");
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, -Math.min(signatureWidth, 0), pl);
        chunkedHashStore.reset(r.nextLong());
        if (values == null || indirect)
            chunkedHashStore.addAll(keys.iterator());
        else
            chunkedHashStore.addAll(keys.iterator(), values != null ? values.iterator() : null);
    }
    n = chunkedHashStore.size();
    defRetValue = signatureWidth < 0 ? 0 : -1; // Self-signed maps get zero as default resturn value.

    if (n == 0) {
        m = this.globalSeed = chunkShift = this.width = 0;
        data = null;
        marker = null;
        rank = null;
        offsetAndSeed = null;
        signatureMask = 0;
        signatures = null;
        if (!givenChunkedHashStore)
            chunkedHashStore.close();
        return;
    }

    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);

    offsetAndSeed = new long[numChunks + 1];

    this.width = signatureWidth < 0 ? -signatureWidth : dataWidth == -1 ? Fast.ceilLog2(n) : dataWidth;

    // Candidate data; might be discarded for compaction.
    @SuppressWarnings("resource")
    final OfflineIterable<BitVector, LongArrayBitVector> offlineData = new OfflineIterable<BitVector, LongArrayBitVector>(
            BitVectors.OFFLINE_SERIALIZER, LongArrayBitVector.getInstance());

    int duplicates = 0;

    for (;;) {
        LOGGER.debug("Generating GOV function with " + this.width + " output bits...");

        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int q = 0;
            final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance();
            final LongBigList data = dataBitVector.asLongBigList(this.width);
            long unsolvable = 0;
            for (final ChunkedHashStore.Chunk chunk : chunkedHashStore) {

                offsetAndSeed[q + 1] = offsetAndSeed[q] + (C_TIMES_256 * chunk.size() >>> 8);

                long seed = 0;
                final int v = (int) (offsetAndSeed[q + 1] - offsetAndSeed[q]);
                final Linear3SystemSolver<BitVector> solver = new Linear3SystemSolver<BitVector>(v,
                        chunk.size());

                for (;;) {
                    final boolean solved = solver.generateAndSolve(chunk, seed, new AbstractLongBigList() {
                        private final LongBigList valueList = indirect
                                ? (values instanceof LongList ? LongBigLists.asBigList((LongList) values)
                                        : (LongBigList) values)
                                : null;

                        @Override
                        public long size64() {
                            return chunk.size();
                        }

                        @Override
                        public long getLong(final long index) {
                            return indirect ? valueList.getLong(chunk.data(index)) : chunk.data(index);
                        }
                    });
                    unsolvable += solver.unsolvable;
                    if (solved)
                        break;
                    seed += SEED_STEP;
                    if (seed == 0)
                        throw new AssertionError("Exhausted local seeds");
                }

                this.offsetAndSeed[q] |= seed;

                dataBitVector.fill(false);
                data.size(v);
                q++;

                /* We assign values. */
                final long[] solution = solver.solution;
                for (int i = 0; i < solution.length; i++)
                    data.set(i, solution[i]);

                offlineData.add(dataBitVector);
                pl.update();
            }

            LOGGER.info("Unsolvable systems: " + unsolvable + "/" + numChunks + " ("
                    + Util.format(100.0 * unsolvable / numChunks) + "%)");

            pl.done();
            break;
        } catch (ChunkedHashStore.DuplicateException e) {
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates");
            LOGGER.warn("Found duplicate. Recomputing triples...");
            chunkedHashStore.reset(r.nextLong());
            pl.itemsName = "keys";
            if (values == null || indirect)
                chunkedHashStore.addAll(keys.iterator());
            else
                chunkedHashStore.addAll(keys.iterator(), values != null ? values.iterator() : null);
        }
    }

    if (DEBUG)
        System.out.println("Offsets: " + Arrays.toString(offsetAndSeed));

    globalSeed = chunkedHashStore.seed();

    // Check for compaction
    long nonZero = 0;
    m = offsetAndSeed[offsetAndSeed.length - 1];

    {
        final OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        while (iterator.hasNext()) {
            final LongBigList data = iterator.next().asLongBigList(this.width);
            for (long i = 0; i < data.size64(); i++)
                if (data.getLong(i) != 0)
                    nonZero++;
        }
        iterator.close();
    }

    if (compacted) {
        LOGGER.info("Compacting...");
        marker = LongArrayBitVector.ofLength(m);
        final LongBigList newData = LongArrayBitVector.getInstance().asLongBigList(this.width);
        newData.size(nonZero);
        nonZero = 0;

        final OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        long j = 0;
        while (iterator.hasNext()) {
            final LongBigList data = iterator.next().asLongBigList(this.width);
            for (long i = 0; i < data.size64(); i++, j++) {
                final long value = data.getLong(i);
                if (value != 0) {
                    marker.set(j);
                    newData.set(nonZero++, value);
                }
            }
        }
        iterator.close();

        rank = new Rank16(marker);

        if (ASSERTS) {
            final OfflineIterator<BitVector, LongArrayBitVector> iterator2 = offlineData.iterator();
            long k = 0;
            while (iterator2.hasNext()) {
                final LongBigList data = iterator2.next().asLongBigList(this.width);
                for (long i = 0; i < data.size64(); i++, k++) {
                    final long value = data.getLong(i);
                    assert (value != 0) == marker.getBoolean(k);
                    if (value != 0)
                        assert value == newData.getLong(rank.rank(k)) : value + " != "
                                + newData.getLong(rank.rank(k));
                }
            }
            iterator2.close();
        }
        this.data = newData;
    } else {
        final LongArrayBitVector dataBitVector = LongArrayBitVector.getInstance(m * this.width);
        this.data = dataBitVector.asLongBigList(this.width);

        OfflineIterator<BitVector, LongArrayBitVector> iterator = offlineData.iterator();
        while (iterator.hasNext())
            dataBitVector.append(iterator.next());
        iterator.close();

        marker = null;
        rank = null;
    }

    offlineData.close();

    LOGGER.info("Completed.");
    LOGGER.debug(
            "Forecast bit cost per element: " + (marker == null ? C * this.width : C + this.width + 0.126));
    LOGGER.info("Actual bit cost per element: " + (double) numBits() / n);

    if (signatureWidth > 0) {
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        signatures = chunkedHashStore.signatures(signatureWidth, pl);
    } else if (signatureWidth < 0) {
        signatureMask = -1L >>> Long.SIZE + signatureWidth;
        signatures = null;
    } else {
        signatureMask = 0;
        signatures = null;
    }

    if (!givenChunkedHashStore)
        chunkedHashStore.close();
}

From source file:it.unimi.dsi.sux4j.mph.GOV3Function.java

/**
 * Returns the value associated with the given key.
 *
 * @param o a key; it is cast to the key type and turned into a bit vector by the transformation strategy.
 * @return the stored value, or the default return value if the function is empty, the key maps
 * to no equation, or a signature check fails; note that self-signed functions return 1 on a match.
 */
@SuppressWarnings("unchecked")
public long getLong(final Object o) {
    if (n == 0)
        return defRetValue;

    final long[] triple = new long[3];
    Hashes.spooky4(transform.toBitVector((T) o), globalSeed, triple);

    // In Java a long shifted by 64 bits is left unchanged, so that case is mapped to chunk 0 explicitly.
    final int chunk = chunkShift == Long.SIZE ? 0 : (int) (triple[0] >>> chunkShift);
    // Each entry packs an offset (bits selected by OFFSET_MASK) and a seed (the remaining bits).
    final long chunkOffset = offsetAndSeed[chunk] & OFFSET_MASK;
    final int[] equation = new int[3];
    Linear3SystemSolver.tripleToEquation(triple, offsetAndSeed[chunk] & ~OFFSET_MASK,
            (int) ((offsetAndSeed[chunk + 1] & OFFSET_MASK) - chunkOffset), equation);
    if (equation[0] == -1)
        return defRetValue;

    final long v0 = equation[0] + chunkOffset;
    final long v1 = equation[1] + chunkOffset;
    final long v2 = equation[2] + chunkOffset;

    long value;
    if (rank == null) {
        value = data.getLong(v0) ^ data.getLong(v1) ^ data.getLong(v2);
    } else {
        // Compacted representation: unmarked positions hold an implicit zero.
        value = 0;
        if (marker.getBoolean(v0))
            value ^= data.getLong(rank.rank(v0));
        if (marker.getBoolean(v1))
            value ^= data.getLong(rank.rank(v1));
        if (marker.getBoolean(v2))
            value ^= data.getLong(rank.rank(v2));
    }

    if (signatureMask == 0)
        return value;

    if (signatures != null) {
        if (value >= n || ((signatures.getLong(value) ^ triple[0]) & signatureMask) != 0)
            return defRetValue;
        return value;
    }

    // Self-signed function: the stored value itself is compared with the masked hash.
    if (((value ^ triple[0]) & signatureMask) != 0)
        return defRetValue;
    return 1;
}