org.apache.mahout.math.SequentialAccessSparseVector.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.mahout.math.SequentialAccessSparseVector.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.math;

import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;

import com.google.common.primitives.Doubles;
import org.apache.mahout.math.function.Functions;

/**
 * <p>
 * Implements vector that only stores non-zero doubles as a pair of parallel arrays (OrderedIntDoubleMapping),
 * one int[], one double[].  If there are <b>k</b> non-zero elements in the vector, this implementation has
 * O(log(k)) random-access read performance, and O(k) random-access write performance, which is far below that
 * of the hashmap based {@link org.apache.mahout.math.RandomAccessSparseVector RandomAccessSparseVector}.  This
 * class is primarily used for operations where all the elements will be accessed in a read-only fashion
 * sequentially: methods which operate not via get() or set(), but via iterateNonZero(), such as (but not limited
 * to) :</p>
 * <ul>
 *   <li>dot(Vector)</li>
 *   <li>addTo(Vector)</li>
 * </ul>
 *
 * See {@link OrderedIntDoubleMapping}
 */
public class SequentialAccessSparseVector extends AbstractVector {

    private OrderedIntDoubleMapping values;

    /**
     * For serialization purposes only.
     *
     * <p>Passes a cardinality of 0 to the superclass constructor and does not assign
     * {@code values}, so the backing mapping is still {@code null} after this constructor
     * returns.</p>
     */
    public SequentialAccessSparseVector() {
        super(0);
    }

    /**
     * Creates an empty vector of the given cardinality, guessing how many entries to pre-allocate.
     *
     * <p>The guess is {@code cardinality / 1000} clamped to the range [10, 100]; it is an
     * arbitrary estimate of 'sparseness'.</p>
     *
     * @param cardinality the cardinality handed on to the two-argument constructor.
     */
    public SequentialAccessSparseVector(int cardinality) {
        this(cardinality, Math.min(100, Math.max(10, cardinality / 1000)));
    }

    /**
     * Creates an empty vector backed by a fresh {@link OrderedIntDoubleMapping}.
     *
     * @param cardinality value passed to the superclass constructor (presumably the logical
     *                    size of the vector - confirm against AbstractVector).
     * @param size        value passed to the {@link OrderedIntDoubleMapping} constructor
     *                    (presumably its initial capacity - confirm against that class).
     */
    public SequentialAccessSparseVector(int cardinality, int size) {
        super(cardinality);
        values = new OrderedIntDoubleMapping(size);
    }

    /**
     * Builds a copy of {@code other}, sized from {@code other.size()} and
     * {@code other.getNumNondefaultElements()}.
     *
     * <p>When {@code other} reports sequential access, its entries are written one by one
     * through {@code set}. Otherwise the entries are sorted by index first and then copied
     * in bulk.</p>
     *
     * @param other the vector whose entries are copied.
     */
    public SequentialAccessSparseVector(Vector other) {
        this(other.size(), other.getNumNondefaultElements());

        if (!other.isSequentialAccess()) {
            // A random-access source hands out entries in arbitrary order. Inserting them
            // one at a time would force this vector to keep re-ordering itself, which gets
            // dramatically slow for many entries, so sort by index first and copy in one pass.
            copySortedRandomAccessSparseVector(other);
            return;
        }

        for (Element entry : other.nonZeroes()) {
            set(entry.index(), entry.get());
        }
    }

    /**
     * Sorts the entries of {@code other} by index and writes them into this vector's backing
     * mapping in ascending index order.
     *
     * <p>The scratch array is sized by {@code other.getNumNondefaultElements()}, but it is
     * filled from {@code other.nonZeroes()}, which may yield fewer entries. Only the slots
     * that were actually filled are sorted and copied, so unfilled (null) slots can never
     * reach {@link Arrays#sort} and the resulting mapping reports the real number of
     * entries.</p>
     *
     * <p>Assumes {@code values} has already been allocated with room for every copied entry
     * (the calling constructor sizes it from {@code other.getNumNondefaultElements()}).</p>
     *
     * @param other the vector whose entries are copied.
     * @return the number of entries copied into this vector.
     */
    private int copySortedRandomAccessSparseVector(Vector other) {
        int capacity = other.getNumNondefaultElements();
        OrderedElement[] sortableElements = new OrderedElement[capacity];
        int copied = 0;
        for (Element e : other.nonZeroes()) {
            sortableElements[copied++] = new OrderedElement(e.index(), e.get());
        }
        // Restrict the sort to the filled prefix; trailing slots stay null when the iterator
        // produced fewer entries than the non-default count.
        Arrays.sort(sortableElements, 0, copied);
        for (int i = 0; i < copied; i++) {
            values.setIndexAt(i, sortableElements[i].index);
            values.setValueAt(i, sortableElements[i].value);
        }
        // Re-wrap the arrays so the mapping knows how many leading slots are populated.
        values = new OrderedIntDoubleMapping(values.getIndices(), values.getValues(), copied);
        return copied;
    }

    /**
     * Copies another sequential-access vector, optionally sharing its backing mapping.
     *
     * @param other       the vector to copy; its size becomes this vector's cardinality.
     * @param shallowCopy when {@code true} this vector uses the very same mapping instance as
     *                    {@code other}; when {@code false} it gets a clone of that mapping.
     */
    public SequentialAccessSparseVector(SequentialAccessSparseVector other, boolean shallowCopy) {
        super(other.size());
        if (shallowCopy) {
            values = other.values;
        } else {
            values = other.values.clone();
        }
    }

    /**
     * Creates a deep copy of {@code other}: same cardinality, with a clone of its backing
     * mapping.
     *
     * <p>Delegates to the (other, shallowCopy) constructor with {@code shallowCopy = false}
     * rather than first allocating a fresh mapping that would be thrown away immediately.</p>
     *
     * @param other the vector to copy.
     */
    public SequentialAccessSparseVector(SequentialAccessSparseVector other) {
        this(other, false);
    }

    /**
     * Wraps an existing mapping without copying it.
     *
     * @param cardinality value passed to the superclass constructor.
     * @param values      the mapping instance this vector will use directly as its storage.
     */
    private SequentialAccessSparseVector(int cardinality, OrderedIntDoubleMapping values) {
        super(cardinality);
        this.values = values;
    }

    /**
     * Creates a new {@link SparseMatrix} with the requested shape.
     *
     * @param rows    number of rows passed to the matrix constructor.
     * @param columns number of columns passed to the matrix constructor.
     * @return the newly created matrix.
     */
    @Override
    protected Matrix matrixLike(int rows, int columns) {
        Matrix sparse = new SparseMatrix(rows, columns);
        return sparse;
    }

    /**
     * Returns an independent copy of this vector: same size, backed by a clone of the
     * current mapping. Built through the private wrapping constructor rather than
     * {@code super.clone()}.
     *
     * @return the copy.
     */
    @SuppressWarnings("CloneDoesntCallSuperClone")
    @Override
    public SequentialAccessSparseVector clone() {
        int cardinality = size();
        OrderedIntDoubleMapping copiedValues = values.clone();
        return new SequentialAccessSparseVector(cardinality, copiedValues);
    }

    /**
     * Merges a batch of updates into this vector's backing mapping.
     *
     * <p>Like the other mutators of this class ({@code setQuick}, {@code incrementQuick} and
     * the iterator elements' {@code set}), this calls {@code invalidateCachedLength()} before
     * changing the stored values, so nothing cached from the old contents survives the
     * merge.</p>
     *
     * @param updates the index/value pairs to merge in.
     */
    @Override
    public void mergeUpdates(OrderedIntDoubleMapping updates) {
        invalidateCachedLength();
        values.merge(updates);
    }

    /**
     * @return the result of {@code sparseVectorToString()}.
     */
    @Override
    public String toString() {
        return sparseVectorToString();
    }

    /**
     * Reports that this implementation is not dense.
     *
     * @return always {@code false}.
     */
    @Override
    public boolean isDense() {
        return false;
    }

    /**
     * Reports that this implementation is a sequential-access vector.
     *
     * @return always {@code true}.
     */
    @Override
    public boolean isSequentialAccess() {
        return true;
    }

    /**
     * Looks up the value for {@code index} in the backing mapping.
     *
     * <p>Warning! This takes O(log n) time as it does a binary search behind the scenes!
     * Only use it when STRICTLY necessary.</p>
     *
     * @param index an int index.
     * @return the value at that position in the vector.
     */
    @Override
    public double getQuick(int index) {
        return values.get(index);
    }

    /**
     * Stores {@code value} at {@code index} in the backing mapping, after calling
     * {@code invalidateCachedLength()}.
     *
     * <p>Warning! This is not a cheap operation: finding the position involves a binary
     * search, and the class-level documentation gives random-access writes as O(k) for k
     * stored elements (presumably because inserting a new index shifts later entries -
     * confirm in OrderedIntDoubleMapping). Only use it when STRICTLY necessary.</p>
     *
     * @param index an int index.
     * @param value the value to store at that index.
     */
    @Override
    public void setQuick(int index, double value) {
        invalidateCachedLength();
        values.set(index, value);
    }

    /**
     * Adds {@code increment} to the entry at {@code index} by delegating to the backing
     * mapping's {@code increment}, after calling {@code invalidateCachedLength()}.
     *
     * @param index     an int index.
     * @param increment the amount passed to the mapping's {@code increment}.
     */
    @Override
    public void incrementQuick(int index, double increment) {
        invalidateCachedLength();
        values.increment(index, increment);
    }

    /**
     * Creates a new, empty vector with this vector's size, pre-sized from the number of
     * mappings currently stored here.
     *
     * @return the new empty vector.
     */
    @Override
    public SequentialAccessSparseVector like() {
        int cardinality = size();
        int mappingCount = values.getNumMappings();
        return new SequentialAccessSparseVector(cardinality, mappingCount);
    }

    /**
     * Creates a new, empty vector of this type with the given cardinality, using the
     * single-argument constructor (which estimates the initial allocation itself).
     *
     * @param cardinality the cardinality of the new vector.
     * @return the new empty vector.
     */
    @Override
    public Vector like(int cardinality) {
        return new SequentialAccessSparseVector(cardinality);
    }

    /**
     * @return the number of mappings held by the backing {@link OrderedIntDoubleMapping}.
     *         Stored entries are counted whatever their value; compare
     *         {@link #getNumNonZeroElements()}, which skips stored zeros.
     */
    @Override
    public int getNumNondefaultElements() {
        return values.getNumMappings();
    }

    /**
     * Counts the stored entries whose value differs from zero.
     *
     * <p>Only the first {@code getNumMappings()} slots of the value array are inspected.</p>
     *
     * @return how many stored values are not equal to 0.
     */
    @Override
    public int getNumNonZeroElements() {
        final double[] stored = values.getValues();
        final int limit = values.getNumMappings();
        int nonZeroCount = 0;
        int pos = 0;
        while (pos < limit) {
            if (stored[pos] != 0) {
                nonZeroCount += 1;
            }
            pos++;
        }
        return nonZeroCount;
    }

    /**
     * Estimates the cost of a random lookup as the rounded base-2 logarithm of the number
     * of stored entries, never less than 1.
     *
     * @return the lookup cost estimate.
     */
    @Override
    public double getLookupCost() {
        int storedEntries = getNumNondefaultElements();
        return Math.max(1, Math.round(Functions.LOG2.apply(storedEntries)));
    }

    /**
     * @return the constant 1 as this implementation's iterator-advance cost.
     */
    @Override
    public double getIteratorAdvanceCost() {
        return 1;
    }

    /**
     * @return always {@code false}; the class documentation gives random-access writes
     *         as O(k) for k stored elements.
     */
    @Override
    public boolean isAddConstantTime() {
        return false;
    }

    /**
     * Returns an iterator over the entries stored in the backing mapping, in storage order.
     *
     * <p>Known limitation (see TODO below): the returned iterator visits every stored entry
     * and does not check values, so an entry that is stored with a value of zero is still
     * returned.</p>
     *
     * @return a new iterator over the stored entries.
     */
    @Override
    public Iterator<Element> iterateNonZero() {

        // TODO: this is a bug, since nonDefaultIterator doesn't hold to non-zero contract.
        return new NonDefaultIterator();
    }

    /**
     * Returns an iterator that visits every index from 0 up to {@code size() - 1},
     * whether or not a value is stored for it.
     *
     * @return a new iterator over all positions.
     */
    @Override
    public Iterator<Element> iterator() {
        return new AllIterator();
    }

    /**
     * Iterator over the entries stored in the backing mapping.
     *
     * <p>A single {@link NonDefaultElement} is reused for the whole iteration: every call to
     * {@link #next()} moves that element one slot forward and returns the same object.
     * Removal is not supported.</p>
     */
    private final class NonDefaultIterator implements Iterator<Element> {
        private final NonDefaultElement current = new NonDefaultElement();

        @Override
        public boolean hasNext() {
            int upcoming = current.getNextOffset();
            return upcoming < values.getNumMappings();
        }

        @Override
        public Element next() {
            if (hasNext()) {
                current.advanceOffset();
                return current;
            }
            throw new NoSuchElementException();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Iterator over every index of the enclosing vector, from 0 up to {@code size() - 1}.
     *
     * <p>A single {@link AllElement} is reused for the whole iteration: every call to
     * {@link #next()} moves that element to the following index and returns the same object.
     * Removal is not supported.</p>
     */
    private final class AllIterator implements Iterator<Element> {
        private final AllElement current = new AllElement();

        @Override
        public boolean hasNext() {
            int upcoming = current.getNextIndex();
            return upcoming < SequentialAccessSparseVector.this.size();
        }

        @Override
        public Element next() {
            if (hasNext()) {
                current.advanceIndex();
                return current;
            }
            throw new NoSuchElementException();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Cursor over the slots of the backing mapping's parallel arrays.
     *
     * <p>{@code offset} is the slot currently pointed at; it starts at -1, so the element
     * must be advanced once before {@link #get()}, {@link #index()} or {@link #set(double)}
     * can be used.</p>
     */
    private final class NonDefaultElement implements Element {
        private int offset = -1;

        /** Moves the cursor to the following slot. */
        void advanceOffset() {
            offset += 1;
        }

        /** @return the slot the cursor would point at after one more advance. */
        int getNextOffset() {
            return 1 + offset;
        }

        @Override
        public double get() {
            double[] stored = values.getValues();
            return stored[offset];
        }

        @Override
        public int index() {
            int[] keys = values.getIndices();
            return keys[offset];
        }

        @Override
        public void set(double value) {
            // Invalidate first, then overwrite the value in the current slot in place.
            invalidateCachedLength();
            values.setValueAt(offset, value);
        }
    }

    /**
     * Cursor used by {@link AllIterator} to walk every index of the vector while keeping a
     * matching position in the backing mapping's parallel arrays.
     */
    private final class AllElement implements Element {
        // Vector index currently pointed at; -1 until the first advanceIndex() call.
        private int index = -1;
        // Slot in the mapping's arrays that is compared against index; starts at 0.
        private int nextOffset;

        /**
         * Moves to the following vector index. If that index is now greater than the index
         * stored at {@code nextOffset}, the slot has been passed and {@code nextOffset} moves
         * forward by one.
         */
        void advanceIndex() {
            index++;
            if (nextOffset < values.getNumMappings() && index > values.getIndices()[nextOffset]) {
                nextOffset++;
            }
        }

        /** @return the vector index this element would point at after one more advance. */
        int getNextIndex() {
            return index + 1;
        }

        /**
         * @return the stored value when the slot at {@code nextOffset} holds the current
         *         index, otherwise {@link OrderedIntDoubleMapping#DEFAULT_VALUE}.
         */
        @Override
        public double get() {
            if (nextOffset < values.getNumMappings() && index == values.getIndices()[nextOffset]) {
                return values.getValues()[nextOffset];
            } else {
                return OrderedIntDoubleMapping.DEFAULT_VALUE;
            }
        }

        /** @return the vector index currently pointed at. */
        @Override
        public int index() {
            return index;
        }

        /**
         * Writes {@code value} at the current index, after calling
         * {@code invalidateCachedLength()}. An existing slot for this index is overwritten in
         * place; otherwise the write goes through the mapping's {@code set}.
         */
        @Override
        public void set(double value) {
            invalidateCachedLength();
            if (nextOffset < values.getNumMappings() && index == values.indexAt(nextOffset)) {
                values.setValueAt(nextOffset, value);
            } else {
                // Yes, this works; the offset into indices of the new value's index will still be nextOffset
                values.set(index, value);
            }
        }
    }

    /**
     * Immutable (index, value) pair that sorts by index, used to order the entries of a
     * random-access vector before they are copied into this one.
     *
     * <p>Ordering looks at the index only, so {@link #compareTo} can return 0 for two
     * elements that are not {@link #equals equal} (same index, different value).</p>
     */
    private static final class OrderedElement implements Comparable<OrderedElement> {
        private final int index;
        private final double value;

        OrderedElement(int index, double value) {
            this.index = index;
            this.value = value;
        }

        /**
         * Orders elements by ascending index.
         *
         * <p>Uses explicit comparisons rather than subtraction, so the result has the
         * correct sign for any pair of ints and does not depend on the indexes being
         * non-negative.</p>
         */
        @Override
        public int compareTo(OrderedElement that) {
            if (this.index < that.index) {
                return -1;
            }
            return this.index == that.index ? 0 : 1;
        }

        @Override
        public int hashCode() {
            return index ^ Doubles.hashCode(value);
        }

        /**
         * Two elements are equal when their indexes match and their values are the same
         * double according to {@link Double#compare(double, double)}.
         *
         * <p>Comparing with {@code ==} would treat 0.0 and -0.0 as equal even though they
         * hash differently, breaking the equals/hashCode contract; {@code Double.compare}
         * distinguishes them and also treats NaN as equal to itself.</p>
         */
        @Override
        public boolean equals(Object o) {
            if (!(o instanceof OrderedElement)) {
                return false;
            }
            OrderedElement other = (OrderedElement) o;
            return index == other.index && Double.compare(value, other.value) == 0;
        }
    }
}