List of usage examples for org.apache.mahout.math Vector size
int size();
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
/**
 * Computes sum_x sum_a (c_ai * log(p(x|i) * p(a|x))) over the document's
 * non-zero terms and returns its negation.
 *
 * @param document  term-count vector for one document (only non-zero entries matter)
 * @param docTopics current per-topic weights for this document
 * @return the (negated) log-likelihood-style perplexity contribution
 */
public double perplexity(Vector document, Vector docTopics) {
    // Smoothed normalizer for the document/topic distribution.
    double norm = docTopics.norm(1) + (docTopics.size() * alpha);
    double perplexity = 0;
    for (Iterator<Vector.Element> it = document.iterateNonZero(); it.hasNext();) {
        Vector.Element e = it.next();
        int term = e.index();
        double prob = 0;
        for (int topic = 0; topic < numTopics; topic++) {
            // p(topic | doc), smoothed by alpha.
            double topicWeight = (docTopics.get(topic) + alpha) / norm;
            // p(term | topic), smoothed by eta.
            double termWeight = (topicTermCounts.viewRow(topic).get(term) + eta)
                    / (topicSums.get(topic) + eta * numTerms);
            prob += topicWeight * termWeight;
        }
        perplexity += e.get() * Math.log(prob);
    }
    return -perplexity;
}
From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java
License:Open Source License
private static BasicDBList listFromMahoutVector(Vector vec, String prefix, BasicDBObject element) { if (vec instanceof NamedVector) { element.put(prefix + "Name", ((NamedVector) vec).getName()); }// w w w. java2s . co m BasicDBList dbl2 = new BasicDBList(); if (vec.isDense()) { int nSize = vec.size(); dbl2.ensureCapacity(nSize); for (int i = 0; i < nSize; ++i) { dbl2.add(vec.getQuick(i)); } } else { // sparse, write as a set in the format [{int:double}] Iterator<org.apache.mahout.math.Vector.Element> elIt = vec.iterateNonZero(); while (elIt.hasNext()) { BasicDBObject el2 = new BasicDBObject(); org.apache.mahout.math.Vector.Element el = elIt.next(); el2.put("k", el.index()); el2.put("v", el.get()); dbl2.add(el2); } } return dbl2; }
From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java
License:Apache License
/**
 * Reads a serialized naive Bayes model from {@code output/naiveBayesModel.bin}
 * on the given filesystem and reconstructs it in memory.
 *
 * @param output directory containing the serialized model file
 * @param conf   Hadoop configuration used to resolve the filesystem
 * @return the validated, materialized model
 * @throws IOException if the model file cannot be read
 */
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);

    float alphaI;
    Vector weightsPerFeature;
    Vector weightsPerLabel;
    Vector perLabelThetaNormalizer;
    Matrix weightsPerLabelAndFeature;

    FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
    try {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));
        // One row of label/feature weights per label, in label order.
        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        Closeables.close(in, true);
    }

    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature,
            weightsPerLabel, perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}
From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java
License:Apache License
/**
 * Reads a serialized naive Bayes model from a classpath resource and
 * reconstructs it in memory.
 *
 * @param modelfile classpath-relative path of the serialized model resource
 * @return the validated, materialized model
 * @throws IOException if the resource is missing or cannot be read
 */
public static NaiveBayesModel materializeLocal(String modelfile) throws IOException {
    ClassLoader loader = NaiveBayesModel.class.getClassLoader();
    InputStream sin = loader.getResourceAsStream(modelfile);
    // getResourceAsStream returns null for a missing resource; fail with a
    // clear message instead of an NPE on the first read.
    if (sin == null) {
        throw new IOException("Model resource not found on classpath: " + modelfile);
    }

    float alphaI;
    Vector weightsPerFeature;
    Vector weightsPerLabel;
    Vector perLabelThetaNormalizer;
    Matrix weightsPerLabelAndFeature;

    // try-with-resources replaces the manual try/finally close.
    try (DataInputStream in = new DataInputStream(sin)) {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));
        // One row of label/feature weights per label, in label order.
        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    }

    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature,
            weightsPerLabel, perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}
From source file:com.netease.news.classifier.naivebayes.WeightsMapper.java
License:Apache License
@Override protected void map(IntWritable index, VectorWritable value, Context ctx) throws IOException, InterruptedException { Vector instance = value.get(); if (weightsPerFeature == null) { weightsPerFeature = new RandomAccessSparseVector(instance.size(), instance.getNumNondefaultElements()); }//from w ww .j a v a 2 s .c o m int label = index.get(); weightsPerFeature.assign(instance, Functions.PLUS); weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum()); }
From source file:com.scaleunlimited.classify.model.RawFeaturesLibLinearModel.java
License:Apache License
private FeatureNode[] vectorToFeatureNodes(Vector vector) { int featureCount = vector.getNumNondefaultElements(); FeatureNode[] x = new FeatureNode[featureCount]; int arrayIndex = 0; int cardinality = vector.size(); for (int i = 0; i < cardinality; i++) { double value = vector.getQuick(i); if (value != 0.0) { // (At least) Linear.train assumes that FeatureNode.index // is 1-based, and we don't really have to map back to our // term indexes, so just add one. YUCK! x[arrayIndex++] = new FeatureNode(i + 1, value); }/*from ww w . j av a 2 s .c o m*/ } return x; }
From source file:com.scaleunlimited.classify.vectors.BaseNormalizer.java
License:Apache License
public static void dumpTopTerms(final Vector docFrequencies, List<String> uniqueTerms) { int cardinality = docFrequencies.size(); List<Integer> sortedDocFrequencyIndexes = new ArrayList<Integer>(cardinality); for (int i = 0; i < cardinality; i++) { sortedDocFrequencyIndexes.add(i); }//from w w w.java 2 s . c o m Collections.sort(sortedDocFrequencyIndexes, new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { return (int) (docFrequencies.getQuick(o2) - docFrequencies.getQuick(o1)); } }); double maxFrequency = docFrequencies.getQuick(docFrequencies.maxValueIndex()); StringBuffer topTermsReport = new StringBuffer(); for (int i = 0; i < cardinality; i++) { int index = sortedDocFrequencyIndexes.get(i); double frequency = docFrequencies.getQuick(index); if ((frequency / maxFrequency) > MIN_FREQUENCY_REPORT_RATIO) { topTermsReport.append(String.format("'%s'=%d, ", uniqueTerms.get(index), (int) frequency)); } } LOGGER.debug(topTermsReport.toString()); }
From source file:com.scaleunlimited.classify.vectors.UnitNormalizer.java
License:Apache License
@Override public void normalize(Vector vector) { double length = Math.sqrt(vector.getLengthSquared()); // Divide each vector coordinate by length, so we wind up with a unit vector. int cardinality = vector.size(); for (int j = 0; j < cardinality; j++) { double curValue = vector.getQuick(j); if (curValue > 0.0) { vector.setQuick(j, curValue / length); }/*from w ww .j av a 2 s. co m*/ } }
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
/**
 * Concatenates two vectors: the result has baseVector's entries first,
 * followed by extraVector's entries shifted by baseVector's size. Only
 * non-zero values are copied into the sparse result.
 *
 * @param baseVector  vector occupying indexes [0, baseVector.size())
 * @param extraVector vector occupying the indexes after baseVector
 * @return a new sparse vector of combined cardinality
 */
public static Vector appendVectors(Vector baseVector, Vector extraVector) {
    int baseSize = baseVector.size();
    Vector result = new RandomAccessSparseVector(baseSize + extraVector.size());
    copyNonZeroInto(baseVector, result, 0);
    copyNonZeroInto(extraVector, result, baseSize);
    return result;
}

/** Copies src's non-zero entries into dest, offsetting each index. */
private static void copyNonZeroInto(Vector src, Vector dest, int offset) {
    int n = src.size();
    for (int i = 0; i < n; i++) {
        double value = src.getQuick(i);
        if (value != 0.0) {
            dest.setQuick(offset + i, value);
        }
    }
}
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
public static Vector extendVector(Vector v, int extraSize) { if (extraSize == 0) { return v; }//w w w . j a v a 2 s .c o m int baseSize = v.size(); Vector result = new RandomAccessSparseVector(baseSize + extraSize); for (int i = 0; i < baseSize; i++) { double value = v.getQuick(i); if (value != 0.0) { result.setQuick(i, value); } } return result; }