Example usage for org.apache.mahout.math Vector size

List of usage examples for org.apache.mahout.math Vector size

Introduction

On this page you can find example usages of the size() method of org.apache.mahout.math.Vector.

Prototype

int size();

Document

Return the cardinality of the recipient (the maximum number of values)
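
Before the usage examples, here is a minimal sketch (not taken from the sources below) illustrating the point of the documentation: size() reports the vector's cardinality, not the number of stored values, so for a sparse vector the two typically differ.

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class VectorSizeDemo {
    public static void main(String[] args) {
        // A sparse vector with cardinality 10 but only two stored values.
        Vector v = new RandomAccessSparseVector(10);
        v.setQuick(2, 3.0);
        v.setQuick(7, 1.5);

        // size() returns the cardinality (10), regardless of how many
        // entries have actually been set.
        System.out.println(v.size());                      // 10
        System.out.println(v.getNumNondefaultElements());  // 2
    }
}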

Usage

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * sum_x sum_a (c_ai * log(p(x|i) * p(a|x)))
 */
public double perplexity(Vector document, Vector docTopics) {
    double perplexity = 0;
    double norm = docTopics.norm(1) + (docTopics.size() * alpha);
    Iterator<Vector.Element> it = document.iterateNonZero();
    while (it.hasNext()) {
        Vector.Element e = it.next();
        int term = e.index();
        double prob = 0;
        for (int x = 0; x < numTopics; x++) {
            double d = (docTopics.get(x) + alpha) / norm;
            double p = d * (topicTermCounts.viewRow(x).get(term) + eta) / (topicSums.get(x) + eta * numTerms);
            prob += p;
        }
        perplexity += e.get() * Math.log(prob);
    }
    return -perplexity;
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

private static BasicDBList listFromMahoutVector(Vector vec, String prefix, BasicDBObject element) {
    if (vec instanceof NamedVector) {
        element.put(prefix + "Name", ((NamedVector) vec).getName());
    }
    BasicDBList dbl2 = new BasicDBList();
    if (vec.isDense()) {
        int nSize = vec.size();
        dbl2.ensureCapacity(nSize);
        for (int i = 0; i < nSize; ++i) {
            dbl2.add(vec.getQuick(i));
        }
    } else { // sparse, write as a set in the format [{int:double}]
        Iterator<org.apache.mahout.math.Vector.Element> elIt = vec.iterateNonZero();
        while (elIt.hasNext()) {
            BasicDBObject el2 = new BasicDBObject();
            org.apache.mahout.math.Vector.Element el = elIt.next();
            el2.put("k", el.index());
            el2.put("v", el.get());
            dbl2.add(el2);
        }
    }
    return dbl2;
}

From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java

License:Apache License

public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);

    Vector weightsPerLabel = null;
    Vector perLabelThetaNormalizer = null;
    Vector weightsPerFeature = null;
    Matrix weightsPerLabelAndFeature;
    float alphaI;

    FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
    try {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));

        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        Closeables.close(in, true);
    }
    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}

From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java

License:Apache License

public static NaiveBayesModel materializeLocal(String modelfile) throws IOException {

    Vector weightsPerLabel = null;
    Vector perLabelThetaNormalizer = null;
    Vector weightsPerFeature = null;
    Matrix weightsPerLabelAndFeature;
    float alphaI;

    System.out.println(modelfile);
    ClassLoader loader = NaiveBayesModel.class.getClassLoader();
    InputStream sin = loader.getResourceAsStream(modelfile);
    DataInputStream in = new DataInputStream(sin);
    try {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));

        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        in.close();
    }
    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}

From source file:com.netease.news.classifier.naivebayes.WeightsMapper.java

License:Apache License

@Override
protected void map(IntWritable index, VectorWritable value, Context ctx)
        throws IOException, InterruptedException {
    Vector instance = value.get();
    if (weightsPerFeature == null) {
        weightsPerFeature = new RandomAccessSparseVector(instance.size(), instance.getNumNondefaultElements());
    }

    int label = index.get();
    weightsPerFeature.assign(instance, Functions.PLUS);
    weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
}

From source file:com.scaleunlimited.classify.model.RawFeaturesLibLinearModel.java

License:Apache License

private FeatureNode[] vectorToFeatureNodes(Vector vector) {
    int featureCount = vector.getNumNondefaultElements();
    FeatureNode[] x = new FeatureNode[featureCount];
    int arrayIndex = 0;
    int cardinality = vector.size();
    for (int i = 0; i < cardinality; i++) {
        double value = vector.getQuick(i);
        if (value != 0.0) {
            // (At least) Linear.train assumes that FeatureNode.index
            // is 1-based, and we don't really have to map back to our
            // term indexes, so just add one. YUCK!
            x[arrayIndex++] = new FeatureNode(i + 1, value);
        }
    }
    return x;
}

From source file:com.scaleunlimited.classify.vectors.BaseNormalizer.java

License:Apache License

public static void dumpTopTerms(final Vector docFrequencies, List<String> uniqueTerms) {
    int cardinality = docFrequencies.size();
    List<Integer> sortedDocFrequencyIndexes = new ArrayList<Integer>(cardinality);
    for (int i = 0; i < cardinality; i++) {
        sortedDocFrequencyIndexes.add(i);
    }

    Collections.sort(sortedDocFrequencyIndexes, new Comparator<Integer>() {
        @Override
        public int compare(Integer o1, Integer o2) {
            return (int) (docFrequencies.getQuick(o2) - docFrequencies.getQuick(o1));
        }
    });

    double maxFrequency = docFrequencies.getQuick(docFrequencies.maxValueIndex());
    StringBuffer topTermsReport = new StringBuffer();
    for (int i = 0; i < cardinality; i++) {
        int index = sortedDocFrequencyIndexes.get(i);
        double frequency = docFrequencies.getQuick(index);
        if ((frequency / maxFrequency) > MIN_FREQUENCY_REPORT_RATIO) {
            topTermsReport.append(String.format("'%s'=%d, ", uniqueTerms.get(index), (int) frequency));
        }
    }

    LOGGER.debug(topTermsReport.toString());
}

From source file:com.scaleunlimited.classify.vectors.UnitNormalizer.java

License:Apache License

@Override
public void normalize(Vector vector) {

    double length = Math.sqrt(vector.getLengthSquared());

    // Divide each vector coordinate by length, so we wind up with a unit vector.
    int cardinality = vector.size();
    for (int j = 0; j < cardinality; j++) {
        double curValue = vector.getQuick(j);
        if (curValue > 0.0) {
            vector.setQuick(j, curValue / length);
        }
    }
}

From source file:com.scaleunlimited.classify.vectors.VectorUtils.java

License:Apache License

public static Vector appendVectors(Vector baseVector, Vector extraVector) {
    int baseSize = baseVector.size();
    Vector result = new RandomAccessSparseVector(baseSize + extraVector.size());

    for (int i = 0; i < baseSize; i++) {
        double value = baseVector.getQuick(i);
        if (value != 0.0) {
            result.setQuick(i, value);
        }
    }

    for (int i = 0; i < extraVector.size(); i++) {
        double value = extraVector.getQuick(i);
        if (value != 0.0) {
            result.setQuick(baseSize + i, value);
        }
    }

    return result;
}

From source file:com.scaleunlimited.classify.vectors.VectorUtils.java

License:Apache License

public static Vector extendVector(Vector v, int extraSize) {
    if (extraSize == 0) {
        return v;
    }

    int baseSize = v.size();
    Vector result = new RandomAccessSparseVector(baseSize + extraSize);
    for (int i = 0; i < baseSize; i++) {
        double value = v.getQuick(i);
        if (value != 0.0) {
            result.setQuick(i, value);
        }
    }

    return result;
}