Example usage for org.apache.mahout.math Vector set

List of usage examples for org.apache.mahout.math Vector set

Introduction

On this page you can find example usages of org.apache.mahout.math Vector.set.

Prototype

void set(int index, double value);

Source Link

Document

Set the value at the given index

Usage

From source file:org.qcri.sparkpca.PCAUtils.java

/**
 * Multiplies a dense vector by a matrix, i.e. computes vector^T * matrix.
 *
 * @param vector    input vector; must have at least matrix.numRows() entries
 * @param matrix    matrix to multiply by
 * @param xm_mahout result vector, overwritten in place; must have at least
 *                  matrix.numCols() entries
 * @return xm_mahout, holding the product
 */
static Vector denseVectorTimesMatrix(Vector vector, Matrix matrix, Vector xm_mahout) {
    final int rows = matrix.numRows();
    final int cols = matrix.numCols();
    for (int col = 0; col < cols; col++) {
        // Dot product of the input vector with column `col` of the matrix.
        double sum = 0;
        for (int row = 0; row < rows; row++) {
            sum += vector.getQuick(row) * matrix.getQuick(row, col);
        }
        xm_mahout.set(col, sum);
    }
    return xm_mahout;
}

From source file:org.trustedanalytics.atk.giraph.io.titan.formats.TitanVertexBuilder.java

License:Apache License

/**
 * Update vector values using a Titan property value.
 *
 * @param vector Mahout vector to update
 * @param titanProperty Titan property
 * @return Updated Mahout vector
 */
public org.apache.mahout.math.Vector setVector(org.apache.mahout.math.Vector vector,
        TitanProperty titanProperty) {
    Object vertexValueObject = titanProperty.getValue();
    if (enableVectorValue) {
        // A single property key carries the whole vector as one string;
        // tokens are separated by the configured regexp (space, comma, or tab).
        String[] tokens = vertexValueObject.toString().split(regexp);
        vector = new DenseVector(new double[tokens.length]);
        int slot = 0;
        for (String token : tokens) {
            vector.set(slot++, Double.parseDouble(token));
        }
    } else {
        // Each property key maps to exactly one slot of the existing vector.
        String propertyName = titanProperty.getPropertyKey().getName();
        int propertyIndex = vertexValuePropertyKeys.get(propertyName);
        vector.set(propertyIndex, Double.parseDouble(vertexValueObject.toString()));
    }
    return vector;
}

From source file:org.trustedanalytics.atk.giraph.io.VertexData4LPWritable.java

License:Apache License

/**
 * Initialize the labels on vertex/*from  w w  w  .  jav a  2 s. c o  m*/
 */
private void setStatusAndUnlabeledValues(boolean wasLabeled) {

    if (wasLabeled == false) {
        Vector temp = priorWritable.get();
        int size = temp.size();
        for (int i = 0; i < size; i++) {
            temp.set(i, 1 / size);
        }
        priorWritable.set(temp);
        posteriorWritable.set(temp);
    }
}

From source file:technobium.OnlineLogisticRegressionTest.java

License:Apache License

public static void main(String[] args) throws Exception {
    // this test trains a 3-way classifier on the famous Iris dataset.
    // a similar exercise can be accomplished in R using this code:
    //    library(nnet)
    //    correct = rep(0,100)
    //    for (j in 1:100) {
    //      i = order(runif(150))
    //      train = iris[i[1:100],]
    //      test = iris[i[101:150],]
    //      m = multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, train)
    //      correct[j] = mean(predict(m, newdata=test) == test$Species)
    //    }
    //    hist(correct)
    //
    // Note that depending on the training/test split, performance can be better or worse.
    // There is about a 5% chance of getting accuracy < 90% and about 20% chance of getting accuracy
    // of 100%
    //
    // This test uses a deterministic split that is neither outstandingly good nor bad

    RandomUtils.useTestSeed();
    Splitter onComma = Splitter.on(",");

    // read the data
    List<String> raw = Resources.readLines(Resources.getResource("iris.csv"), Charsets.UTF_8);

    // holds features
    List<Vector> data = Lists.newArrayList();

    // holds target variable
    List<Integer> target = Lists.newArrayList();

    // for decoding target values
    Dictionary dict = new Dictionary();

    // for permuting data later
    List<Integer> order = Lists.newArrayList();

    // skip the CSV header row
    for (String line : raw.subList(1, raw.size())) {
        // order gets a list of indexes
        order.add(order.size());

        // parse the predictor variables; slot 0 is the bias/intercept term
        Vector v = new DenseVector(5);
        v.set(0, 1);
        int i = 1;
        Iterable<String> values = onComma.split(line);
        for (String value : Iterables.limit(values, 4)) {
            v.set(i++, Double.parseDouble(value));
        }
        data.add(v);

        // and the target (5th CSV column, interned to a small integer id)
        target.add(dict.intern(Iterables.get(values, 4)));
    }

    // randomize the order ... original data has each species all together
    // note that this randomization is deterministic
    Random random = RandomUtils.getRandom();
    Collections.shuffle(order, random);

    // select training and test data
    List<Integer> train = order.subList(0, 100);
    List<Integer> test = order.subList(100, 150);
    logger.warn("Training set = {}", train);
    logger.warn("Test set = {}", test);

    // now train many times and collect information on accuracy each time.
    // correct[x] counts the runs that got exactly x test examples right,
    // hence size test.size() + 1 (x ranges from 0 to test.size() inclusive).
    int[] correct = new int[test.size() + 1];
    for (int run = 0; run < 200; run++) {
        OnlineLogisticRegression lr = new OnlineLogisticRegression(3, 5, new L2(1));
        // 30 training passes should converge to > 95% accuracy nearly always but never to 100%
        for (int pass = 0; pass < 30; pass++) {
            Collections.shuffle(train, random);
            for (int k : train) {
                lr.train(target.get(k), data.get(k));
            }
        }

        // check the accuracy on held out data
        int x = 0;
        int[] count = new int[3];
        for (Integer k : test) {
            Vector vt = lr.classifyFull(data.get(k));
            int r = vt.maxValueIndex();
            count[r]++;
            x += r == target.get(k) ? 1 : 0;
        }
        correct[x]++;
    }

    // verify we never saw worse than 95% correct,
    for (int i = 0; i < Math.floor(0.95 * test.size()); i++) {
        System.out.println(String.format("%d trials had unacceptable accuracy of only %.0f%%: ", correct[i],
                100.0 * i / test.size()));
    }
    // nor perfect
    // BUG FIX: a perfect run answers all test.size() examples correctly, so the
    // count of 100%-accurate trials is correct[test.size()]; the original read
    // correct[test.size() - 1], which is the (test.size()-1)/test.size() (98%) bin.
    System.out.println(String.format("%d trials had unrealistic accuracy of 100%%", correct[test.size()]));
}

From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java

License:Apache License

/**
 * Scans an RCV1-format file, tallying class labels and namespaces and
 * printing each parsed feature value, stopping after {@code debug_break_cnt}
 * lines.
 *
 * <p>Expected line format: {@code <class> <namespace> <idx>:<val> <idx>:<val> ...}
 *
 * @param file            path of the file to scan
 * @param debug_break_cnt stop after this many lines (debug aid)
 * @throws IOException if the file cannot be opened or read
 */
public static void ScanFile(String file, int debug_break_cnt) throws IOException {

    int line_count = 0;

    Multiset<String> class_count = ConcurrentHashMultiset.create();
    Multiset<String> namespaces = ConcurrentHashMultiset.create();

    // BUG FIX: the original closed the reader in a finally block; when the
    // FileReader constructor threw (e.g. file not found), reader was still
    // null and the finally block raised a NullPointerException that masked
    // the real failure. try-with-resources closes safely in all cases.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {

        String line = reader.readLine();

        while (line != null && line.length() > 0) {

            String[] parts = line.split(" ");

            class_count.add(parts[0]);
            namespaces.add(parts[1]);

            line = reader.readLine();
            line_count++;

            // Built per line only so the feature parsing below is exercised;
            // the vector itself is discarded (this method only scans/prints).
            Vector v = new RandomAccessSparseVector(FEATURES);

            for (int x = 2; x < parts.length; x++) {
                String[] feature = parts[x].split(":");
                int index = Integer.parseInt(feature[0]) % FEATURES;
                double val = Double.parseDouble(feature[1]);

                System.out.println(feature[1] + " = " + val);

                if (index < FEATURES) {
                    v.set(index, val);
                } else {

                    System.out.println("Could Hash: " + index + " to " + (index % FEATURES));

                }

            }

            System.out.println("###");

            if (line_count > debug_break_cnt) {
                break;
            }

        }

        System.out.println("Total Rec Count: " + line_count);

        System.out.println("-------------------- ");

        System.out.println("Classes");
        for (String word : class_count.elementSet()) {
            System.out.println("Class " + word + ": " + class_count.count(word) + " ");
        }

        System.out.println("-------------------- ");

        System.out.println("NameSpaces:");
        for (String word : namespaces.elementSet()) {
            System.out.println("Namespace " + word + ": " + namespaces.count(word) + " ");
        }

    }

}

From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java

License:Apache License

/**
 * Processes single line of input into: - target variable - Feature vector
 * /*from w ww .ja v a 2  s  .  c o m*/
 * Right now our hash function is simply "modulo"
 * 
 * @throws Exception
 */
@Override
public double processLineAlt(String line, Vector v) throws Exception {

    double actual = 0;

    String[] parts = line.split(" ");

    actual = Double.parseDouble(parts[0]);

    // dont know what to do the the "namespace" "f"
    v.set(0, 1.0);

    for (int x = 2; x < parts.length; x++) {

        String[] feature = parts[x].split(":");
        int index = (Integer.parseInt(feature[0]) + 1) % FEATURES;
        double val = Double.parseDouble(feature[1]);

        if (index < FEATURES) {
            v.set(index, val);
        } else {

            System.out.println("Could Hash: " + index + " to " + (index % FEATURES));

        }

    }

    return actual;
}

From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java

License:Apache License

/**
 * Vectorizes one input line: features are written into {@code v_in} and the
 * parsed target value is stored in slot 0 of {@code v_out}.
 *
 * @param line  raw input line
 * @param v_in  receives the feature vector
 * @param v_out receives the target value at index 0
 * @throws Exception on malformed input
 */
@Override
public void vectorizeLine(String line, Vector v_in, Vector v_out) throws Exception {
    v_out.set(0, this.processLineAlt(line, v_in));
}