List of usage examples for org.apache.mahout.math Vector set
void set(int index, double value);
From source file:org.qcri.sparkpca.PCAUtils.java
/**
 * Multiplies a dense (row) vector by a matrix: result[c] = dot(vector, column c).
 *
 * @param vector    input vector; its length must equal the matrix row count
 * @param matrix    matrix to multiply by
 * @param xm_mahout pre-allocated result vector of length numCols (reused to avoid allocation)
 * @return xm_mahout, filled with the product
 */
static Vector denseVectorTimesMatrix(Vector vector, Matrix matrix, Vector xm_mahout) {
    final int rows = matrix.numRows();
    final int cols = matrix.numCols();
    for (int col = 0; col < cols; col++) {
        double sum = 0;
        for (int row = 0; row < rows; row++) {
            sum += vector.getQuick(row) * matrix.getQuick(row, col);
        }
        xm_mahout.set(col, sum);
    }
    return xm_mahout;
}
From source file:org.trustedanalytics.atk.giraph.io.titan.formats.TitanVertexBuilder.java
License:Apache License
/** * Update vector values using Titan property value * * @param vector Mahout vector//w ww. ja v a 2s . co m * @param titanProperty Titan property * @return Updated Mahout vector */ public org.apache.mahout.math.Vector setVector(org.apache.mahout.math.Vector vector, TitanProperty titanProperty) { Object vertexValueObject = titanProperty.getValue(); if (enableVectorValue) { //one property key has a vector as value //split by either space or comma or tab String[] valueString = vertexValueObject.toString().split(regexp); int size = valueString.length; double[] data = new double[size]; vector = new DenseVector(data); for (int i = 0; i < valueString.length; i++) { vector.set(i, Double.parseDouble(valueString[i])); } } else { String propertyName = titanProperty.getPropertyKey().getName(); int propertyIndex = vertexValuePropertyKeys.get(propertyName); double vertexValue = Double.parseDouble(vertexValueObject.toString()); vector.set(propertyIndex, vertexValue); } return vector; }
From source file:org.trustedanalytics.atk.giraph.io.VertexData4LPWritable.java
License:Apache License
/** * Initialize the labels on vertex/*from w w w . jav a 2 s. c o m*/ */ private void setStatusAndUnlabeledValues(boolean wasLabeled) { if (wasLabeled == false) { Vector temp = priorWritable.get(); int size = temp.size(); for (int i = 0; i < size; i++) { temp.set(i, 1 / size); } priorWritable.set(temp); posteriorWritable.set(temp); } }
From source file:technobium.OnlineLogisticRegressionTest.java
License:Apache License
public static void main(String[] args) throws Exception { // this test trains a 3-way classifier on the famous Iris dataset. // a similar exercise can be accomplished in R using this code: // library(nnet) // correct = rep(0,100) // for (j in 1:100) { // i = order(runif(150)) // train = iris[i[1:100],] // test = iris[i[101:150],] // m = multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, train) // correct[j] = mean(predict(m, newdata=test) == test$Species) // }/*from w ww . j ava2s . c o m*/ // hist(correct) // // Note that depending on the training/test split, performance can be better or worse. // There is about a 5% chance of getting accuracy < 90% and about 20% chance of getting accuracy // of 100% // // This test uses a deterministic split that is neither outstandingly good nor bad RandomUtils.useTestSeed(); Splitter onComma = Splitter.on(","); // read the data List<String> raw = Resources.readLines(Resources.getResource("iris.csv"), Charsets.UTF_8); // holds features List<Vector> data = Lists.newArrayList(); // holds target variable List<Integer> target = Lists.newArrayList(); // for decoding target values Dictionary dict = new Dictionary(); // for permuting data later List<Integer> order = Lists.newArrayList(); for (String line : raw.subList(1, raw.size())) { // order gets a list of indexes order.add(order.size()); // parse the predictor variables Vector v = new DenseVector(5); v.set(0, 1); int i = 1; Iterable<String> values = onComma.split(line); for (String value : Iterables.limit(values, 4)) { v.set(i++, Double.parseDouble(value)); } data.add(v); // and the target target.add(dict.intern(Iterables.get(values, 4))); } // randomize the order ... 
original data has each species all together // note that this randomization is deterministic Random random = RandomUtils.getRandom(); Collections.shuffle(order, random); // select training and test data List<Integer> train = order.subList(0, 100); List<Integer> test = order.subList(100, 150); logger.warn("Training set = {}", train); logger.warn("Test set = {}", test); // now train many times and collect information on accuracy each time int[] correct = new int[test.size() + 1]; for (int run = 0; run < 200; run++) { OnlineLogisticRegression lr = new OnlineLogisticRegression(3, 5, new L2(1)); // 30 training passes should converge to > 95% accuracy nearly always but never to 100% for (int pass = 0; pass < 30; pass++) { Collections.shuffle(train, random); for (int k : train) { lr.train(target.get(k), data.get(k)); } } // check the accuracy on held out data int x = 0; int[] count = new int[3]; for (Integer k : test) { Vector vt = lr.classifyFull(data.get(k)); int r = vt.maxValueIndex(); count[r]++; x += r == target.get(k) ? 1 : 0; } correct[x]++; } // verify we never saw worse than 95% correct, for (int i = 0; i < Math.floor(0.95 * test.size()); i++) { System.out.println(String.format("%d trials had unacceptable accuracy of only %.0f%%: ", correct[i], 100.0 * i / test.size())); } // nor perfect System.out.println(String.format("%d trials had unrealistic accuracy of 100%%", correct[test.size() - 1])); }
From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java
License:Apache License
/**
 * Scans an RCV1-format file, printing each parsed feature value and, at the end,
 * histograms of the class labels and namespaces seen.
 *
 * Expected line format: {@code <class> <namespace> <index>:<value> <index>:<value> ...}
 *
 * @param file            path to the input file
 * @param debug_break_cnt stop scanning after this many lines (debugging aid)
 * @throws IOException if the file cannot be opened or read
 */
public static void ScanFile(String file, int debug_break_cnt) throws IOException {
    int line_count = 0;

    Multiset<String> class_count = ConcurrentHashMultiset.create();
    Multiset<String> namespaces = ConcurrentHashMultiset.create();

    // BUG FIX: the original declared `reader = null` and called reader.close() in a
    // bare finally block; if `new FileReader(file)` threw (e.g. file not found) the
    // finally clause raised a NullPointerException that masked the real IOException.
    // try-with-resources closes the reader only when it was actually opened.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String line = reader.readLine();
        while (line != null && line.length() > 0) {
            String[] parts = line.split(" ");

            class_count.add(parts[0]);
            namespaces.add(parts[1]);

            line = reader.readLine();
            line_count++;

            Vector v = new RandomAccessSparseVector(FEATURES);
            for (int x = 2; x < parts.length; x++) {
                String[] feature = parts[x].split(":");
                // Simple modulo hashing into the fixed feature space.
                int index = Integer.parseInt(feature[0]) % FEATURES;
                double val = Double.parseDouble(feature[1]);
                System.out.println(feature[1] + " = " + val);
                // index is always < FEATURES after the modulo; the else branch is
                // defensive and should be unreachable.
                if (index < FEATURES) {
                    v.set(index, val);
                } else {
                    System.out.println("Could Hash: " + index + " to " + (index % FEATURES));
                }
            }
            System.out.println("###");

            if (line_count > debug_break_cnt) {
                break;
            }
        }

        System.out.println("Total Rec Count: " + line_count);
        System.out.println("-------------------- ");
        System.out.println("Classes");
        for (String word : class_count.elementSet()) {
            System.out.println("Class " + word + ": " + class_count.count(word) + " ");
        }
        System.out.println("-------------------- ");
        System.out.println("NameSpaces:");
        for (String word : namespaces.elementSet()) {
            System.out.println("Namespace " + word + ": " + namespaces.count(word) + " ");
        }
    }
}
From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java
License:Apache License
/** * Processes single line of input into: - target variable - Feature vector * /*from w ww .ja v a 2 s . c o m*/ * Right now our hash function is simply "modulo" * * @throws Exception */ @Override public double processLineAlt(String line, Vector v) throws Exception { double actual = 0; String[] parts = line.split(" "); actual = Double.parseDouble(parts[0]); // dont know what to do the the "namespace" "f" v.set(0, 1.0); for (int x = 2; x < parts.length; x++) { String[] feature = parts[x].split(":"); int index = (Integer.parseInt(feature[0]) + 1) % FEATURES; double val = Double.parseDouble(feature[1]); if (index < FEATURES) { v.set(index, val); } else { System.out.println("Could Hash: " + index + " to " + (index % FEATURES)); } } return actual; }
From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java
License:Apache License
/**
 * Vectorizes one input line: fills {@code v_in} with the parsed features and
 * stores the target variable in slot 0 of {@code v_out}.
 *
 * @param line  input record to parse
 * @param v_in  receives the feature vector
 * @param v_out receives the target value at index 0
 * @throws Exception on malformed input
 */
@Override
public void vectorizeLine(String line, Vector v_in, Vector v_out) throws Exception {
    final double targetValue = this.processLineAlt(line, v_in);
    v_out.set(0, targetValue);
}