List of usage examples for org.apache.mahout.math Vector set
void set(int index, double value);
From source file:org.qcri.sparkpca.PCAUtils.java
/**
 * Multiplies a dense (row) vector by a matrix: result[c] = dot(vector, column c).
 *
 * @param vector    input vector; its length must equal the matrix row count
 * @param matrix    matrix to multiply by
 * @param xm_mahout pre-allocated result vector of length numCols (reused to avoid allocation)
 * @return xm_mahout, filled with the product
 */
static Vector denseVectorTimesMatrix(Vector vector, Matrix matrix, Vector xm_mahout) {
    final int rows = matrix.numRows();
    final int cols = matrix.numCols();
    for (int col = 0; col < cols; col++) {
        double sum = 0;
        for (int row = 0; row < rows; row++) {
            sum += vector.getQuick(row) * matrix.getQuick(row, col);
        }
        xm_mahout.set(col, sum);
    }
    return xm_mahout;
}
From source file:org.trustedanalytics.atk.giraph.io.titan.formats.TitanVertexBuilder.java
License:Apache License
/** * Update vector values using Titan property value * * @param vector Mahout vector//w ww. ja v a 2s . co m * @param titanProperty Titan property * @return Updated Mahout vector */ public org.apache.mahout.math.Vector setVector(org.apache.mahout.math.Vector vector, TitanProperty titanProperty) { Object vertexValueObject = titanProperty.getValue(); if (enableVectorValue) { //one property key has a vector as value //split by either space or comma or tab String[] valueString = vertexValueObject.toString().split(regexp); int size = valueString.length; double[] data = new double[size]; vector = new DenseVector(data); for (int i = 0; i < valueString.length; i++) { vector.set(i, Double.parseDouble(valueString[i])); } } else { String propertyName = titanProperty.getPropertyKey().getName(); int propertyIndex = vertexValuePropertyKeys.get(propertyName); double vertexValue = Double.parseDouble(vertexValueObject.toString()); vector.set(propertyIndex, vertexValue); } return vector; }
From source file:org.trustedanalytics.atk.giraph.io.VertexData4LPWritable.java
License:Apache License
/** * Initialize the labels on vertex/*from w w w . jav a 2 s. c o m*/ */ private void setStatusAndUnlabeledValues(boolean wasLabeled) { if (wasLabeled == false) { Vector temp = priorWritable.get(); int size = temp.size(); for (int i = 0; i < size; i++) { temp.set(i, 1 / size); } priorWritable.set(temp); posteriorWritable.set(temp); } }
From source file:technobium.OnlineLogisticRegressionTest.java
License:Apache License
public static void main(String[] args) throws Exception { // this test trains a 3-way classifier on the famous Iris dataset. // a similar exercise can be accomplished in R using this code: // library(nnet) // correct = rep(0,100) // for (j in 1:100) { // i = order(runif(150)) // train = iris[i[1:100],] // test = iris[i[101:150],] // m = multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, train) // correct[j] = mean(predict(m, newdata=test) == test$Species) // }/*from w ww . j ava2s . c o m*/ // hist(correct) // // Note that depending on the training/test split, performance can be better or worse. // There is about a 5% chance of getting accuracy < 90% and about 20% chance of getting accuracy // of 100% // // This test uses a deterministic split that is neither outstandingly good nor bad RandomUtils.useTestSeed(); Splitter onComma = Splitter.on(","); // read the data List<String> raw = Resources.readLines(Resources.getResource("iris.csv"), Charsets.UTF_8); // holds features List<Vector> data = Lists.newArrayList(); // holds target variable List<Integer> target = Lists.newArrayList(); // for decoding target values Dictionary dict = new Dictionary(); // for permuting data later List<Integer> order = Lists.newArrayList(); for (String line : raw.subList(1, raw.size())) { // order gets a list of indexes order.add(order.size()); // parse the predictor variables Vector v = new DenseVector(5); v.set(0, 1); int i = 1; Iterable<String> values = onComma.split(line); for (String value : Iterables.limit(values, 4)) { v.set(i++, Double.parseDouble(value)); } data.add(v); // and the target target.add(dict.intern(Iterables.get(values, 4))); } // randomize the order ... 
original data has each species all together // note that this randomization is deterministic Random random = RandomUtils.getRandom(); Collections.shuffle(order, random); // select training and test data List<Integer> train = order.subList(0, 100); List<Integer> test = order.subList(100, 150); logger.warn("Training set = {}", train); logger.warn("Test set = {}", test); // now train many times and collect information on accuracy each time int[] correct = new int[test.size() + 1]; for (int run = 0; run < 200; run++) { OnlineLogisticRegression lr = new OnlineLogisticRegression(3, 5, new L2(1)); // 30 training passes should converge to > 95% accuracy nearly always but never to 100% for (int pass = 0; pass < 30; pass++) { Collections.shuffle(train, random); for (int k : train) { lr.train(target.get(k), data.get(k)); } } // check the accuracy on held out data int x = 0; int[] count = new int[3]; for (Integer k : test) { Vector vt = lr.classifyFull(data.get(k)); int r = vt.maxValueIndex(); count[r]++; x += r == target.get(k) ? 1 : 0; } correct[x]++; } // verify we never saw worse than 95% correct, for (int i = 0; i < Math.floor(0.95 * test.size()); i++) { System.out.println(String.format("%d trials had unacceptable accuracy of only %.0f%%: ", correct[i], 100.0 * i / test.size())); } // nor perfect System.out.println(String.format("%d trials had unrealistic accuracy of 100%%", correct[test.size() - 1])); }
From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java
License:Apache License
/**
 * Scans an RCV1-format file, printing each parsed feature value and, at the end,
 * histograms of the class labels and namespaces seen.
 *
 * Expected line format: {@code <class> <namespace> <index>:<value> <index>:<value> ...}
 *
 * @param file            path to the input file
 * @param debug_break_cnt stop scanning after this many lines (debugging aid)
 * @throws IOException if the file cannot be opened or read
 */
public static void ScanFile(String file, int debug_break_cnt) throws IOException {
    int line_count = 0;

    Multiset<String> class_count = ConcurrentHashMultiset.create();
    Multiset<String> namespaces = ConcurrentHashMultiset.create();

    // BUG FIX: the original declared `reader = null` and called reader.close() in a
    // bare finally block; if `new FileReader(file)` threw (e.g. file not found) the
    // finally clause raised a NullPointerException that masked the real IOException.
    // try-with-resources closes the reader only when it was actually opened.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String line = reader.readLine();
        while (line != null && line.length() > 0) {
            String[] parts = line.split(" ");

            class_count.add(parts[0]);
            namespaces.add(parts[1]);

            line = reader.readLine();
            line_count++;

            Vector v = new RandomAccessSparseVector(FEATURES);
            for (int x = 2; x < parts.length; x++) {
                String[] feature = parts[x].split(":");
                // Simple modulo hashing into the fixed feature space.
                int index = Integer.parseInt(feature[0]) % FEATURES;
                double val = Double.parseDouble(feature[1]);
                System.out.println(feature[1] + " = " + val);
                // index is always < FEATURES after the modulo; the else branch is
                // defensive and should be unreachable.
                if (index < FEATURES) {
                    v.set(index, val);
                } else {
                    System.out.println("Could Hash: " + index + " to " + (index % FEATURES));
                }
            }
            System.out.println("###");

            if (line_count > debug_break_cnt) {
                break;
            }
        }

        System.out.println("Total Rec Count: " + line_count);
        System.out.println("-------------------- ");
        System.out.println("Classes");
        for (String word : class_count.elementSet()) {
            System.out.println("Class " + word + ": " + class_count.count(word) + " ");
        }
        System.out.println("-------------------- ");
        System.out.println("NameSpaces:");
        for (String word : namespaces.elementSet()) {
            System.out.println("Namespace " + word + ": " + namespaces.count(word) + " ");
        }
    }
}
From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java
License:Apache License
/** * Processes single line of input into: - target variable - Feature vector * /*from w ww .ja v a 2 s . c o m*/ * Right now our hash function is simply "modulo" * * @throws Exception */ @Override public double processLineAlt(String line, Vector v) throws Exception { double actual = 0; String[] parts = line.split(" "); actual = Double.parseDouble(parts[0]); // dont know what to do the the "namespace" "f" v.set(0, 1.0); for (int x = 2; x < parts.length; x++) { String[] feature = parts[x].split(":"); int index = (Integer.parseInt(feature[0]) + 1) % FEATURES; double val = Double.parseDouble(feature[1]); if (index < FEATURES) { v.set(index, val); } else { System.out.println("Could Hash: " + index + " to " + (index % FEATURES)); } } return actual; }
From source file:tv.floe.metronome.io.records.RCV1RecordFactory.java
License:Apache License
/**
 * Vectorizes one input line: fills {@code v_in} with the parsed features and
 * stores the target variable in slot 0 of {@code v_out}.
 *
 * @param line  input record to parse
 * @param v_in  receives the feature vector
 * @param v_out receives the target value at index 0
 * @throws Exception on malformed input
 */
@Override
public void vectorizeLine(String line, Vector v_in, Vector v_out) throws Exception {
    final double targetValue = this.processLineAlt(line, v_in);
    v_out.set(0, targetValue);
}