List of usage examples for the getQuick method of org.apache.mahout.math.Vector
double getQuick(int index);
From source file:edu.utsa.sifter.som.SelfOrganizingMap.java
License:Apache License
public void updateCell(final int id, final double alpha, final IntArrayWritable doc) { // Scalable SOM updating, per Roussinov final double rate = 1 - alpha; final double f = CellFactors[id]; final double nextF = rate * f; // Rule 5 final double adjustment = alpha / (rate * CellFactors[id]); // Rule 6 double sumSqrOld = 0.0; double sumSqrNew = 0.0; double c1 = 0.0, // Kahan summation algorithm to account for error, c.f. http://en.wikipedia.org/wiki/Kahan_summation_algorithm c2 = 0.0, y, t;//from w w w.j a v a 2 s . c om final Vector weights = getCell(id); double weight; double trueWeight; int idx; final int[] terms = doc.getInts(); final int numTerms = doc.getLength(); for (int i = 0; i < numTerms; ++i) { idx = terms[i]; weight = weights.getQuick(idx); trueWeight = weight * f; y = (trueWeight * trueWeight) - c1; t = sumSqrOld + y; // S'(t+1) component c1 = (t - sumSqrOld) - y; sumSqrOld = t; // sumSqrOld += trueWeight * trueWeight; weight += adjustment; // adjust weight trueWeight = weight * nextF; y = (trueWeight * trueWeight) - c2; t = sumSqrNew + y; c2 = (t - sumSqrNew) - y; sumSqrNew = t; // sumSqrNew += trueWeight * trueWeight; // S_2'(t+1) component weights.setQuick(idx, weight); } CellFactors[id] = nextF; S2[id] = sumSqrNew + (rate * rate) * (S2[id] - sumSqrOld); // new S2 component }
From source file:hadoop.api.AggregateAndRecommendReducer.java
License:Apache License
/**
 * Combines the similarity columns received for one user into a vector of predicted
 * preferences and hands it to {@code writeRecommendedItems}.
 *
 * <p>Nothing is written when {@code values} is empty. Each similarity column may be scaled
 * in place by its preference value while it is being accumulated.
 *
 * @param userID  the user the predictions are computed for
 * @param values  preference value / similarity column pairs for that user
 * @param context passed through to {@code writeRecommendedItems}
 */
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
    Context context) throws IOException, InterruptedException {
  /* per item: sum in the numerator of the prediction formula */
  Vector numerators = null;
  /* per item: sum in the denominator of the prediction formula */
  Vector denominators = null;
  /* per item: number of similar items used in the prediction formula */
  Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

  for (PrefAndSimilarityColumnWritable entry : values) {
    Vector simColumn = entry.getSimilarityColumn();
    float prefValue = entry.getPrefValue();
    boolean weighted = prefValue != BOOLEAN_PREF_VALUE;

    /* count the number of items used for each prediction */
    for (Element similar : simColumn.nonZeroes()) {
      int usedIndex = similar.index();
      numberOfSimilarItemsUsed.setQuick(usedIndex, numberOfSimilarItemsUsed.getQuick(usedIndex) + 1);
    }

    if (numerators == null) {
      /* first column: both accumulators start out as copies of it */
      denominators = simColumn.clone();
      numerators = simColumn.clone();
      if (weighted) {
        numerators.assign(Functions.MULT, prefValue);
      }
    } else {
      /* the denominator must be updated before simColumn is scaled in place */
      denominators.assign(simColumn, Functions.PLUS_ABS);
      if (weighted) {
        simColumn.assign(Functions.MULT, prefValue);
      }
      numerators.assign(simColumn, Functions.PLUS);
    }
  }

  if (numerators == null) {
    return;
  }

  Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
  for (Element numerator : numerators.nonZeroes()) {
    int itemIDIndex = numerator.index();
    double support = numberOfSimilarItemsUsed.getQuick(itemIDIndex);
    /* preference estimations must be based on at least 2 datapoints */
    if (support > 1) {
      /* compute normalized prediction */
      recommendationVector.setQuick(itemIDIndex, numerator.get() / denominators.getQuick(itemIDIndex));
    }
  }
  writeRecommendedItems(userID, recommendationVector, context);
}
From source file:nl.gridline.zieook.inx.movielens.AggregateAndRecommendReducer.java
License:Apache License
/**
 * Combines the similarity columns received for one user into a vector of predicted
 * preferences and hands it to {@code writeRecommendedItems}.
 *
 * <p>Nothing is written when {@code values} is empty. Each similarity column is replaced
 * in place by its absolute values once it has been added to the numerators.
 *
 * @param userID  the user the predictions are computed for
 * @param values  preference value / similarity column pairs for that user
 * @param context passed through to {@code writeRecommendedItems}
 */
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
    Context context) throws IOException, InterruptedException {
  /* per item: sum in the numerator of the prediction formula */
  Vector numerators = null;
  /* per item: sum in the denominator of the prediction formula */
  Vector denominators = null;
  /* per item: number of similar items used in the prediction formula */
  Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

  for (PrefAndSimilarityColumnWritable entry : values) {
    Vector simColumn = entry.getSimilarityColumn();
    float prefValue = entry.getPrefValue();

    /* count the number of items used for each prediction */
    for (Iterator<Vector.Element> used = simColumn.iterateNonZero(); used.hasNext();) {
      int usedIndex = used.next().index();
      numberOfSimilarItemsUsed.setQuick(usedIndex, numberOfSimilarItemsUsed.getQuick(usedIndex) + 1);
    }

    if (numerators == null) {
      if (prefValue == BOOLEAN_PREF_VALUE) {
        numerators = simColumn.clone();
      } else {
        numerators = simColumn.times(prefValue);
      }
    } else {
      Vector contribution;
      if (prefValue == BOOLEAN_PREF_VALUE) {
        contribution = simColumn;
      } else {
        contribution = simColumn.times(prefValue);
      }
      numerators = numerators.plus(contribution);
    }

    /* the denominator accumulates absolute similarities */
    simColumn.assign(ABSOLUTE_VALUES);
    if (denominators == null) {
      denominators = simColumn;
    } else {
      denominators = denominators.plus(simColumn);
    }
  }

  if (numerators == null) {
    return;
  }

  Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
  for (Iterator<Vector.Element> cursor = numerators.iterateNonZero(); cursor.hasNext();) {
    Vector.Element numerator = cursor.next();
    int itemIDIndex = numerator.index();
    double support = numberOfSimilarItemsUsed.getQuick(itemIDIndex);
    /* preference estimations must be based on at least 2 datapoints */
    if (support > 1) {
      /* compute normalized prediction */
      recommendationVector.setQuick(itemIDIndex, numerator.get() / denominators.getQuick(itemIDIndex));
    }
  }
  writeRecommendedItems(userID, recommendationVector, context);
}
From source file:org.qcri.pca.MahoutCompatibilityTest.java
License:Apache License
@Test public void testMAHOUT_1221() { // create a matrix with an unassigned row 0 Matrix matrix = new SparseMatrix(1, 1); Vector view = matrix.viewRow(0); final double value = 1.23; view.assign(value);//from ww w . ja v a 2s . c o m // test whether the update in the view is reflected in the matrix assertEquals("Matrix valye", view.getQuick(0), matrix.getQuick(0, 0), EPSILON); }
From source file:org.qcri.pca.MeanAndSpanJob.java
/** * This method overrides the Vector.assign method to allow optimization for * ZeroIndifferent functions/*from ww w .ja va 2s.co m*/ * * @param vector * the vector to be updated * @param other * the other vector * @param function * the function that operates on elements of the two vectors * @return the modified vector */ static public Vector vectorAssign(Vector vector, Vector other, ZeroIndifferentFunc function) { if (vector.size() != other.size()) { throw new CardinalityException(vector.size(), other.size()); } // special case: iterate only over the non-zero elements of the vector to // add Iterator<Element> it = other.nonZeroes().iterator(); Element e; while (it.hasNext() && (e = it.next()) != null) { double val = vector.getQuick(e.index()); double newVal = function.apply(val, e.get()); vector.setQuick(e.index(), newVal); } return vector; }
From source file:org.qcri.pca.NormalizeJob.java
static void sparseVectorAssign(Vector mainV, final Vector otherV, DoubleDoubleFunction function) { java.util.Vector<IndexValue> newZeroElements = new java.util.Vector<IndexValue>(); Iterator<Vector.Element> nonZeroElements = mainV.nonZeroes().iterator(); while (nonZeroElements.hasNext()) { Vector.Element e = nonZeroElements.next(); double res = function.apply(e.get(), otherV.getQuick(e.index())); if (res != 0) mainV.setQuick(e.index(), res); else //Don't affect the iterator newZeroElements.add(new IndexValue(e.index(), res)); }//from w w w.j a va2s .co m for (IndexValue iv : newZeroElements) mainV.setQuick(iv.index, iv.value); }
From source file:org.qcri.pca.PCACommon.java
/**
 * Computes {@code vector * matrix'}: entry {@code r} of the result is the sum over all
 * columns {@code c} of {@code vector[c] * matrix[r][c]}.
 *
 * @param vector    the left operand; read at indices {@code 0 .. matrix.numCols() - 1}
 * @param matrix    the matrix whose rows are combined with {@code vector}
 * @param resVector receives one entry per matrix row (overwritten in place)
 * @return {@code resVector}
 */
static Vector vectorTimesMatrixTranspose(Vector vector, Matrix matrix, DenseVector resVector) {
  final int rowCount = matrix.numRows();
  final int colCount = matrix.numCols();
  for (int row = 0; row < rowCount; row++) {
    double sum = 0;
    for (int col = 0; col < colCount; col++) {
      sum += vector.getQuick(col) * matrix.getQuick(row, col);
    }
    resVector.set(row, sum);
  }
  return resVector;
}
From source file:org.qcri.pca.ReconstructionErrJob.java
/**
 * Adds {@code |sparseVector[i] - meanVector[i]|} to every element {@code i} of
 * {@code denseVector}, in place.
 *
 * @param denseVector  the accumulator; its size determines how many indices are visited
 * @param sparseVector the values whose distance from the mean is accumulated
 * @param meanVector   the per-index mean subtracted from {@code sparseVector}
 */
static void denseVectorPlusAbsDenseDiff(DenseVector denseVector, Vector sparseVector, DenseVector meanVector) {
  final int length = denseVector.size();
  for (int pos = 0; pos < length; pos++) {
    final double current = denseVector.getQuick(pos);
    final double deviation = Math.abs(sparseVector.getQuick(pos) - meanVector.getQuick(pos));
    denseVector.setQuick(pos, current + deviation);
  }
}
From source file:org.qcri.pca.ReconstructionErrJob.java
static void denseVectorSubtractSparseSubtractDense(DenseVector mainVector, Vector subtractor1, DenseVector subtractor2) {//from w ww.ja va2 s . co m int nCols = mainVector.size(); for (int c = 0; c < nCols; c++) { double v = mainVector.getQuick(c); v -= subtractor1.getQuick(c); v -= subtractor2.getQuick(c); mainVector.setQuick(c, v); } }
From source file:org.qcri.pca.SPCADriver.java
/** * Run PPCA sequentially given the small input Y which fit into memory This * could be used also on sampled data from a distributed matrix * /*from w w w . j a v a 2 s. com*/ * Note: this implementation ignore NaN values by replacing them with 0 * * @param conf * the configuration * @param centralY * the input matrix * @param initVal * the initial values for C and ss * @param MAX_ROUNDS * maximum number of iterations * @return the error * @throws Exception */ double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS) throws Exception { Matrix centralC = initVal.C; double ss = initVal.ss; final int nRows = centralY.numRows(); final int nCols = centralY.numCols(); final int nPCs = centralC.numCols(); final float threshold = 0.00001f; log.info("tracec= " + PCACommon.trace(centralC)); //ignore NaN elements by replacing them with 0 for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) if (new Double(centralY.getQuick(r, c)).isNaN()) { centralY.setQuick(r, c, 0); } //centralize and normalize the input matrix Vector mean = centralY.aggregateColumns(new VectorFunction() { @Override public double apply(Vector v) { return v.zSum() / nRows; } }); //also normalize the matrix by dividing each element by its columns range Vector spanVector = new DenseVector(nCols); for (int c = 0; c < nCols; c++) { Vector col = centralY.viewColumn(c); double max = col.maxValue(); double min = col.minValue(); double span = max - min; spanVector.setQuick(c, span); } for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) centralY.set(r, c, (centralY.get(r, c) - mean.get(c)) / (spanVector.getQuick(c) != 0 ? 
spanVector.getQuick(c) : 1)); Matrix centralCtC = centralC.transpose().times(centralC); log.info("tracectc= " + PCACommon.trace(centralCtC)); log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC))); log.info("traceye= " + PCACommon.trace(centralY)); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); int count = 1; // old = Inf; double old = Double.MAX_VALUE; // -------------------------- EM Iterations // while count Matrix centralX = null; int round = 0; while (round < MAX_ROUNDS && count > 0) { round++; // Sx = inv( eye(d) + CtC/ss ); Matrix Sx = eye(nPCs).times(ss).plus(centralCtC); Sx = inv(Sx); // X = Ye*C*(Sx/ss); centralX = centralY.times(centralC).times(Sx.transpose()); // XtX = X'*X + ss * Sx; Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss)); // C = (Ye'*X) / XtX; Matrix tmpInv = inv(centralXtX); centralC = centralY.transpose().times(centralX).times(tmpInv); // CtC = C'*C; centralCtC = centralC.transpose().times(centralC); // ss = ( sum(sum( (X*C'-Ye).^2 )) + trace(XtX*CtC) - 2*xcty ) /(N*D); double norm2 = centralY.clone().assign(new DoubleFunction() { @Override public double apply(double arg1) { return arg1 * arg1; } }).zSum(); ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC)); //ss3 = sum (X(i:0) * C' * Y(i,:)') DenseVector resVector = new DenseVector(nCols); double xctyt = 0; for (int i = 0; i < nRows; i++) { PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector); double res = resVector.dot(centralY.viewRow(i)); xctyt += res; } ss -= 2 * xctyt; ss /= (nRows * nCols); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); double traceSx = PCACommon.trace(Sx); double traceX = PCACommon.trace(centralX); double traceSumXtX = PCACommon.trace(centralXtX); double traceC = PCACommon.trace(centralC); double traceCtC = PCACommon.trace(centralCtC); log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " " + traceCtC + " " + 0); double objective = ss; double rel_ch = 
Math.abs(1 - objective / old); old = objective; count++; if (rel_ch < threshold && count > 5) count = 0; log.info("Objective: %.6f relative change: %.6f \n", objective, rel_ch); } double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of Ye is: " + norm1Y); Matrix newYerror = centralY.minus(centralX.times(centralC.transpose())); double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of the reconstruction error is: " + norm1Err); initVal.C = centralC; initVal.ss = ss; return norm1Err / norm1Y; }