List of usage examples for org.apache.mahout.math.Vector.assign
Vector assign(DoubleDoubleFunction f, double y);
Vector assign(Vector other, DoubleDoubleFunction function);

The first overload applies f(x, y) to every element x, with y held constant; the second applies function(x, other_i) element-wise against a second vector. The examples below exercise both.
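A minimal, self-contained sketch of the two call forms (the class name and the values are invented for illustration; only Mahout's math module is assumed on the classpath):

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.Functions;

public class AssignDemo {
    public static void main(String[] args) {
        Vector v = new DenseVector(new double[] { 1.0, 2.0, 3.0 });
        Vector w = new DenseVector(new double[] { 10.0, 20.0, 30.0 });

        // assign(DoubleDoubleFunction, double): v[i] = f(v[i], y) for every element
        v.assign(Functions.MULT, 2.0);      // v is now [2.0, 4.0, 6.0]

        // assign(Vector, DoubleDoubleFunction): v[i] = f(v[i], w[i]) element-wise
        v.assign(w, Functions.PLUS);        // v is now [12.0, 24.0, 36.0]

        System.out.println(v);
    }
}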
From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java
License:Apache License
/** Y' Cu p(u) */
private Matrix getYtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);
    for (Element e : userRatings.nonZeroes()) {
        YtransponseCuPu.assign(Y.get(e.index()).times(confidence(e.get())), Functions.PLUS);
    }
    return columnVectorAsMatrix(YtransponseCuPu);
}
From source file:com.scaleunlimited.classify.vectors.SetNormalizer.java
License:Apache License
public void normalize(Vector vector) {
    // First count the number of non-zero values.
    double valueCount = vector.getNumNonZeroElements();

    // Set each non-zero value to 1/count of non-zero values, so that
    // it's as if these all have a count of 1, so they have equal TF.
    vector.assign(new DoubleDoubleFunction() {
        @Override
        public double apply(double curValue, double normalizedValue) {
            return (curValue > 0.0 ? normalizedValue : 0);
        }
    }, 1.0 / valueCount);
}
From source file:com.scaleunlimited.classify.vectors.TfNormalizer.java
License:Apache License
@Override
public void normalize(Vector vector) {
    double totalCount = vector.zSum();
    vector.assign(new DoubleDoubleFunction() {
        @Override
        public double apply(double curValue, double totalCount) {
            return curValue / totalCount;
        }
    }, totalCount);
}
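Both normalizers above share one pattern: assign(DoubleDoubleFunction, double) visits every element with a single constant. A small self-contained sketch of the TfNormalizer case (class name and values invented for illustration); the anonymous function there is equivalent to the stock Functions.DIV, whereas SetNormalizer genuinely needs a custom function, since no predefined DoubleDoubleFunction maps non-zeros to a constant:

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.Functions;

public class TfNormalizeDemo {
    public static void main(String[] args) {
        // A small term-count vector: zSum() == 4.0
        Vector counts = new DenseVector(new double[] { 3.0, 0.0, 1.0 });
        // Equivalent to TfNormalizer.normalize(): divide every element by the total
        counts.assign(Functions.DIV, counts.zSum());
        System.out.println(counts); // elements are now 0.75, 0.0, 0.25
    }
}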
From source file:com.skp.experiment.math.als.hadoop.DistributedImplicitFeedbackAlternatingLeastSquaresSolver.java
License:Apache License
public Vector solve(Vector userRatings) throws IOException {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    //Matrix sparseY = getSparseMatrix(userRatings);
    getSparseMatrix(userRatings);

    /* Y' (Cu - I) Y + I */
    /* Y' Cu p(u) */
    Vector YtransponseCuPu = new DenseVector(numFeatures);

    /* (Cu - I) Y */
    OpenIntObjectHashMap<Vector> CuMinusIY = new OpenIntObjectHashMap<Vector>();
    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        CuMinusIY.put(e.index(), sparseY.get(e.index()).times(confidence(e.get()) - 1));
        /* Y' Cu p(u) */
        YtransponseCuPu.assign(sparseY.get(e.index()).times(confidence(e.get())), Functions.PLUS);
    }

    Matrix YtransponseCuMinusIY = new DenseMatrix(numFeatures, numFeatures);
    /* Y' (Cu - I) Y by outer products */
    ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        for (Vector.Element feature : sparseY.get(e.index())) {
            Vector partial = CuMinusIY.get(e.index()).times(feature.get());
            YtransponseCuMinusIY.viewRow(feature.index()).assign(partial, Functions.PLUS);
        }
    }

    /* Y' (Cu - I) Y + I: add lambda on the diagonal */
    for (int feature = 0; feature < numFeatures; feature++) {
        YtransponseCuMinusIY.setQuick(feature, feature,
                YtransponseCuMinusIY.getQuick(feature, feature) + lambda);
    }

    Matrix YtransposeCuPu = columnVectorAsMatrix(YtransponseCuPu);
    return solve(YtransposeY.plus(YtransponseCuMinusIY), YtransposeCuPu);
    //return YtransponseCuMinusIY;
}
From source file:com.skp.experiment.math.als.hadoop.ImplicitFeedbackAlternatingLeastSquaresSolver.java
License:Apache License
/** Y' Cu p(u) */
private Matrix YtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);
    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        //YtransponseCuPu.assign(Y.get(e.index()).times(confidence(e.get())), Functions.PLUS);
        YtransponseCuPu.assign(Y.viewRow(e.index()).times(confidence(e.get())), Functions.PLUS);
    }
    return columnVectorAsMatrix(YtransponseCuPu);
}
From source file:com.ydy.cf.solver.impl.AlternatingLeastSquaresImplicitSolver.java
License:Apache License
/** Y' Cu p(u) */
private Matrix YtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);
    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        Vector curYRow = Y.viewRow(e.index());
        YtransponseCuPu.assign(curYRow.times(confidence(e.get())), Functions.PLUS);
    }
    return columnVectorAsMatrix(YtransponseCuPu);
}
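The four ALS solvers above build Y' Cu p(u) identically: for each item the user rated, scale that item's row of the feature matrix Y by the rating's confidence and accumulate into a dense vector with Functions.PLUS. A stripped-down sketch of just that accumulation; the matrix contents, sizes, and the confidence formula (1 + alpha * r, as in Hu, Koren and Volinsky) are invented for illustration:

import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.Functions;

public class YtCuPuDemo {
    // Hypothetical confidence function: 1 + alpha * r with alpha = 40
    static double confidence(double rating) {
        return 1.0 + 40.0 * rating;
    }

    public static void main(String[] args) {
        int numItems = 4, numFeatures = 2;
        Matrix Y = new DenseMatrix(numItems, numFeatures);
        Y.assign(1.0); // placeholder item-feature values

        // A user who rated items 0 and 3
        Vector userRatings = new RandomAccessSparseVector(numItems);
        userRatings.setQuick(0, 5.0);
        userRatings.setQuick(3, 2.0);

        Vector YtransponseCuPu = new DenseVector(numFeatures);
        for (Vector.Element e : userRatings.nonZeroes()) {
            // accumulate confidence-weighted rows of Y, as in the solvers above
            YtransponseCuPu.assign(Y.viewRow(e.index()).times(confidence(e.get())), Functions.PLUS);
        }
        System.out.println(YtransponseCuPu);
    }
}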
From source file:de.tuberlin.dima.ml.pact.logreg.batchgd.GradientSumUp.java
License:Apache License
@Override
public void reduce(Iterator<Record> gradientParts, Collector<Record> out) throws Exception {
    // Start with values from first record
    Record first = gradientParts.next();
    IntValue modelKey = first.getField(IDX_MODEL_KEY, IntValue.class);
    Vector gradient = first.getField(IDX_GRADIENT_PART, PactVector.class).getValue();
    int total = first.getField(IDX_TOTAL, IntValue.class).getValue();
    int correct = first.getField(IDX_CORRECT, IntValue.class).getValue();

    Record record = null;
    while (gradientParts.hasNext()) {
        // Gradient sum up
        record = gradientParts.next();
        Vector gradientPart = record.getField(IDX_GRADIENT_PART, PactVector.class).getValue();
        gradient.assign(gradientPart, Functions.PLUS);

        // In-sample validation
        total += record.getField(IDX_TOTAL, IntValue.class).getValue();
        correct += record.getField(IDX_CORRECT, IntValue.class).getValue();
    }

    Record recordOut = new Record();
    recordOut.setField(ApplyGradient.IDX_INPUT2_MODEL_KEY, modelKey);
    recordOut.setField(ApplyGradient.IDX_INPUT2_GRADIENT, new PactVector(gradient));
    out.collect(recordOut);

    // TODO Forward Validation results
    System.out.println("--------\nIN-SAMPLE-VALIDATION\n--------");
    System.out.println("ACCURACY (training-data, last model): " + ((double) correct / (double) total)
            + " (= " + correct + " / " + total + ")");
    System.out.println("--------");
}
From source file:hadoop.api.AggregateAndRecommendReducer.java
License:Apache License
private void reduceBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* having boolean data, each estimated preference can only be 1,
     * however we can't use this to rank the recommended items,
     * so we use the sum of similarities for that. */
    Iterator<PrefAndSimilarityColumnWritable> columns = values.iterator();
    Vector predictions = columns.next().getSimilarityColumn();
    while (columns.hasNext()) {
        predictions.assign(columns.next().getSimilarityColumn(), Functions.PLUS);
    }
    writeRecommendedItems(userID, predictions, context);
}
From source file:hadoop.api.AggregateAndRecommendReducer.java
License:Apache License
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* each entry here is the sum in the numerator of the prediction formula */
    Vector numerators = null;
    /* each entry here is the sum in the denominator of the prediction formula */
    Vector denominators = null;
    /* each entry here is the number of similar items used in the prediction formula */
    Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) {
        Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn();
        float prefValue = prefAndSimilarityColumn.getPrefValue();
        /* count the number of items used for each prediction */
        for (Element e : simColumn.nonZeroes()) {
            int itemIDIndex = e.index();
            numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1);
        }

        if (denominators == null) {
            denominators = simColumn.clone();
        } else {
            denominators.assign(simColumn, Functions.PLUS_ABS);
        }

        if (numerators == null) {
            numerators = simColumn.clone();
            if (prefValue != BOOLEAN_PREF_VALUE) {
                numerators.assign(Functions.MULT, prefValue);
            }
        } else {
            if (prefValue != BOOLEAN_PREF_VALUE) {
                simColumn.assign(Functions.MULT, prefValue);
            }
            numerators.assign(simColumn, Functions.PLUS);
        }
    }

    if (numerators == null) {
        return;
    }

    Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (Element element : numerators.nonZeroes()) {
        int itemIDIndex = element.index();
        /* preference estimations must be based on at least 2 datapoints */
        if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) {
            /* compute normalized prediction */
            double prediction = element.get() / denominators.getQuick(itemIDIndex);
            recommendationVector.setQuick(itemIDIndex, prediction);
        }
    }
    writeRecommendedItems(userID, recommendationVector, context);
}
From source file:mlbench.bayes.train.IndexInstances.java
License:Apache License
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        if (rank == 0) {
            System.out.println(IndexInstances.class.getSimpleName() + " O start.");
            createLabelIndex(labPath);
        }
        HadoopUtil.cacheFiles(labPath, config);
        MPI_D.COMM_BIPARTITE_O.Barrier();

        OpenObjectIntHashMap<String> labelIndex = BayesUtils.readIndexFromCache(config);
        if (MPI_D.COMM_BIPARTITE_O != null) { // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    (JobConf) config, inDir, rank);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                        fsplit);
                Text labelText = kvrr.createKey();
                VectorWritable instance = kvrr.createValue();
                while (kvrr.next(labelText, instance)) {
                    String label = SLASH.split(labelText.toString())[1];
                    if (labelIndex.containsKey(label)) {
                        MPI_D.Send(new IntWritable(labelIndex.get(label)), instance);
                    }
                }
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString());
        ((JobConf) config).setOutputKeyClass(IntWritable.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<IntWritable, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<IntWritable, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        IntWritable key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (IntWritable) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key == null && point == null) {
                // first record: nothing to flush yet
            } else if (!key.equals(newKey)) {
                // label changed: write out the summed vector for the previous label
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }
            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }
        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }
    }
    MPI_D.Finalize();
}