Example usage for org.apache.mahout.math Vector assign

List of usage examples for org.apache.mahout.math Vector assign

Introduction

On this page you can find example usage for org.apache.mahout.math Vector assign.

Prototype

Vector assign(DoubleDoubleFunction f, double y);

Document

Applies the function f to each element of the receiver, using the given double value y as the second argument of the DoubleDoubleFunction.
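
For illustration, here is a minimal sketch (not taken from the listings below; the AssignExample class name is only for this page): Functions.MULT is the DoubleDoubleFunction (x, y) -> x * y, so passing it together with a constant scales every element of the receiver in place.

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.Functions;

public class AssignExample {
    public static void main(String[] args) {
        Vector v = new DenseVector(new double[] { 1.0, 2.0, 3.0 });
        // Each element x is combined with the constant y = 0.5 via Functions.MULT (x * y).
        v.assign(Functions.MULT, 0.5);
        System.out.println(v); // prints the scaled vector, e.g. {0:0.5,1:1.0,2:1.5}
    }
}

Note that several of the examples below use the related overload assign(Vector x, DoubleDoubleFunction f), which combines the receiver element-wise with another vector (for example, Functions.PLUS for accumulation).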

Usage

From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

/** Y' Cu p(u) */
private Matrix getYtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);

    for (Element e : userRatings.nonZeroes()) {
        YtransponseCuPu.assign(Y.get(e.index()).times(confidence(e.get())), Functions.PLUS);
    }

    return columnVectorAsMatrix(YtransponseCuPu);
}

From source file:com.scaleunlimited.classify.vectors.SetNormalizer.java

License:Apache License

public void normalize(Vector vector) {
    // First count the number of non-zero values.
    double valueCount = vector.getNumNonZeroElements();

    // Set each non-zero value to 1/count of non-zero values, so that
    // it's as if these all have a count of 1, so they have equal TF.
    vector.assign(new DoubleDoubleFunction() {

        @Override
        public double apply(double curValue, double normalizedValue) {
            return (curValue > 0.0 ? normalizedValue : 0);
        }

    }, 1.0 / valueCount);
}

From source file:com.scaleunlimited.classify.vectors.TfNormalizer.java

License:Apache License

@Override
public void normalize(Vector vector) {
    double totalCount = vector.zSum();
    vector.assign(new DoubleDoubleFunction() {

        @Override
        public double apply(double curValue, double totalCount) {
            return curValue / totalCount;
        }
    }, totalCount);
}

From source file:com.skp.experiment.math.als.hadoop.DistributedImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

public Vector solve(Vector userRatings) throws IOException {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");
    //Matrix sparseY = getSparseMatrix(userRatings);
    getSparseMatrix(userRatings);
    /* Y' (Cu - I) Y +  I */
    /* Y' Cu p(u) */
    Vector YtransponseCuPu = new DenseVector(numFeatures);
    /* (Cu -I) Y */
    OpenIntObjectHashMap<Vector> CuMinusIY = new OpenIntObjectHashMap<Vector>();

    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        CuMinusIY.put(e.index(), sparseY.get(e.index()).times(confidence(e.get()) - 1));
        /* Y' Cu p(u) */
        YtransponseCuPu.assign(sparseY.get(e.index()).times(confidence(e.get())), Functions.PLUS);
    }

    Matrix YtransponseCuMinusIY = new DenseMatrix(numFeatures, numFeatures);

    /* Y' (Cu -I) Y by outer products */
    ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        for (Vector.Element feature : sparseY.get(e.index())) {
            Vector partial = CuMinusIY.get(e.index()).times(feature.get());
            YtransponseCuMinusIY.viewRow(feature.index()).assign(partial, Functions.PLUS);
        }
    }

    /* Y' (Cu - I) Y +  I  add lambda on the diagonal */
    for (int feature = 0; feature < numFeatures; feature++) {
        YtransponseCuMinusIY.setQuick(feature, feature,
                YtransponseCuMinusIY.getQuick(feature, feature) + lambda);
    }

    Matrix YtransposeCuPu = columnVectorAsMatrix(YtransponseCuPu);
    return solve(YtransposeY.plus(YtransponseCuMinusIY), YtransposeCuPu);
    //return YtransponseCuMinusIY;
}

From source file:com.skp.experiment.math.als.hadoop.ImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

/** Y' Cu p(u) */
private Matrix YtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);

    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        //YtransponseCuPu.assign(Y.get(e.index()).times(confidence(e.get())), Functions.PLUS);
        YtransponseCuPu.assign(Y.viewRow(e.index()).times(confidence(e.get())), Functions.PLUS);
    }

    return columnVectorAsMatrix(YtransponseCuPu);
}

From source file:com.ydy.cf.solver.impl.AlternatingLeastSquaresImplicitSolver.java

License:Apache License

/** Y' Cu p(u) */
private Matrix YtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);

    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        Vector curYRow = Y.viewRow(e.index());
        YtransponseCuPu.assign(curYRow.times(confidence(e.get())), Functions.PLUS);
    }

    return columnVectorAsMatrix(YtransponseCuPu);
}

From source file:de.tuberlin.dima.ml.pact.logreg.batchgd.GradientSumUp.java

License:Apache License

@Override
public void reduce(Iterator<Record> gradientParts, Collector<Record> out) throws Exception {

    // Start with values from first record
    Record first = gradientParts.next();
    IntValue modelKey = first.getField(IDX_MODEL_KEY, IntValue.class);
    Vector gradient = first.getField(IDX_GRADIENT_PART, PactVector.class).getValue();
    int total = first.getField(IDX_TOTAL, IntValue.class).getValue();
    int correct = first.getField(IDX_CORRECT, IntValue.class).getValue();
    Record record = null;
    while (gradientParts.hasNext()) {
        // Gradient sum up
        record = gradientParts.next();
        Vector gradientPart = record.getField(IDX_GRADIENT_PART, PactVector.class).getValue();
        gradient.assign(gradientPart, Functions.PLUS);

        // In sample validation
        total += record.getField(IDX_TOTAL, IntValue.class).getValue();
        correct += record.getField(IDX_CORRECT, IntValue.class).getValue();
    }

    Record recordOut = new Record();
    recordOut.setField(ApplyGradient.IDX_INPUT2_MODEL_KEY, modelKey);
    recordOut.setField(ApplyGradient.IDX_INPUT2_GRADIENT, new PactVector(gradient));
    out.collect(recordOut);

    // TODO Forward Validation results
    System.out.println("--------\nIN-SAMPLE-VALIDATION\n--------");
    System.out.println("ACCURACY (training-data, last model): " + ((double) correct / (double) total) + " (= "
            + correct + " / " + total + ")");
    System.out.println("--------");
}

From source file:hadoop.api.AggregateAndRecommendReducer.java

License:Apache License

private void reduceBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* having boolean data, each estimated preference can only be 1,
     * however we can't use this to rank the recommended items,
     * so we use the sum of similarities for that. */
    Iterator<PrefAndSimilarityColumnWritable> columns = values.iterator();
    Vector predictions = columns.next().getSimilarityColumn();
    while (columns.hasNext()) {
        predictions.assign(columns.next().getSimilarityColumn(), Functions.PLUS);
    }
    writeRecommendedItems(userID, predictions, context);
}

From source file:hadoop.api.AggregateAndRecommendReducer.java

License:Apache License

private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* each entry here is the sum in the numerator of the prediction formula */
    Vector numerators = null;
    /* each entry here is the sum in the denominator of the prediction formula */
    Vector denominators = null;
    /* each entry here is the number of similar items used in the prediction formula */
    Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) {
        Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn();
        float prefValue = prefAndSimilarityColumn.getPrefValue();
        /* count the number of items used for each prediction */
        for (Element e : simColumn.nonZeroes()) {
            int itemIDIndex = e.index();
            numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1);
        }

        if (denominators == null) {
            denominators = simColumn.clone();
        } else {
            denominators.assign(simColumn, Functions.PLUS_ABS);
        }

        if (numerators == null) {
            numerators = simColumn.clone();
            if (prefValue != BOOLEAN_PREF_VALUE) {
                numerators.assign(Functions.MULT, prefValue);
            }
        } else {
            if (prefValue != BOOLEAN_PREF_VALUE) {
                simColumn.assign(Functions.MULT, prefValue);
            }
            numerators.assign(simColumn, Functions.PLUS);
        }

    }

    if (numerators == null) {
        return;
    }

    Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (Element element : numerators.nonZeroes()) {
        int itemIDIndex = element.index();
        /* preference estimations must be based on at least 2 datapoints */
        if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) {
            /* compute normalized prediction */
            double prediction = element.get() / denominators.getQuick(itemIDIndex);
            recommendationVector.setQuick(itemIDIndex, prediction);
        }
    }
    writeRecommendedItems(userID, recommendationVector, context);
}

From source file:mlbench.bayes.train.IndexInstances.java

License:Apache License

@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);

        if (rank == 0) {
            System.out.println(IndexInstances.class.getSimpleName() + " O start.");
            createLabelIndex(labPath);
        }

        HadoopUtil.cacheFiles(labPath, config);

        MPI_D.COMM_BIPARTITE_O.Barrier();

        OpenObjectIntHashMap<String> labelIndex = BayesUtils.readIndexFromCache(config);

        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    (JobConf) config, inDir, rank);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                        fsplit);
                Text labelText = kvrr.createKey();
                VectorWritable instance = kvrr.createValue();
                while (kvrr.next(labelText, instance)) {
                    String label = SLASH.split(labelText.toString())[1];
                    if (labelIndex.containsKey(label)) {
                        MPI_D.Send(new IntWritable(labelIndex.get(label)), instance);
                    }
                }
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(IntWritable.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<IntWritable, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<IntWritable, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        IntWritable key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (IntWritable) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key == null && point == null) {
                // first record received: nothing to flush yet
            } else if (!key.equals(newKey)) {
                // key changed: write out the accumulated vector for the previous key
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }

            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }

        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }
    }

    MPI_D.Finalize();
}