Example usage for org.apache.mahout.math Vector assign

List of usage examples for org.apache.mahout.math Vector assign

Introduction

On this page you can find example usage for org.apache.mahout.math Vector assign.

Prototype

Vector assign(DoubleDoubleFunction f, double y);

Document

Applies the function f to each element of the receiver, using the given double value y as the second argument of the DoubleDoubleFunction.
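
For illustration, here is a minimal sketch (not taken from the listings below; the AssignExample class name is only for this page): Functions.MULT is the DoubleDoubleFunction (x, y) -> x * y, so passing it together with a constant scales every element of the receiver in place.

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.Functions;

public class AssignExample {
    public static void main(String[] args) {
        Vector v = new DenseVector(new double[] { 1.0, 2.0, 3.0 });
        // Each element x is combined with the constant y = 0.5 via Functions.MULT (x * y).
        v.assign(Functions.MULT, 0.5);
        System.out.println(v); // prints the scaled vector, e.g. {0:0.5,1:1.0,2:1.5}
    }
}

Note that several of the examples below use the related overload assign(Vector x, DoubleDoubleFunction f), which combines the receiver element-wise with another vector (for example, Functions.PLUS for accumulation).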

Usage

From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

/** Y' Cu p(u) */
private Matrix getYtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);

    for (Element e : userRatings.nonZeroes()) {
        YtransponseCuPu.assign(Y.get(e.index()).times(confidence(e.get())), Functions.PLUS);
    }

    return columnVectorAsMatrix(YtransponseCuPu);
}

From source file:com.scaleunlimited.classify.vectors.SetNormalizer.java

License:Apache License

public void normalize(Vector vector) {
    // First count the number of non-zero values.
    double valueCount = vector.getNumNonZeroElements();

    // Set each non-zero value to 1/count of non-zero values, so that
    // it's as if these all have a count of 1, so they have equal TF.
    vector.assign(new DoubleDoubleFunction() {

        @Override
        public double apply(double curValue, double normalizedValue) {
            return (curValue > 0.0 ? normalizedValue : 0);
        }

    }, 1.0 / valueCount);
}

From source file:com.scaleunlimited.classify.vectors.TfNormalizer.java

License:Apache License

@Override
public void normalize(Vector vector) {
    double totalCount = vector.zSum();
    vector.assign(new DoubleDoubleFunction() {

        @Override
        public double apply(double curValue, double totalCount) {
            return curValue / totalCount;
        }
    }, totalCount);
}

From source file:com.skp.experiment.math.als.hadoop.DistributedImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

public Vector solve(Vector userRatings) throws IOException {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");
    //Matrix sparseY = getSparseMatrix(userRatings);
    getSparseMatrix(userRatings);
    /* Y' (Cu - I) Y +  I */
    /* Y' Cu p(u) */
    Vector YtransponseCuPu = new DenseVector(numFeatures);
    /* (Cu -I) Y */
    OpenIntObjectHashMap<Vector> CuMinusIY = new OpenIntObjectHashMap<Vector>();

    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        CuMinusIY.put(e.index(), sparseY.get(e.index()).times(confidence(e.get()) - 1));
        /* Y' Cu p(u) */
        YtransponseCuPu.assign(sparseY.get(e.index()).times(confidence(e.get())), Functions.PLUS);
    }

    Matrix YtransponseCuMinusIY = new DenseMatrix(numFeatures, numFeatures);

    /* Y' (Cu -I) Y by outer products */
    ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        for (Vector.Element feature : sparseY.get(e.index())) {
            Vector partial = CuMinusIY.get(e.index()).times(feature.get());
            YtransponseCuMinusIY.viewRow(feature.index()).assign(partial, Functions.PLUS);
        }
    }

    /* Y' (Cu - I) Y +  I  add lambda on the diagonal */
    for (int feature = 0; feature < numFeatures; feature++) {
        YtransponseCuMinusIY.setQuick(feature, feature,
                YtransponseCuMinusIY.getQuick(feature, feature) + lambda);
    }

    Matrix YtransposeCuPu = columnVectorAsMatrix(YtransponseCuPu);
    return solve(YtransposeY.plus(YtransponseCuMinusIY), YtransposeCuPu);
    //return YtransponseCuMinusIY;
}

From source file:com.skp.experiment.math.als.hadoop.ImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

/** Y' Cu p(u) */
private Matrix YtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);

    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        //YtransponseCuPu.assign(Y.get(e.index()).times(confidence(e.get())), Functions.PLUS);
        YtransponseCuPu.assign(Y.viewRow(e.index()).times(confidence(e.get())), Functions.PLUS);
    }

    return columnVectorAsMatrix(YtransponseCuPu);
}

From source file:com.ydy.cf.solver.impl.AlternatingLeastSquaresImplicitSolver.java

License:Apache License

/** Y' Cu p(u) */
private Matrix YtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    Vector YtransponseCuPu = new DenseVector(numFeatures);

    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        Vector curYRow = Y.viewRow(e.index());
        YtransponseCuPu.assign(curYRow.times(confidence(e.get())), Functions.PLUS);
    }

    return columnVectorAsMatrix(YtransponseCuPu);
}

From source file:de.tuberlin.dima.ml.pact.logreg.batchgd.GradientSumUp.java

License:Apache License

@Override
public void reduce(Iterator<Record> gradientParts, Collector<Record> out) throws Exception {

    // Start with values from first record
    Record first = gradientParts.next();
    IntValue modelKey = first.getField(IDX_MODEL_KEY, IntValue.class);
    Vector gradient = first.getField(IDX_GRADIENT_PART, PactVector.class).getValue();
    int total = first.getField(IDX_TOTAL, IntValue.class).getValue();
    int correct = first.getField(IDX_CORRECT, IntValue.class).getValue();
    Record record = null;
    while (gradientParts.hasNext()) {
        // Gradient sum up
        record = gradientParts.next();
        Vector gradientPart = record.getField(IDX_GRADIENT_PART, PactVector.class).getValue();
        gradient.assign(gradientPart, Functions.PLUS);

        // In sample validation
        total += record.getField(IDX_TOTAL, IntValue.class).getValue();
        correct += record.getField(IDX_CORRECT, IntValue.class).getValue();
    }

    Record recordOut = new Record();
    recordOut.setField(ApplyGradient.IDX_INPUT2_MODEL_KEY, modelKey);
    recordOut.setField(ApplyGradient.IDX_INPUT2_GRADIENT, new PactVector(gradient));
    out.collect(recordOut);

    // TODO Forward Validation results
    System.out.println("--------\nIN-SAMPLE-VALIDATION\n--------");
    System.out.println("ACCURACY (training-data, last model): " + ((double) correct / (double) total) + " (= "
            + correct + " / " + total + ")");
    System.out.println("--------");
}

From source file:hadoop.api.AggregateAndRecommendReducer.java

License:Apache License

private void reduceBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* having boolean data, each estimated preference can only be 1,
     * however we can't use this to rank the recommended items,
     * so we use the sum of similarities for that. */
    Iterator<PrefAndSimilarityColumnWritable> columns = values.iterator();
    Vector predictions = columns.next().getSimilarityColumn();
    while (columns.hasNext()) {
        predictions.assign(columns.next().getSimilarityColumn(), Functions.PLUS);
    }
    writeRecommendedItems(userID, predictions, context);
}

From source file:hadoop.api.AggregateAndRecommendReducer.java

License:Apache License

private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* each entry here is the sum in the numerator of the prediction formula */
    Vector numerators = null;
    /* each entry here is the sum in the denominator of the prediction formula */
    Vector denominators = null;
    /* each entry here is the number of similar items used in the prediction formula */
    Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) {
        Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn();
        float prefValue = prefAndSimilarityColumn.getPrefValue();
        /* count the number of items used for each prediction */
        for (Element e : simColumn.nonZeroes()) {
            int itemIDIndex = e.index();
            numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1);
        }

        if (denominators == null) {
            denominators = simColumn.clone();
        } else {
            denominators.assign(simColumn, Functions.PLUS_ABS);
        }

        if (numerators == null) {
            numerators = simColumn.clone();
            if (prefValue != BOOLEAN_PREF_VALUE) {
                numerators.assign(Functions.MULT, prefValue);
            }
        } else {
            if (prefValue != BOOLEAN_PREF_VALUE) {
                simColumn.assign(Functions.MULT, prefValue);
            }
            numerators.assign(simColumn, Functions.PLUS);
        }

    }

    if (numerators == null) {
        return;
    }

    Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (Element element : numerators.nonZeroes()) {
        int itemIDIndex = element.index();
        /* preference estimations must be based on at least 2 datapoints */
        if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) {
            /* compute normalized prediction */
            double prediction = element.get() / denominators.getQuick(itemIDIndex);
            recommendationVector.setQuick(itemIDIndex, prediction);
        }
    }
    writeRecommendedItems(userID, recommendationVector, context);
}

From source file:mlbench.bayes.train.IndexInstances.java

License:Apache License

@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);

        if (rank == 0) {
            System.out.println(IndexInstances.class.getSimpleName() + " O start.");
            createLabelIndex(labPath);
        }

        HadoopUtil.cacheFiles(labPath, config);

        MPI_D.COMM_BIPARTITE_O.Barrier();

        OpenObjectIntHashMap<String> labelIndex = BayesUtils.readIndexFromCache(config);

        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    (JobConf) config, inDir, rank);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                        fsplit);
                Text labelText = kvrr.createKey();
                VectorWritable instance = kvrr.createValue();
                while (kvrr.next(labelText, instance)) {
                    String label = SLASH.split(labelText.toString())[1];
                    if (labelIndex.containsKey(label)) {
                        MPI_D.Send(new IntWritable(labelIndex.get(label)), instance);
                    }
                }
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(IntWritable.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<IntWritable, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<IntWritable, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        IntWritable key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (IntWritable) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key == null && point == null) {
                // first record received: nothing to flush yet
            } else if (!key.equals(newKey)) {
                // key changed: write out the accumulated vector for the previous key
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }

            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }

        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }
    }

    MPI_D.Finalize();
}