org.swjtu.helloworldcn.ComputeAvailabilitiesJob.java Source code

Java tutorial

Introduction

Here is the source code for org.swjtu.helloworldcn.ComputeAvailabilitiesJob.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.swjtu.helloworldcn;

import java.io.IOException;
import java.net.URI;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.math.MatrixSlice;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Map-only Hadoop job that rewrites every row of matrix {@code A} from the
 * row of matrix {@code R} that carries the same index, and blends the result
 * with the previous contents of the {@code A} row.
 *
 * <p>NOTE(review): judging by the names this looks like the availability
 * update of affinity propagation, with {@code R} holding responsibilities;
 * confirm against the callers before relying on that reading.
 *
 * <p>The class is annotated {@link Deprecated} in the original source; no
 * replacement is visible from this file.
 *
 * @author Tang
 * @since 2012-3
 */
@Deprecated
public final class ComputeAvailabilitiesJob {
    /** Configuration key holding the URI of the row path of matrix {@code R}. */
    public static final String R_PATH = "R_PATH";
    /** Configuration key holding the URI of the row path of matrix {@code st}. */
    public static final String ST_PATH = "ST_PATH";
    /** Configuration key holding the URI of the temp path of matrix {@code st}. */
    public static final String TMP_ST_PATH = "TMP_ST_PATH";
    /** Configuration key holding the URI of the temp path of matrix {@code R}. */
    public static final String TMP_R_PATH = "TMP_R_PATH";
    /** Configuration key holding the matrix dimension (written from {@code A.numRows()}). */
    public static final String COL_NUMS = "COL_NUMS";

    private static final Logger log = LoggerFactory.getLogger(ComputeAvailabilitiesJob.class);

    /** Static entry points only; not instantiable. */
    private ComputeAvailabilitiesJob() {
    }

    /**
     * Configures and runs the map-only job over the rows of {@code A} and wraps
     * the job output in a new {@link DistributedRowMatrix}.
     *
     * @param A          matrix whose rows are the job input; its dimensions are reused for the result
     * @param R          matrix the mapper reads row-by-row (published under {@link #R_PATH})
     * @param st         matrix whose paths are published under {@link #ST_PATH} and
     *                   {@link #TMP_ST_PATH}; the mapper in this file does not read it
     * @param outputPath directory the job writes to; qualified against the default file system
     * @param tmpPath    temp path handed to the returned matrix
     * @return a matrix over the qualified output path with the same dimensions as {@code A}
     * @throws IOException            if file-system access or job submission fails
     * @throws ClassNotFoundException propagated from {@link Job#waitForCompletion(boolean)}
     * @throws InterruptedException   if the wait for job completion is interrupted
     * @throws IllegalStateException  if the job finishes unsuccessfully
     */
    public static DistributedRowMatrix runJob(DistributedRowMatrix A, DistributedRowMatrix R,
            DistributedRowMatrix st, Path outputPath, Path tmpPath)
            throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path aPath = fs.makeQualified(A.getRowPath());
        Path qualifiedOutput = fs.makeQualified(outputPath);

        // Publish the side-input locations so the mappers can reopen the matrices.
        conf.set(ST_PATH, st.getRowPath().toUri().toString());
        conf.set(TMP_ST_PATH, st.getOutputTempPath().toUri().toString());
        // These must point at R, not A: A is already the job input, and the
        // mapper looks up the matching row of R for every A row it receives.
        conf.set(R_PATH, R.getRowPath().toUri().toString());
        conf.set(TMP_R_PATH, R.getOutputTempPath().toUri().toString());
        conf.set(COL_NUMS, Integer.toString(A.numRows()));

        // Map-only job: sequence files in, sequence files out, no reducers.
        Job job = new Job(conf, "ComputeAvailabilities");
        job.setJarByClass(ComputeAvailabilitiesJob.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(VectorWritable.class);
        job.setMapperClass(ComputeAvailabilitiesMapper.class);
        job.setNumReduceTasks(0);

        FileInputFormat.addInputPath(job, aPath);
        FileOutputFormat.setOutputPath(job, qualifiedOutput);

        // Do not hand back a matrix over partial output when the job failed.
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException(
                    "ComputeAvailabilities job failed; output under " + qualifiedOutput + " is incomplete");
        }

        return new DistributedRowMatrix(qualifiedOutput, tmpPath, A.numRows(), A.numCols());
    }

    /**
     * Mapper that receives one row of {@code A} per call, finds the row of
     * {@code R} with the same index, derives a new {@code A} row from it and
     * emits the blend of the new and the previous row under the same key.
     */
    public static class ComputeAvailabilitiesMapper
            extends Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> {

        /** Weight of the previous row in the emitted blend; the new row gets {@code 1 - DAMPING}. */
        private static final double DAMPING = 0.5;

        private DistributedRowMatrix R;
        /** Cursor over the rows of {@link #R}; kept across {@code map} calls and rewound on a miss. */
        private Iterator<MatrixSlice> iteratorR;

        /**
         * Reopens matrix {@code R} from the locations published in the job
         * configuration and positions a cursor on its first row.
         *
         * @param context task context supplying the job configuration
         * @throws IOException          propagated from the superclass
         * @throws InterruptedException propagated from the superclass
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);

            Configuration config = context.getConfiguration();
            Path rPath = new Path(URI.create(config.get(ComputeAvailabilitiesJob.R_PATH)));
            Path rTmpPath = new Path(URI.create(config.get(ComputeAvailabilitiesJob.TMP_R_PATH)));
            int colnums = Integer.parseInt(config.get(ComputeAvailabilitiesJob.COL_NUMS));

            // The matrix is declared square: colnums x colnums.
            R = new DistributedRowMatrix(rPath, rTmpPath, colnums, colnums);
            R.setConf(config);
            iteratorR = R.iterateAll();
        }

        /**
         * Computes the new row for index {@code key} and writes the blended row.
         *
         * <p>Steps, with {@code r} being the {@code R} row of the same index:
         * <ol>
         *   <li>negative non-zero entries of {@code r}, except the one at the row's own
         *       index, are set to zero (in place);</li>
         *   <li>{@code sum} is the sum of all entries of the clamped {@code r};</li>
         *   <li>entry {@code i} of the new row is {@code sum - r[i]}, and every entry
         *       other than the one at the row's own index is capped at zero from above;</li>
         *   <li>the emitted row is {@code (1 - DAMPING) * new + DAMPING * previous}.</li>
         * </ol>
         *
         * @param key index of the row being processed
         * @param row previous contents of the row; replaced by the blended row before writing
         * @param ctx task context the result is written to
         * @throws IOException          if {@code R} has no row with this index, or on write failure
         * @throws InterruptedException propagated from {@code ctx.write}
         */
        @Override
        protected void map(IntWritable key, VectorWritable row, Context ctx)
                throws IOException, InterruptedException {
            int rownum = key.get();
            Vector previous = row.get();

            Vector rRowVector = findRRow(rownum);
            if (rRowVector == null) {
                throw new IOException("Matrix at " + R.getRowPath() + " has no row " + rownum);
            }

            // Step 1: clamp negative off-diagonal entries of r to zero.
            Iterator<Element> nonZeros = rRowVector.iterateNonZero();
            while (nonZeros.hasNext()) {
                Element element = nonZeros.next();
                if (element.index() != rownum && element.get() < 0.0) {
                    element.set(0.0);
                }
            }

            // Steps 2 and 3: build the new row in a separate vector so that the
            // previous values are still intact when the two are blended below.
            double rSum = rRowVector.zSum();
            Vector updated = previous.like();
            for (int i = 0; i < previous.size(); i++) {
                double value = rSum - rRowVector.getQuick(i);
                if (i != rownum && value > 0.0) {
                    value = 0.0;
                }
                updated.setQuick(i, value);
            }

            // Step 4: blend new and previous rows.
            row.set(updated.times(1.0 - DAMPING).plus(previous.times(DAMPING)));
            ctx.write(key, row);
        }

        /**
         * Finds the row of {@code R} with the given index. The scan first
         * continues from the current cursor position; on a miss the cursor is
         * rewound once and the whole matrix is scanned from the start.
         *
         * @param rownum index of the wanted row
         * @return the row vector, or {@code null} if no row carries that index
         */
        private Vector findRRow(int rownum) {
            Vector found = scanForRow(iteratorR, rownum);
            if (found == null) {
                log.debug("Row {} not found ahead of the cursor; rescanning R from the start", rownum);
                iteratorR = R.iterateAll();
                found = scanForRow(iteratorR, rownum);
            }
            return found;
        }

        /**
         * Advances {@code slices} until a slice with the given index is seen.
         *
         * @param slices cursor to advance; left just past the match, or exhausted on a miss
         * @param rownum index of the wanted row
         * @return the matching row vector, or {@code null} if the cursor ran out first
         */
        private static Vector scanForRow(Iterator<MatrixSlice> slices, int rownum) {
            while (slices.hasNext()) {
                MatrixSlice slice = slices.next();
                if (slice.index() == rownum) {
                    return slice.vector();
                }
            }
            return null;
        }

    }
}