edu.udel.mxv.MxvMap.java Source code

Java tutorial

Introduction

Here is the source code for edu.udel.mxv.MxvMap.java

Source

/*  
 * Copyright (C) 2014 Computer Architecture and Parallel Systems Laboratory (CAPSL) 
 *
 * Original author: Sergio Pino 
 * E-Mail: sergiop@udel.edu
 *
 * License
 *  
 * Redistribution of this code is allowed only after an explicit permission is
 * given by the original author or CAPSL and this license should be included in
 * all files, either existing or new ones. Modifying the code is allowed, but
 * the original author and/or CAPSL must be notified about these modifications.
 * The original author and/or CAPSL is also allowed to use these modifications
 * and publicly report results that include them. Appropriate acknowledgments
 * to everyone who made the modifications will be added in this case.
 *
 * Warranty 
 *
 * THIS CODE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND,
 * EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT
 * THE COVERED CODE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR
 * PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
 * OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN
 * ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME
 * THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER
 * OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY
 * COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER.
 */

package edu.udel.mxv;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Map stage of a matrix-vector product: multiplies each matrix entry a_ij by the matching
 * vector entry x_j and emits the partial product keyed by the row index i.
 *
 * <p>The vector x is loaded once per map task in {@code setup}. It is assumed that the whole
 * vector fits in the mapper's memory, because every mapper holds its own full copy.
 *
 * <p>Configuration properties read by this mapper:
 * <ul>
 *   <li>{@code vector.path} - path whose listing contains the vector files; each line of a
 *       vector file has the form {@code index,value}</li>
 *   <li>{@code vector.n} - number of elements of the vector (defaults to 0 when unset)</li>
 * </ul>
 *
 * <p>Matrix input lines have the form {@code i,j,aij}.
 *
 * @author Sergio Pino sergiop@udel.edu
 */
public class MxvMap extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {

    private static final Log LOG = LogFactory.getLog(MxvMap.class);

    /** Charset used to decode the vector files. */
    private static final String VECTOR_CHARSET = "UTF-8";

    /** In-memory copy of the vector x, indexed by element position. */
    private double[] x_i;

    /** Reused output key: the matrix row index i. */
    private final IntWritable row = new IntWritable();

    /** Reused output value: the partial product a_ij * x_j. */
    private final DoubleWritable aij_x_vj = new DoubleWritable();

    /**
     * Loads the vector x into memory before any matrix line is mapped.
     *
     * <p>Every entry returned by listing {@code vector.path} is read as a vector file.
     *
     * @param context task context that supplies the job configuration
     * @throws IOException if {@code vector.path} is not configured, the path cannot be
     *         listed, or a vector file cannot be read
     * @throws InterruptedException declared by the overridden method; not thrown here
     */
    @Override
    protected void setup(Mapper<LongWritable, Text, IntWritable, DoubleWritable>.Context context)
            throws IOException, InterruptedException {

        Configuration conf = context.getConfiguration();

        String input_vector = conf.get("vector.path");
        if (input_vector == null) {
            // Fail with a clear message instead of a NullPointerException from URI.create.
            throw new IOException("Missing required configuration property: vector.path");
        }
        x_i = new double[conf.getInt("vector.n", 0)];

        // NOTE: fs is intentionally not closed (as in the original code); FileSystem.get can
        // return a cached instance that is shared with the rest of the task.
        FileSystem fs = FileSystem.get(URI.create(input_vector), conf);
        FileStatus[] status = fs.listStatus(new Path(input_vector));
        if (status == null) {
            throw new IOException("Unable to list vector path: " + input_vector);
        }

        for (int i = 0; i < status.length; ++i) {
            Path file = status[i].getPath();
            LOG.info("status: " + i + " " + file.toString());

            int count = loadVectorFile(fs, file);

            LOG.info("Number of elements read for vector = " + count);
        }
    }

    /**
     * Reads one vector file and stores its entries into {@code x_i}.
     *
     * <p>Lines that do not consist of exactly two comma-separated fields are logged and
     * skipped. A field that is not a valid number raises {@link NumberFormatException}, and
     * an index outside the bounds of {@code x_i} raises
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * @param fs file system used to open the file
     * @param file vector file to read
     * @return number of vector entries stored from this file
     * @throws IOException if the file cannot be opened or read
     */
    private int loadVectorFile(FileSystem fs, Path file) throws IOException {
        DataInputStream in = fs.open(file);
        try {
            // BufferedReader replaces the deprecated DataInputStream.readLine(), which does
            // not convert bytes to characters properly.
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(in, VECTOR_CHARSET));

            int count = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] split_line = line.split(",");
                if (split_line.length == 2) {
                    int pos = Integer.parseInt(split_line[0]);
                    double val = Double.parseDouble(split_line[1]);
                    x_i[pos] = val;
                    count++;
                } else {
                    LOG.error("Parse error in line: " + line);
                }
            }
            return count;
        } finally {
            // Always release the underlying stream, even when parsing fails.
            in.close();
        }
    }

    /**
     * Emits the partial product a_ij * x_j for one matrix entry, keyed by row i.
     *
     * <p>Lines that do not consist of exactly three comma-separated fields are logged and
     * skipped. A field that is not a valid number raises {@link NumberFormatException}, and
     * a column index outside the loaded vector raises
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * @param key input key supplied by the framework; not used
     * @param value one matrix line of the form {@code i,j,aij}
     * @param context task context that receives the (row, partial product) pair
     * @throws IOException if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, IntWritable, DoubleWritable>.Context context)
            throws IOException, InterruptedException {

        String[] split_line = value.toString().split(",");

        if (split_line.length == 3) {

            int i = Integer.parseInt(split_line[0]);
            int j = Integer.parseInt(split_line[1]);
            double aij = Double.parseDouble(split_line[2]);

            // y_i = sum aij*xj
            row.set(i);
            aij_x_vj.set(aij * x_i[j]);

            context.write(row, aij_x_vj);
        } else {
            LOG.error("Error parsing line " + value);
        }
    }
}