edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java Source code

Java tutorial

Introduction

Here is the source code for edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java

Source

/**
 * Software License, Version 1.0
 * 
 * Copyright 2003 The Trustees of Indiana University.  All rights reserved.
 * 
 *
 *Redistribution and use in source and binary forms, with or without 
 *modification, are permitted provided that the following conditions are met:
 *
 *1) All redistributions of source code must retain the above copyright notice,
 * the list of authors in the original source code, this list of conditions and
 * the disclaimer listed in this license;
 *2) All redistributions in binary form must reproduce the above copyright 
 * notice, this list of conditions and the disclaimer listed in this license in
 * the documentation and/or other materials provided with the distribution;
 *3) Any documentation included with all redistributions must include the 
 * following acknowledgement:
 *
 *"This product includes software developed by the Community Grids Lab. For 
 * further information contact the Community Grids Lab at 
 * http://communitygrids.iu.edu/."
 *
 * Alternatively, this acknowledgement may appear in the software itself, and 
 * wherever such third-party acknowledgments normally appear.
 * 
 *4) The name Indiana University or Community Grids Lab or NaradaBrokering, 
 * shall not be used to endorse or promote products derived from this software 
 * without prior written permission from Indiana University.  For written 
 * permission, please contact the Advanced Research and Technology Institute 
 * ("ARTI") at 351 West 10th Street, Indianapolis, Indiana 46202.
 *5) Products derived from this software may not be called NaradaBrokering, 
 * nor may Indiana University or Community Grids Lab or NaradaBrokering appear
 * in their name, without prior written permission of ARTI.
 * 
 *
 * Indiana University provides no reassurances that the source code provided 
 * does not infringe the patent or any other intellectual property rights of 
 * any other entity.  Indiana University disclaims any liability to any 
 * recipient for claims brought by any other entity based on infringement of 
 * intellectual property rights or otherwise.  
 *
 *LICENSEE UNDERSTANDS THAT SOFTWARE IS PROVIDED "AS IS" FOR WHICH NO 
 *WARRANTIES AS TO CAPABILITIES OR ACCURACY ARE MADE. INDIANA UNIVERSITY GIVES
 *NO WARRANTIES AND MAKES NO REPRESENTATION THAT SOFTWARE IS FREE OF 
 *INFRINGEMENT OF THIRD PARTY PATENT, COPYRIGHT, OR OTHER PROPRIETARY RIGHTS. 
 *INDIANA UNIVERSITY MAKES NO WARRANTIES THAT SOFTWARE IS FREE FROM "BUGS", 
 *"VIRUSES", "TROJAN HORSES", "TRAP DOORS", "WORMS", OR OTHER HARMFUL CODE.  
 *LICENSEE ASSUMES THE ENTIRE RISK AS TO THE PERFORMANCE OF SOFTWARE AND/OR 
 *ASSOCIATED MATERIALS, AND TO THE PERFORMANCE AND VALIDITY OF INFORMATION 
 *GENERATED USING SOFTWARE.
 */

package edu.indiana.soic.ts.mapreduce.pwd;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;

import edu.indiana.soic.ts.dist.DistanceFunction;
import edu.indiana.soic.ts.utils.Utils;
import edu.indiana.soic.ts.utils.VectorPoint;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Mapper that computes one block of a pairwise distance matrix.
 *
 * <p>Each input value names a (row block, column block) pair plus a flag saying whether the
 * block lies on the diagonal. The mapper parses the sequence file part(s) for those blocks,
 * evaluates the configured {@link DistanceFunction} for every pair of sequences in the block,
 * divides each distance by the block maximum, scales the result to a {@code short}, and emits
 * the block keyed by its row block index. For off-diagonal blocks a second record, keyed by the
 * column block index, is emitted so the (columnBlock, rowBlock) block can be filled as well.
 */
public class SWGMap extends Mapper<LongWritable, Text, LongWritable, SWGWritable> {
    private static final Logger LOG = LoggerFactory.getLogger(SWGMap.class);
    private static final long NANOS_PER_MILLI = 1000000L;

    /** Number of sequences along one side of a block. */
    private long blockSize;
    /** Total number of sequences; used to clip the last, partially filled block. */
    private long noOfSequences;
    /** Read from the configuration in {@link #setup}; not read anywhere else in this class. */
    private long noOfDivisions;

    private DistanceFunction distFunc;

    /**
     * Reads the block geometry from the job configuration and instantiates the distance function.
     *
     * @param context the Hadoop task context supplying the configuration
     * @throws IllegalArgumentException if the configured block size is not positive
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();

        this.blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000);
        // Fail early with a clear message: a zero block size would otherwise surface as an
        // ArithmeticException in the division below, and a negative one as a
        // NegativeArraySizeException later in map().
        if (blockSize <= 0) {
            throw new IllegalArgumentException(
                    Constants.BLOCK_SIZE + " must be positive but was " + blockSize);
        }
        this.noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10);
        this.noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize);
        String distFuncName = conf.get(Constants.DIST_FUNC);
        this.distFunc = (DistanceFunction) Utils.loadObject(distFuncName);
        this.distFunc.prepare(new HashMap<>());
    }

    /**
     * Computes the distances for one block and writes the scaled result.
     *
     * @param blockIndex input key; not used by this method
     * @param value      text that, split on {@code Constants.BREAK}, yields the row block index,
     *                   the column block index and the diagonal flag, in that order
     * @param context    the Hadoop task context used for configuration, counters and output
     * @throws IOException          if a sequence file cannot be opened or parsed, or the write fails
     * @throws InterruptedException if the write to the context is interrupted
     */
    @Override
    public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException {
        long startTime = System.nanoTime();
        Configuration conf = context.getConfiguration();
        Counter alignmentCounter = context.getCounter(Constants.RecordCounters.ALIGNMENTS);
        String[] valArgs = value.toString().split(Constants.BREAK);

        long rowBlock = Long.parseLong(valArgs[0]);
        long columnBlock = Long.parseLong(valArgs[1]);
        boolean isDiagonal = Boolean.parseBoolean(valArgs[2]);
        LOG.info("row column{}  {}  {}  {}", rowBlock, columnBlock, isDiagonal, valArgs[2]);

        // Global index of the first sequence in the row / column block.
        long row = rowBlock * blockSize;
        long column = columnBlock * blockSize;

        long parseStartTime = System.nanoTime();
        FileSystem fs = FileSystem.getLocal(conf);
        List<VectorPoint> rowSequences = parseBlockFile(fs, rowBlock);
        // A diagonal block compares a file part with itself, so the second parse is skipped.
        List<VectorPoint> colSequences = isDiagonal ? rowSequences : parseBlockFile(fs, columnBlock);
        LOG.info("Parsing time : {}ms", (System.nanoTime() - parseStartTime) / NANOS_PER_MILLI);

        // The last block in each direction may be only partially filled.
        int rowCount = cellsInBlock(row);
        int colCount = cellsInBlock(column);

        short[][] alignments = new short[(int) blockSize][(int) blockSize];
        double[][] doubleDistances = new double[(int) blockSize][(int) blockSize];
        // NOTE(review): Double.MIN_VALUE is the smallest POSITIVE double, not the most negative
        // one. It is kept deliberately: when every distance is 0 it leaves a non-zero divisor
        // for the normalisation below. This presumes distances are non-negative -- confirm
        // against the DistanceFunction implementations before changing the seed.
        double max = Double.MIN_VALUE;
        for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
            for (int columnIndex = 0; columnIndex < colCount; columnIndex++) {
                double distance = distFunc.calc(rowSequences.get(rowIndex), colSequences.get(columnIndex));
                if (distance > max) {
                    max = distance;
                }
                // Keep the raw distance; it is normalised once the block maximum is known.
                doubleDistances[rowIndex][columnIndex] = distance;
            }
            alignmentCounter.increment(colCount);
        }

        // Divide by max to bring the values into the range 0 to 1, then scale to short.
        for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
            for (int columnIndex = 0; columnIndex < colCount; columnIndex++) {
                double normalised = doubleDistances[rowIndex][columnIndex] / max;
                alignments[rowIndex][columnIndex] = (short) (normalised * Short.MAX_VALUE);
            }
        }

        SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false);
        dataWritable.setMax(max);
        dataWritable.setAlignments(alignments);
        context.write(new LongWritable(rowBlock), dataWritable);

        if (!isDiagonal) {
            // Create the transpose matrix of (rowBlock, colBlock) block to fill the
            // (colBlock, rowBlock) block. The same array is handed over; the final constructor
            // argument is flipped to true for this record.
            SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true);
            inverseDataWritable.setAlignments(alignments);
            context.write(new LongWritable(columnBlock), inverseDataWritable);
        }
        LOG.info("Map time : {}ms", (System.nanoTime() - startTime) / NANOS_PER_MILLI);
    }

    /**
     * Opens and parses the sequence file part belonging to {@code block}, closing the stream
     * afterwards even if parsing fails.
     *
     * @param fs    file system on which the file part is opened
     * @param block block index appended to {@code Constants.HDFS_SEQ_FILENAME} to form the path
     * @return the parsed sequences
     * @throws IOException          if the file cannot be opened or parsed
     * @throws InterruptedException declared so that anything the parser may throw propagates
     */
    private static List<VectorPoint> parseBlockFile(FileSystem fs, long block)
            throws IOException, InterruptedException {
        Path path = new Path(Constants.HDFS_SEQ_FILENAME + "_" + block);
        try (FSDataInputStream inStream = fs.open(path)) {
            return SequenceParser.ParseFile(inStream);
        }
    }

    /**
     * Returns how many sequences of the block starting at global index {@code offset} actually
     * exist: at most {@code blockSize}, fewer for the final block, and zero when the block starts
     * at or beyond {@code noOfSequences}.
     */
    private int cellsInBlock(long offset) {
        return (int) Math.max(0L, Math.min(blockSize, noOfSequences - offset));
    }
}