com.lakhani.anchorgraph.testCache.java Source code

Java tutorial

Introduction

Here is the source code for com.lakhani.anchorgraph.testCache.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package com.lakhani.anchorgraph;

/**
 *
 * @author cmlakhan
 */

import java.util.Arrays;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hadoop.conf.*;
//import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.commons.math.linear.RealVector;
import org.apache.commons.math.linear.ArrayRealVector;

/**
 * Map-only Hadoop job that demonstrates reading a distributed-cache file
 * alongside comma-separated input vectors.
 *
 * <p>The driver registers a centroids file in the distributed cache and runs
 * {@link Map} over the text input. It is meant to be launched through
 * {@link ToolRunner}, so generic options such as
 * {@code -DnumberFeatures=<int> -DnumberCentroids=<int>} are parsed into the
 * job configuration before {@link #run(String[])} is called.
 *
 * <p>Usage: {@code testCache [generic options] <input path> <output path>}
 */
public class testCache extends Configured implements Tool {

    /** Centroids file that is registered in the distributed cache for every task. */
    private static final String CENTROIDS_URI =
            "hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt";

    /**
     * Mapper that parses each input line into a feature vector and emits it,
     * prefixed with the local path of the first cached file, under the
     * constant key {@code "hello"}.
     *
     * <p>Each input line is split on {@code ","}; the first field is skipped
     * and the following {@code numberFeatures} fields are parsed as doubles.
     */
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {

        // Output key/value holders, reused across map() calls.
        private Text vecID = new Text();
        private Text vec = new Text();
        String vecSplitter = ",";

        /** Local paths of the distributed-cache files, resolved once in {@link #setup}. */
        private Path[] localFiles;

        /** Value of the {@code numberFeatures} job property. */
        private int numFeatures;

        /**
         * Value of the {@code numberCentroids} job property. It is not used by
         * {@link #map} yet, but it is still parsed so that a job missing the
         * property fails the same way it always has.
         */
        private int numCentroids;

        /**
         * Reads the job properties and resolves the cached files once per task
         * instead of once per record.
         *
         * @throws IllegalArgumentException if {@code numberFeatures} or
         *         {@code numberCentroids} is missing or not an integer
         * @throws IOException if no distributed-cache file is available
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);

            Configuration conf = context.getConfiguration();
            numFeatures = requiredInt(conf, "numberFeatures");
            numCentroids = requiredInt(conf, "numberCentroids");

            localFiles = context.getLocalCacheFiles();
            if (localFiles == null || localFiles.length == 0) {
                // map() dereferences localFiles[0]; fail here with a readable
                // message rather than with a NullPointerException per record.
                throw new IOException("No distributed-cache files are available to this task");
            }
        }

        /**
         * Emits {@code ("hello", <cache path> + "test" + <parsed vector>)} for
         * one input line.
         *
         * @param key     byte offset of the line (unused)
         * @param value   one comma-separated input line
         * @param context task context the pair is written to
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            vecID.set("hello");

            ArrayRealVector vecParse = lineToVector(value.toString(), numFeatures);
            vec.set(localFiles[0].toString() + "test" + vecParse.toString());

            context.write(vecID, vec);
        }

        /**
         * Parses fields {@code 1..featureCount} of a delimited line into a vector.
         * Field 0 is skipped.
         *
         * @param line         delimited input line
         * @param featureCount number of numeric fields to read after field 0
         * @return a vector holding exactly {@code featureCount} values
         * @throws IllegalArgumentException if the line has fewer than
         *         {@code featureCount + 1} fields
         * @throws NumberFormatException if a field is not a valid double
         */
        private ArrayRealVector lineToVector(String line, int featureCount) {
            String[] fields = line.split(vecSplitter);
            if (fields.length < featureCount + 1) {
                throw new IllegalArgumentException("Expected at least " + (featureCount + 1)
                        + " fields but found " + fields.length + " in line: " + line);
            }

            // ArrayRealVector.append(double) returns a NEW vector and leaves the
            // receiver unchanged, so the values are collected in an array first.
            double[] features = new double[featureCount];
            for (int j = 0; j < featureCount; j++) {
                features[j] = Double.parseDouble(fields[j + 1]);
            }
            return new ArrayRealVector(features);
        }

        /**
         * Reads a mandatory integer job property.
         *
         * @throws IllegalArgumentException if the property is absent or not an integer
         */
        private static int requiredInt(Configuration conf, String name) {
            String raw = conf.get(name);
            if (raw == null) {
                throw new IllegalArgumentException("Missing required job property '" + name
                        + "'; pass it with -D" + name + "=<int>");
            }
            return Integer.parseInt(raw.trim());
        }

    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return {@code 0} if the job succeeded, {@code 1} if it failed,
     *         {@code 2} if the arguments are invalid
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: testCache [generic options] <input path> <output path>");
            return 2;
        }

        // Use the configuration ToolRunner populated from the generic options
        // (-D...), otherwise numberFeatures/numberCentroids never reach the
        // mapper. Fall back to a fresh one when run() is called directly.
        Configuration conf = getConf();
        if (conf == null) {
            conf = new Configuration();
        }

        Job job = Job.getInstance(conf, "testCache");
        job.setJarByClass(testCache.class);
        job.addCacheFile(new URI(CENTROIDS_URI));

        job.setMapperClass(Map.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion submits the job itself; 0 is the success exit status.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Launches the job through {@link ToolRunner} and exits with its status code. */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new testCache(), args);
        System.exit(res);
    }
}