wwh.exec.Main.java Source code

Java tutorial

Introduction

Here is the source code for wwh.exec.Main.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package wwh.exec;

import bgu.commons.exceptions.Exceptions;
import bgu.emc.wwh.catalog.model.MetaResource;
import bgu.emc.wwh.client.WWHClient;
import bgu.emc.wwh.mapreduce2.JobConfigurationProvider;
import bgu.emc.wwh.mapreduce2.WWHMapReduceJob;
import cloudBurst.MerReduce;
import createVector.VectorCreator;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.maven.shared.invoker.DefaultInvocationRequest;
import org.apache.maven.shared.invoker.DefaultInvoker;
import org.apache.maven.shared.invoker.InvocationRequest;
import org.apache.maven.shared.invoker.Invoker;

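/**
 * Command-line entry point that stages local data onto the configured clusters and
 * submits the "cloud-burst" WWH MapReduce job.
 *
 * @author bennyl
 */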
public class Main {

    // Addresses of the participating clusters. Each one is used both as the YARN
    // resource manager hostname and as the HDFS host (see createClusterConfiguration).
    private static final String[] CLUSTERS = { "172.17.0.1", "172.17.0.2", "172.17.0.3" };
    // The first cluster: the client is created against it and the workspace is uploaded to it.
    private static final String MAIN_CLUSTER_RM = CLUSTERS[0];
    // Local root folder; its sub-folder "c<i>" is copied to the i-th cluster.
    private static final String LOCAL_DATA_FOLDER = "data/";
    // Remote folder that is deleted and re-populated on every cluster by prepareEnvironment.
    private static final String REMOTE_DATA_FOLDER = "/user/bennyl/bio/";
    // Local workspace directory uploaded to the main cluster.
    private static final String LOCAL_WORKSPACE = LOCAL_DATA_FOLDER + "workspace";
    // Workspace path handed to the WWH job; also the parent of the "qry.br" input.
    private static final String REMOTE_WORKSPACE = REMOTE_DATA_FOLDER + "workspace";
    // Local index files, one per cluster, read by LocalJobProvider when it is constructed.
    private static final String CACHE_DB1_PATH = LOCAL_DATA_FOLDER + "cache/Cluster1DbIndex";
    private static final String CACHE_DB2_PATH = LOCAL_DATA_FOLDER + "cache/Cluster2DbIndex";
    private static final String CACHE_DB3_PATH = LOCAL_DATA_FOLDER + "cache/Cluster3DbIndex";
    // Output path of the overall WWH job.
    private static final String OUTPUT_RESULT_PATH = REMOTE_DATA_FOLDER + "result";

    // Jar file returned by createJar after the Maven build has been invoked.
    private static final String JAR_PATH = "target/one-jar.jar";
    // POM file the Maven invocation in createJar is run against.
    private static final String POM_PATH = "pom.xml";

    // Output of the first local job and input of the second one (see LocalJobProvider).
    private static final String CB_RESULT_PATH = "/user/bennyl/bio/cloud-burst-result";

    /**
     * Stages the data on the clusters, builds the "cloud-burst" WWH job description,
     * submits it together with the freshly built jar and blocks until it terminates.
     *
     * @param args ignored
     * @throws Exception if environment preparation, job creation or submission fails
     */
    public static void main(String[] args) throws Exception {
        System.out.println("Prepering the environment... ");
        prepareEnvironment();

        System.out.println("Creating jobs... ");
        YarnConfiguration mainClusterConf = createClusterConfiguration(MAIN_CLUSTER_RM);
        WWHClient wwhClient = WWHClient.create(mainClusterConf);

        WWHMapReduceJob cloudBurstJob = new WWHMapReduceJob("cloud-burst");
        cloudBurstJob.setUnresolvedMetaResources(
                "wwh.bio.outbreakmonitoring@c0",
                "wwh.bio.outbreakmonitoring@c1",
                "wwh.bio.outbreakmonitoring@c2");

        // Per-cluster jobs are configured by LocalJobProvider; the global job only
        // forwards its input through IdentityReducer as text.
        cloudBurstJob.setLocalJobsConfigurationProvider(new LocalJobProvider());
        cloudBurstJob.setGlobalJobsConfigurationProvider((globalJob, metaResources) -> {
            globalJob.setReducerClass(IdentityReducer.class);
            globalJob.setOutputFormatClass(TextOutputFormat.class);
            return false;
        });

        cloudBurstJob.setWorkspacePath(REMOTE_WORKSPACE);
        cloudBurstJob.setOutputPath(OUTPUT_RESULT_PATH);

        File jobJar = createJar();
        wwhClient.submit(cloudBurstJob, jobJar).waitForTermination();
        System.out.println("DONE!");
    }

    /**
     * Resets the remote data folder on every cluster and re-uploads its data.
     *
     * <p>For the i-th entry of {@code CLUSTERS} the remote data folder and
     * {@code /user/bennyl/cloud-burst} are deleted recursively, then the local folder
     * {@code data/c<i>} is copied to the remote data folder. Finally the local workspace
     * is copied to the remote data folder of the main cluster.
     *
     * @throws IOException if a file system cannot be obtained or a delete/copy fails
     */
    private static void prepareEnvironment() throws IOException {
        // File system of a default Configuration; presumably the local one - confirm
        // against the deployment's core-site.xml.
        final FileSystem sourceFs = FileSystem.get(new Configuration());
        final Path remoteRoot = new Path(REMOTE_DATA_FOLDER);
        final Path oldCloudBurstDir = new Path("/user/bennyl/cloud-burst");

        int clusterIndex = 0;
        for (String clusterAddress : CLUSTERS) {
            YarnConfiguration clusterConf = createClusterConfiguration(clusterAddress);
            FileSystem clusterFs = FileSystem.get(clusterConf);

            clusterFs.delete(remoteRoot, true);
            clusterFs.delete(oldCloudBurstDir, true);

            Path clusterData = new Path(LOCAL_DATA_FOLDER + "c" + clusterIndex);
            FileUtil.copy(sourceFs, clusterData, clusterFs, remoteRoot, false, true, clusterConf);

            clusterIndex++;
        }

        final YarnConfiguration mainConf = createClusterConfiguration(MAIN_CLUSTER_RM);
        final FileSystem mainFs = FileSystem.get(mainConf);
        FileUtil.copy(new File(LOCAL_WORKSPACE), mainFs, remoteRoot, false, mainConf);
    }

    /**
     * Reads a tab-separated file and joins the first column of every line with {@code '$'}.
     *
     * <p>A line without a tab contributes the whole line; a line starting with a tab
     * contributes an empty entry. The joined result is also printed to standard output.
     *
     * @param CacheDbPath path of the file to read (decoded with the platform default charset)
     * @return the first columns joined by {@code '$'}, or an empty string for an empty file
     * @throws IOException if the file cannot be opened, read or closed
     */
    private static String readFastaList(String CacheDbPath) throws IOException {
        StringBuilder names = new StringBuilder();

        // try-with-resources instead of try/finally with a return: returning from a
        // finally block silently discards any exception thrown while reading.
        try (BufferedReader reader = new BufferedReader(new FileReader(CacheDbPath))) {
            boolean firstLine = true;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (!firstLine) {
                    names.append('$');
                }
                firstLine = false;

                // indexOf rather than split("\t")[0]: split drops trailing empty fields,
                // so a line made only of tabs yields an empty array and [0] would throw.
                int tab = line.indexOf('\t');
                names.append(tab < 0 ? line : line.substring(0, tab));
            }
        }

        String ans = names.toString();
        System.out.println(ans);
        return ans;
    }

    /**
     * Reads a whole text file into a string, terminating every line with
     * {@link System#lineSeparator()}. The content is also printed to standard output.
     *
     * @param CacheDbPath path of the file to read (decoded with the platform default charset)
     * @return the file content with normalized line terminators; empty for an empty file
     * @throws IOException if the file cannot be opened, read or closed
     */
    private static String readGeneLength(String CacheDbPath) throws IOException {
        StringBuilder content = new StringBuilder();

        // try-with-resources instead of try/finally with a return: returning from a
        // finally block would hide read/close failures and hand back a partial result.
        try (BufferedReader reader = new BufferedReader(new FileReader(CacheDbPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append(System.lineSeparator());
            }
        }

        String ans = content.toString();
        System.out.println(ans);
        return ans;
    }

    /**
     * Builds a YARN configuration that targets a single cluster.
     *
     * <p>The given address is used as the resource manager hostname (where jobs are
     * submitted) and as the host of the default HDFS file system on port 9000.
     * A registry ZooKeeper quorum ({@code ip + ":2181"}) was sketched here earlier
     * but is not applied.
     *
     * @param ip address of the cluster
     * @return a new configuration pointing at that cluster
     */
    private static YarnConfiguration createClusterConfiguration(String ip) {
        final String hdfsUri = "hdfs://" + ip + ":9000";

        YarnConfiguration clusterConf = new YarnConfiguration();
        clusterConf.set("yarn.resourcemanager.hostname", ip);
        clusterConf.set("fs.defaultFS", hdfsUri);
        return clusterConf;
    }

    /**
     * Configures the jobs that run locally on each cluster.
     *
     * <p>On construction the three cluster index files are read and their derived
     * strings are cached under {@code <resource id>#FASTA} and {@code <resource id>#GEN}.
     * The first call to {@link #configureNext} sets up the MerReduce job and returns
     * {@code true}; every later call sets up the VectorCreator job, which reads the
     * first job's output, and returns {@code false}. The return value presumably tells
     * the framework whether another local job follows - confirm against
     * JobConfigurationProvider.
     */
    private static class LocalJobProvider implements JobConfigurationProvider {

        Map<String, String> cache = new HashMap<>();

        int numCall = 0;

        public LocalJobProvider() throws IOException {
            // FASTA lists first, then the raw index contents, in cluster order.
            cache.put("wwh.bio.outbreakmonitoring@c0#FASTA", readFastaList(CACHE_DB1_PATH));
            cache.put("wwh.bio.outbreakmonitoring@c1#FASTA", readFastaList(CACHE_DB2_PATH));
            cache.put("wwh.bio.outbreakmonitoring@c2#FASTA", readFastaList(CACHE_DB3_PATH));

            cache.put("wwh.bio.outbreakmonitoring@c0#GEN", readGeneLength(CACHE_DB1_PATH));
            cache.put("wwh.bio.outbreakmonitoring@c1#GEN", readGeneLength(CACHE_DB2_PATH));
            cache.put("wwh.bio.outbreakmonitoring@c2#GEN", readGeneLength(CACHE_DB3_PATH));
        }

        public boolean configureNext(Job job, MetaResource[] resources) throws IOException {
            Configuration conf = job.getConfiguration();

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            if (numCall != 0) {
                configureVectorJob(job, conf, resources);
                return false;
            }

            numCall++;
            configureMerReduceJob(job, conf, resources);
            return true;
        }

        /** First stage: MerReduce over the query file in the remote workspace. */
        private void configureMerReduceJob(Job job, Configuration conf, MetaResource[] resources)
                throws IOException {
            conf.set("refpath", "ref.br");
            conf.set("qrypath", "qry.br");
            conf.setInt("MIN_READ_LEN", 99);
            conf.setInt("MAX_READ_LEN", 103);
            conf.setInt("K", 1);
            conf.setInt("SEED_LEN", 49);
            conf.setInt("FLANK_LEN", 55);
            conf.setInt("ALLOW_DIFFERENCES", 0);
            conf.setInt("BLOCK_SIZE", 128);
            conf.setInt("REDUNDANCY", 1);
            conf.set("FILTER_ALIGNMENTS", "0");

            String fastaKey = resources[0].getId().toString() + "#FASTA";
            conf.set("FastaList", cache.get(fastaKey));

            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            FileOutputFormat.setOutputPath(job, new Path(CB_RESULT_PATH));

            FileInputFormat.addInputPath(job, new Path(REMOTE_WORKSPACE + "/qry.br"));

            // The order of seeds is not important, but make sure the reference seeds
            // are seen before the qry seeds.
            job.setPartitionerClass(MerReduce.PartitionMers.class); // mv2
            job.setGroupingComparatorClass(MerReduce.GroupMersWC.class);

            job.setMapperClass(MerReduce.MapClass.class);
            job.setReducerClass(MerReduce.ReduceClass.class);
            job.setMapOutputKeyClass(BytesWritable.class); // mv2
            job.setMapOutputValueClass(BytesWritable.class); // mv2
        }

        /** Second stage: VectorCreator over the text output of the first stage. */
        private void configureVectorJob(Job job, Configuration conf, MetaResource[] resources)
                throws IOException {
            String clusterName = resources[0].getId().getClusterReferense();
            String geneKey = resources[0].getId().toString() + "#GEN";

            conf.set("GeneLengthLookup", cache.get(geneKey));
            conf.set("ClusterName", clusterName);

            FileInputFormat.setInputPaths(job, new Path(CB_RESULT_PATH));
            job.setMapperClass(VectorCreator.ClusterMapper.class);

            job.setInputFormatClass(TextInputFormat.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setReducerClass(VectorCreator.ClusterReducer.class);
        }
    }

    /**
     * Builds the project with Maven (goals {@code package} and {@code shade:shade}
     * on {@code pom.xml}) and returns the resulting jar.
     *
     * @return the file {@code target/one-jar.jar}
     * @throws IllegalStateException if the Maven build does not finish with exit code 0
     */
    public static File createJar() {

        System.out.println("Compiling project... ");
        // Single-element holder because a lambda cannot assign an enclosing local.
        // Starts at -1 so a build that never ran is reported as a failure as well.
        final int[] exitCode = { -1 };
        Exceptions.uncheck(() -> {
            InvocationRequest request = new DefaultInvocationRequest();
            request.setPomFile(new File(POM_PATH));
            request.setGoals(Arrays.asList("package", "shade:shade"));

            Invoker invoker = new DefaultInvoker();
            System.out.println("executing!!!");
            exitCode[0] = invoker.execute(request).getExitCode();
        });

        // Previously the invocation result was ignored, so a failed build would still
        // hand a stale or missing jar to the job submission.
        if (exitCode[0] != 0) {
            throw new IllegalStateException(
                    "Maven build of " + POM_PATH + " failed with exit code " + exitCode[0]);
        }

        return new File(JAR_PATH);
    }

    /**
     * Reducer that writes every incoming value unchanged under its key.
     */
    public static class IdentityReducer extends Reducer<Text, Text, Text, Text> {

        /**
         * Emits each value of {@code values} paired with {@code key}.
         *
         * @param key     the key shared by all values
         * @param values  the values grouped under the key
         * @param context the context the pairs are written to
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Obtain the iterator once. Calling values.iterator() on every pass only
            // terminates when the Iterable keeps returning the same iterator; with an
            // ordinary Iterable it would re-emit the first value forever.
            for (Text value : values) {
                context.write(key, value);
            }
        }

    }
}