com.github.seqware.queryengine.plugins.contribs.DonorsToMutationsAndGenesAggregationPlugin.java Source code

Java tutorial

Introduction

Here is the source code for com.github.seqware.queryengine.plugins.contribs.DonorsToMutationsAndGenesAggregationPlugin.java

Source

/*
 * Copyright (C) 2012 SeqWare
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package com.github.seqware.queryengine.plugins.contribs;

import com.github.seqware.queryengine.model.Feature;
import com.github.seqware.queryengine.model.FeatureSet;
import com.github.seqware.queryengine.model.Tag;
import com.github.seqware.queryengine.plugins.runners.MapperInterface;
import com.github.seqware.queryengine.plugins.runners.ReducerInterface;
import com.github.seqware.queryengine.plugins.recipes.FilteredFileOutputPlugin;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.Text;

/**
 * Map/Reduce plug-in that aggregates, per donor, the mutations seen in that
 * donor's feature set together with the genes each mutation is tagged with.
 *
 * <p>The map phase emits one record per feature set and position: the key is
 * the value of the feature set's {@code donor} tag and the value has the form
 * {@code mutationId::gene1,gene2;mutationId2::gene3}. The reduce phase joins
 * all values of one donor with {@code ;} and prefixes them with the donor ID
 * and a tab.</p>
 *
 * TODO (carried over from the original header, which described a "quick and
 * dirty export"; confirm whether it still applies to this plug-in): Copy from
 * HDFS and parse key value file to VCF properly.
 *
 * @author dyuen
 * @version $Id: $Id
 */
public class DonorsToMutationsAndGenesAggregationPlugin extends FilteredFileOutputPlugin {

    /** Key of the feature tag holding the mutation ID. */
    private static final String ID_TAG = "id";
    /** Key of the feature tag holding a comma-separated list of gene IDs. */
    private static final String GENE_TAG = "EnsemblGene";
    /** Key of the feature-set tag holding the donor ID. */
    private static final String DONOR_TAG = "donor";

    // Reused across map() calls so a new Text is not allocated per record.
    private Text text = new Text();
    private Text textKey = new Text();

    /**
     * Emits, for every feature set that has at least one feature starting at
     * {@code position}, a record keyed by the feature set's donor ID whose
     * value lists the mutations and their genes.
     *
     * @param position the position currently being processed; features whose
     *        start differs from it are ignored
     * @param atoms the features found at this position, grouped by feature set
     * @param mapperInterface sink for the (donor, mutation list) records
     * @throws IllegalStateException if a feature set that contributes
     *         mutations has no {@code donor} tag or the tag has no value
     */
    @Override
    public void map(long position, Map<FeatureSet, Collection<Feature>> atoms,
            MapperInterface<Text, Text> mapperInterface) {

        // each feature set corresponds to a donor
        for (Map.Entry<FeatureSet, Collection<Feature>> donorEntry : atoms.entrySet()) {
            Map<String, ArrayList<String>> genesByMutation =
                    collectGenesByMutation(position, donorEntry.getValue());
            if (genesByMutation.isEmpty()) {
                // nothing starts at this position for this donor, so emit nothing
                continue;
            }

            Tag donorTag = donorEntry.getKey().getTagByKey(DONOR_TAG);
            Object donor = (donorTag == null) ? null : donorTag.getValue();
            if (donor == null) {
                // fail with a diagnosable message instead of a bare NullPointerException
                throw new IllegalStateException("Feature set has no \"" + DONOR_TAG
                        + "\" tag value; cannot aggregate its mutations (position " + position + ")");
            }

            // toString() rather than a String cast, matching how feature tag values are read
            textKey.set(donor.toString());
            text.set(formatMutations(genesByMutation));
            mapperInterface.write(textKey, text);
        }
    }

    /**
     * Builds the mutation ID -> gene list mapping for the features of one
     * feature set that start exactly at {@code position}.
     *
     * <p>A feature without an {@code id} tag is stored under a {@code null}
     * key, and a later feature with the same ID replaces an earlier one.</p>
     */
    private static Map<String, ArrayList<String>> collectGenesByMutation(long position,
            Collection<Feature> features) {
        Map<String, ArrayList<String>> genesByMutation = new HashMap<String, ArrayList<String>>();
        for (Feature feature : features) {
            if (feature.getStart() != position) {
                continue;
            }
            String mutationId = null;
            ArrayList<String> genes = new ArrayList<String>();
            for (Tag tag : feature.getTags()) {
                if (ID_TAG.equals(tag.getKey())) {
                    mutationId = tag.getValue().toString();
                }
                if (GENE_TAG.equals(tag.getKey())) {
                    genes.addAll(Arrays.asList(tag.getValue().toString().split(",")));
                }
            }
            genesByMutation.put(mutationId, genes);
        }
        return genesByMutation;
    }

    /**
     * Renders the mapping as {@code id::gene,gene;id::gene}. A mutation with
     * no genes is rendered as {@code id::}, and a {@code null} mutation ID is
     * rendered as the text {@code null}.
     */
    private static String formatMutations(Map<String, ArrayList<String>> genesByMutation) {
        StringBuilder formatted = new StringBuilder();
        boolean firstMutation = true;
        for (Map.Entry<String, ArrayList<String>> mutation : genesByMutation.entrySet()) {
            if (!firstMutation) {
                formatted.append(";");
            }
            firstMutation = false;
            formatted.append(mutation.getKey()).append("::");

            boolean firstGene = true;
            for (String gene : mutation.getValue()) {
                if (!firstGene) {
                    formatted.append(",");
                }
                firstGene = false;
                formatted.append(gene);
            }
        }
        return formatted.toString();
    }

    /**
     * Concatenates all mutation lists emitted for one donor into a single
     * output line of the form {@code donor<TAB>list1;list2;...}.
     *
     * @param key the donor ID written as the key by {@code map}
     * @param values the mutation lists emitted for that donor
     * @param reducerInterface sink for the output line; the line is written as
     *        the key with a {@code null} value
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) {
        StringBuilder line = new StringBuilder();
        line.append(key).append("\t");
        boolean first = true;
        for (Text val : values) {
            if (!first) {
                line.append(";");
            }
            first = false;
            line.append(val.toString());
        }
        Text newVal = new Text();
        newVal.set(line.toString());
        reducerInterface.write(newVal, null);
    }
}