org.intermine.bio.dataconversion.SgdGffUtrConverter.java Source code

Java tutorial

Introduction

Here is the source code for org.intermine.bio.dataconversion.SgdGffUtrConverter.java

Source

package org.intermine.bio.dataconversion;

/*
 * Copyright (C) 2002-2016 FlyMine
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  See the LICENSE file for more
 * information or http://www.gnu.org/copyleft/lesser.html.
 *
 */

import java.io.Reader;
import java.io.BufferedReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.intermine.dataconversion.ItemWriter;
import org.intermine.metadata.Model;
import org.intermine.xml.full.Item;
import org.intermine.objectstore.ObjectStoreException;

/**
 * Converter that derives UTR features from a tab-delimited file of SGD gene and transcript
 * coordinates.
 *
 * <p>Each input line must have at least 11 tab-separated columns. Columns 0, 1, 3, 4, 5, 6, 7
 * and 8 are read as gene identifier, transcript identifier, chromosome, gene start, transcript
 * start, gene end, transcript end and strand respectively. For every line whose transcript
 * extends beyond the gene at both ends, one {@code FivePrimeUTR} and one {@code ThreePrimeUTR}
 * item (each with a {@code Location} on the chromosome) are created and stored, and attached to
 * an {@code MRNA} item which in turn is attached to a {@code Gene} item.</p>
 *
 * <p>NOTE(review): the gene and transcript caches are never cleared, so if {@link
 * #process(Reader)} is invoked for more than one file the items cached from earlier files are
 * passed to {@code store} again -- confirm this converter is only ever fed a single file.</p>
 *
 * @author
 */
public class SgdGffUtrConverter extends BioFileConverter {

    private static final String DATASET_TITLE = "SGD UTRs from GFF";
    private static final String DATA_SOURCE_NAME = "SGD UTRs from Transcriptome GFF";
    private static final String TAXON_ID = "4932";
    /** Minimum number of tab-separated columns a data line must contain. */
    private static final int MIN_COLUMNS = 11;
    /** Strand value for which the upstream/downstream UTR types are swapped. */
    private static final String REVERSE_STRAND = "-1";

    /** Gene items keyed by the identifier read from column 0; stored at the end of process(). */
    private final Map<String, Item> genes = new HashMap<String, Item>();
    /** MRNA items keyed by the identifier read from column 1; stored at the end of process(). */
    private final Map<String, Item> transcripts = new HashMap<String, Item>();
    /** Chromosome name mapped to the item identifier of the already-stored Chromosome item. */
    private final Map<String, String> chromosomes = new HashMap<String, String>();
    private final Item organism;

    /**
     * Constructor. Creates and immediately stores the organism item that every chromosome
     * created by this converter references.
     *
     * @param writer the ItemWriter used to handle the resultant items
     * @param model the Model
     * @throws ObjectStoreException if the organism item cannot be stored
     */
    public SgdGffUtrConverter(ItemWriter writer, Model model) throws ObjectStoreException {
        super(writer, model, DATA_SOURCE_NAME, DATASET_TITLE);
        organism = createItem("Organism");
        organism.setAttribute("taxonId", TAXON_ID);
        organism.setAttribute("genus", "Saccharomyces");
        organism.setAttribute("species", "cerevisiae");
        organism.setAttribute("name", "Saccharomyces cerevisiae");
        organism.setAttribute("shortName", "S. cerevisiae");
        store(organism);
    }

    /**
     * Reads the whole file, then stores the accumulated transcript items followed by the
     * accumulated gene items.
     *
     * {@inheritDoc}
     */
    public void process(Reader reader) throws Exception {
        processFile(reader);
        storeTranscripts();
        storeGenes();
    }

    /**
     * Parses the input line by line and creates UTR items for every usable line. The reader is
     * always closed, including when a line is rejected.
     *
     * @param preader reader over the tab-delimited input; closed by this method
     * @throws IllegalArgumentException if a line has fewer than 11 columns
     * @throws Exception if reading fails or an item cannot be stored
     */
    private void processFile(Reader preader) throws Exception {

        /*
        Column layout of the input (YeastMine query paths), in order:
        Gene.secondaryIdentifier   -- key for gene
        Gene.transcripts.primaryIdentifier   -- key for transcript
        Gene.transcripts.symbol
        Gene.transcripts.chromosome.primaryIdentifier
        Gene.chromosomeLocation.start
        Gene.transcripts.chromosomeLocation.start
        Gene.chromosomeLocation.end
        Gene.transcripts.chromosomeLocation.end
        Gene.chromosomeLocation.strand
        Gene.transcripts.fivePrimeDataSet
        Gene.transcripts.threePrimeDataSet
         */
        System.out.println("Processing SGD transcript info data file exported from YeastMine....");

        BufferedReader br = new BufferedReader(preader);
        try {
            String line;
            while ((line = br.readLine()) != null) {

                String[] array = line.split("\t", -1); //keep trailing empty
                if (array.length < MIN_COLUMNS) {
                    throw new IllegalArgumentException(
                            "Not enough elements (should be  11 not " + array.length + ") in line: " + line);
                }
                String geneId = array[0].trim();
                String transcriptId = array[1].trim();
                String chromosome = array[3].trim();
                String geneStart = array[4].trim();
                String transcriptStart = array[5].trim();
                String geneEnd = array[6].trim();
                String transcriptEnd = array[7].trim();
                String strand = array[8].trim();

                // NOTE(review): a line is dropped entirely when either boundary coincides, so
                // a transcript with only one non-empty UTR yields no UTR at all -- confirm
                // that this is intended.
                if (geneStart.equalsIgnoreCase(transcriptStart)) {
                    System.out.println("TS and GS are same : " + geneId + "   " + transcriptId);
                    continue;
                }

                if (geneEnd.equalsIgnoreCase(transcriptEnd)) {
                    System.out.println("TE and GE are same : " + geneId + "   " + transcriptId);
                    continue;
                }

                System.out.println("Processing line..." + geneId + "   " + transcriptId);
                getUTRs(geneId, transcriptId, chromosome, geneStart, transcriptStart, geneEnd,
                        transcriptEnd, strand);
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped reader.
            br.close();
        }
    }

    /**
     * Creates and stores the two UTR items for one gene/transcript pair and links them to the
     * (cached) transcript and gene items.
     *
     * <p>The region from the transcript start to one base before the gene start becomes a
     * {@code FivePrimeUTR}, and the region from one base after the gene end to the transcript
     * end becomes a {@code ThreePrimeUTR}; when the strand is {@code "-1"} the two types are
     * swapped.</p>
     *
     * @param geneId gene identifier
     * @param transcriptId transcript identifier, also used as prefix of the UTR identifiers
     * @param chromosome chromosome name
     * @param geneStart gene start coordinate (decimal integer)
     * @param transcriptStart transcript start coordinate
     * @param geneEnd gene end coordinate (decimal integer)
     * @param transcriptEnd transcript end coordinate
     * @param strand strand as given in the input
     * @throws NumberFormatException if geneStart or geneEnd is not an integer
     * @throws ObjectStoreException if an item cannot be stored
     */
    private void getUTRs(String geneId, String transcriptId, String chromosome, String geneStart,
            String transcriptStart, String geneEnd, String transcriptEnd, String strand)
            throws ObjectStoreException {

        // Parse before creating anything so a malformed number cannot leave a half-written
        // transcript behind.
        String upstreamEnd = String.valueOf(Integer.parseInt(geneStart) - 1);
        String downstreamStart = String.valueOf(Integer.parseInt(geneEnd) + 1);

        Item gene = getGeneItem(geneId);
        Item transcript = getTranscriptItem(transcriptId);

        // NOTE(review): getChromosome returns null for an empty name and that null is then
        // used as a reference id below -- verify how Item.setReference treats null.
        String chromosomeId = getChromosome(chromosome);

        boolean reverse = REVERSE_STRAND.equals(strand);

        // Region before the gene: 5' UTR on the forward strand, 3' UTR on the reverse strand.
        Item upstreamUtr = reverse
                ? createUtr("ThreePrimeUTR", transcriptId + "-3prime-utr", chromosomeId,
                        transcriptStart, upstreamEnd, strand)
                : createUtr("FivePrimeUTR", transcriptId + "-5prime-utr", chromosomeId,
                        transcriptStart, upstreamEnd, strand);

        // Region after the gene: 3' UTR on the forward strand, 5' UTR on the reverse strand.
        Item downstreamUtr = reverse
                ? createUtr("FivePrimeUTR", transcriptId + "-5prime-utr", chromosomeId,
                        downstreamStart, transcriptEnd, strand)
                : createUtr("ThreePrimeUTR", transcriptId + "-3prime-utr", chromosomeId,
                        downstreamStart, transcriptEnd, strand);

        transcript.addToCollection("UTRs", upstreamUtr);
        transcript.addToCollection("UTRs", downstreamUtr);

        gene.addToCollection("transcripts", transcript);
    }

    /**
     * Creates one UTR item with its location, and stores both.
     *
     * @param className item class name, "FivePrimeUTR" or "ThreePrimeUTR"
     * @param primaryIdentifier value for the item's primaryIdentifier attribute
     * @param chromosomeId item identifier of the chromosome
     * @param start start coordinate of the UTR
     * @param end end coordinate of the UTR
     * @param strand strand as given in the input
     * @return the stored UTR item
     * @throws ObjectStoreException if the UTR or its location cannot be stored
     */
    private Item createUtr(String className, String primaryIdentifier, String chromosomeId,
            String start, String end, String strand) throws ObjectStoreException {
        Item utr = createItem(className);
        utr.setAttribute("primaryIdentifier", primaryIdentifier);
        String locationRefId = getLocation(utr, chromosomeId, start, end, strand);
        utr.setReference("chromosome", chromosomeId);
        utr.setReference("chromosomeLocation", locationRefId);
        store(utr);
        return utr;
    }

    /**
     * Returns the item identifier of the chromosome with the given name, creating and storing
     * the chromosome item the first time the name is seen.
     *
     * @param identifier chromosome name
     * @return the chromosome's item identifier, or null if the name is null or empty
     * @throws ObjectStoreException if a new chromosome item cannot be stored
     */
    private String getChromosome(String identifier) throws ObjectStoreException {
        if (StringUtils.isEmpty(identifier)) {
            return null;
        }
        String refId = chromosomes.get(identifier);
        if (refId == null) {
            Item item = createItem("Chromosome");
            item.setAttribute("primaryIdentifier", identifier);
            item.setReference("organism", organism);
            store(item);
            // Cache only after the store succeeded.
            refId = item.getIdentifier();
            chromosomes.put(identifier, refId);
        }
        return refId;
    }

    /**
     * Creates and stores a Location item for the given feature, and sets the feature's length
     * attribute when both coordinates are present.
     *
     * @param subject the feature being located; its "length" attribute may be set here
     * @param chromosomeRefId item identifier of the chromosome the feature lies on
     * @param startCoord start coordinate, may be empty
     * @param stopCoord end coordinate, may be empty
     * @param strand strand, may be empty
     * @return the item identifier of the stored location
     * @throws ObjectStoreException if the location cannot be stored
     */
    private String getLocation(Item subject, String chromosomeRefId, String startCoord, String stopCoord,
            String strand) throws ObjectStoreException {

        boolean hasStart = !StringUtils.isEmpty(startCoord);
        boolean hasEnd = !StringUtils.isEmpty(stopCoord);

        if (hasStart && hasEnd) {
            subject.setAttribute("length", getLength(startCoord, stopCoord));
        }
        Item location = createItem("Location");
        if (hasStart) {
            location.setAttribute("start", startCoord);
        }
        if (hasEnd) {
            location.setAttribute("end", stopCoord);
        }
        if (!StringUtils.isEmpty(strand)) {
            location.setAttribute("strand", strand);
        }
        location.setReference("feature", subject);
        location.setReference("locatedOn", chromosomeRefId);
        store(location);
        return location.getIdentifier();
    }

    /**
     * Returns the distance between two coordinates as a decimal string. The order of the
     * arguments does not matter, so the result is never negative for coordinates given in
     * descending order.
     *
     * <p>NOTE(review): this is {@code end - start}, not {@code end - start + 1}; if the
     * coordinates are inclusive the stored length is one short -- confirm before changing,
     * since it alters loaded data.</p>
     *
     * @param start first coordinate (decimal integer)
     * @param end second coordinate (decimal integer)
     * @return the larger coordinate minus the smaller one
     * @throws NumberFormatException if either argument is not an integer
     */
    private String getLength(String start, String end) throws NumberFormatException {
        int a = Integer.parseInt(start);
        int b = Integer.parseInt(end);
        return String.valueOf(Math.max(a, b) - Math.min(a, b));
    }

    /**
     * Returns the cached Gene item for the identifier, creating (but not storing) it on first
     * use. Gene items are stored later by {@link #storeGenes()}.
     *
     * @param geneId value for the gene's secondaryIdentifier attribute
     * @return the gene item
     * @throws ObjectStoreException declared for callers; not thrown by the current body
     */
    private Item getGeneItem(String geneId) throws ObjectStoreException {
        Item gene = genes.get(geneId);
        if (gene == null) {
            gene = createItem("Gene");
            gene.setAttribute("secondaryIdentifier", geneId);
            genes.put(geneId, gene);
        }
        return gene;
    }

    /**
     * Returns the cached MRNA item for the identifier, creating (but not storing) it on first
     * use. Transcript items are stored later by {@link #storeTranscripts()}.
     *
     * @param transcriptId value for the transcript's primaryIdentifier attribute
     * @return the transcript item
     * @throws ObjectStoreException declared for callers; not thrown by the current body
     */
    private Item getTranscriptItem(String transcriptId) throws ObjectStoreException {
        Item transcript = transcripts.get(transcriptId);
        if (transcript == null) {
            transcript = createItem("MRNA");
            transcript.setAttribute("primaryIdentifier", transcriptId);
            transcripts.put(transcriptId, transcript);
        }
        return transcript;
    }

    /**
     * Stores every cached gene item.
     *
     * @throws ObjectStoreException if a gene cannot be stored
     */
    private void storeGenes() throws ObjectStoreException {
        for (Item gene : genes.values()) {
            store(gene);
        }
    }

    /**
     * Stores every cached transcript item.
     *
     * @throws ObjectStoreException if a transcript cannot be stored
     */
    private void storeTranscripts() throws ObjectStoreException {
        for (Item transcript : transcripts.values()) {
            store(transcript);
        }
    }
}