// Java tutorial
package org.intermine.bio.web.export; /* * Copyright (C) 2002-2013 FlyMine * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. See the LICENSE file for more * information or http://www.gnu.org/copyleft/lesser.html. * */ import java.io.OutputStream; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.commons.collections.keyvalue.MultiKey; import org.apache.log4j.Logger; import org.biojava.bio.Annotation; import org.biojava.bio.seq.DNATools; import org.biojava.bio.seq.Sequence; import org.biojava.bio.seq.io.FastaFormat; import org.biojava.bio.seq.io.SeqIOTools; import org.biojava.bio.symbol.IllegalSymbolException; import org.intermine.api.config.ClassKeyHelper; import org.intermine.api.results.ResultElement; import org.intermine.bio.web.biojava.BioSequence; import org.intermine.bio.web.biojava.BioSequenceFactory; import org.intermine.metadata.FieldDescriptor; import org.intermine.model.FastPathObject; import org.intermine.model.InterMineObject; import org.intermine.model.bio.BioEntity; import org.intermine.model.bio.Chromosome; import org.intermine.model.bio.Location; import org.intermine.model.bio.Protein; import org.intermine.model.bio.SequenceFeature; import org.intermine.objectstore.ObjectStore; import org.intermine.pathquery.Path; import org.intermine.util.IntPresentSet; import org.intermine.util.StringUtil; import org.intermine.web.logic.export.ExportException; import org.intermine.web.logic.export.ExportHelper; import org.intermine.web.logic.export.Exporter; /** * Export data in FASTA format. Select cell in each row that can be exported as * a sequence and fetch associated sequence. 
* * @author Kim Rutherford * @author Jakub Kulaviak **/ public class SequenceExporter implements Exporter { @SuppressWarnings("unused") private static final Logger LOG = Logger.getLogger(SequenceExporter.class); private ObjectStore os; private OutputStream out; private int featureIndex; private int writtenResultsCount = 0; private final Map<String, List<FieldDescriptor>> classKeys; private int extension; // must > 0 // Map to hold DNA sequence of a whole chromosome in memory private static Map<MultiKey, String> chromosomeSequenceMap = new HashMap<MultiKey, String>(); private List<Path> paths = Collections.emptyList(); /** * Constructor. * * @param os * object store used for fetching sequence for exported object * @param outputStream * output stream * @param featureIndex * index of cell in row that contains object to be exported * @param classKeys for the model */ public SequenceExporter(ObjectStore os, OutputStream outputStream, int featureIndex, Map<String, List<FieldDescriptor>> classKeys, int extension) { this.os = os; this.out = outputStream; this.featureIndex = featureIndex; this.classKeys = classKeys; this.extension = extension; } public SequenceExporter(ObjectStore os, OutputStream outputStream, int featureIndex, Map<String, List<FieldDescriptor>> classKeys, int extension, List<Path> paths) { this.os = os; this.out = outputStream; this.featureIndex = featureIndex; this.classKeys = classKeys; this.extension = extension; this.paths = paths; } /** * {@inheritDoc} */ public int getWrittenResultsCount() { return writtenResultsCount; } @Override public void export(Iterator<? extends List<ResultElement>> resultIt) { export(resultIt, paths, paths); } /** * {@inheritDoc} Lines are always separated with \n because third party tool * writeFasta is used for writing sequence. */ public void export(Iterator<? 
extends List<ResultElement>> resultIt, Collection<Path> unionPathCollection, Collection<Path> newPathCollection) { // IDs of the features we have successfully output - used to avoid // duplicates IntPresentSet exportedIDs = new IntPresentSet(); try { while (resultIt.hasNext()) { List<ResultElement> row = resultIt.next(); StringBuffer header = new StringBuffer(); ResultElement resultElement = row.get(featureIndex); Sequence bioSequence; Object object = os.getObjectById(resultElement.getId()); if (!(object instanceof InterMineObject)) { continue; } Integer objectId = ((InterMineObject) object).getId(); if (exportedIDs.contains(objectId)) { // exported already continue; } if (object instanceof SequenceFeature) { if (extension > 0) { bioSequence = createSequenceFeatureWithExtension(header, object, row, unionPathCollection, newPathCollection); } else { bioSequence = createSequenceFeature(header, object, row, unionPathCollection, newPathCollection); } } else if (object instanceof Protein) { bioSequence = createProtein(header, object, row, unionPathCollection, newPathCollection); } else { // ignore other objects continue; } if (bioSequence == null) { // the object doesn't have a sequence header.append("no sequence attached."); continue; } Annotation annotation = bioSequence.getAnnotation(); String headerString = header.toString(); if (headerString.length() > 0) { annotation.setProperty(FastaFormat.PROPERTY_DESCRIPTIONLINE, headerString); } else { if (object instanceof BioEntity) { annotation.setProperty(FastaFormat.PROPERTY_DESCRIPTIONLINE, ((BioEntity) object).getPrimaryIdentifier()); } else { // last resort annotation.setProperty(FastaFormat.PROPERTY_DESCRIPTIONLINE, "sequence_" + exportedIDs.size()); } } SeqIOTools.writeFasta(out, bioSequence); writtenResultsCount++; exportedIDs.add(objectId); } if (writtenResultsCount == 0) { out.write("Nothing was found for export".getBytes(Charset.forName("UTF-8"))); } out.flush(); } catch (Exception e) { throw new 
ExportException("Export failed.", e); } } private BioSequence createProtein(StringBuffer header, Object object, List<ResultElement> row, Collection<Path> unionPathCollection, Collection<Path> newPathCollection) throws IllegalSymbolException { BioSequence bioSequence; Protein protein = (Protein) object; bioSequence = BioSequenceFactory.make(protein); makeHeader(header, object, row, unionPathCollection, newPathCollection); return bioSequence; } private BioSequence createSequenceFeature(StringBuffer header, Object object, List<ResultElement> row, Collection<Path> unionPathCollection, Collection<Path> newPathCollection) throws IllegalSymbolException { BioSequence bioSequence; SequenceFeature feature = (SequenceFeature) object; bioSequence = BioSequenceFactory.make(feature); makeHeader(header, object, row, unionPathCollection, newPathCollection); return bioSequence; } private Sequence createSequenceFeatureWithExtension(StringBuffer header, Object object, List<ResultElement> row, Collection<Path> unionPathCollection, Collection<Path> newPathCollection) throws IllegalSymbolException { SequenceFeature feature = (SequenceFeature) object; Chromosome chr = feature.getChromosome(); String chrName = chr.getPrimaryIdentifier(); int chrLength = chr.getLength(); int start = feature.getChromosomeLocation().getStart(); int end = feature.getChromosomeLocation().getEnd(); String org = feature.getOrganism().getShortName(); String chrResidueString; if (chromosomeSequenceMap.get(new MultiKey(chrName, org)) == null) { chrResidueString = chr.getSequence().getResidues().toString(); chromosomeSequenceMap.put(new MultiKey(chrName, org), chr.getSequence().getResidues().toString()); } else { chrResidueString = chromosomeSequenceMap.get(new MultiKey(chrName, org)); } if (extension > 0) { start = start - extension; end = end + extension; } end = Math.min(end, chrLength); start = Math.max(start, 1); String seqName = "genomic_region_" + chrName + "_" + start + "_" + end + "_" + org.replace("\\. 
", "_"); Sequence seq = DNATools.createDNASequence(chrResidueString.substring(start - 1, end), seqName); makeHeader(header, object, row, unionPathCollection, newPathCollection); return seq; } /** * Set the header to be the contents of row, separated by spaces. */ private void makeHeader(StringBuffer header, Object object, List<ResultElement> row, Collection<Path> unionPathCollection, Collection<Path> newPathCollection) { List<String> headerBits = new ArrayList<String>(); // add the Object's (Protein or LocatedSequenceFeature) // primaryIdentifier at the first place // in the header Object keyFieldValue = ClassKeyHelper.getKeyFieldValue((FastPathObject) object, this.classKeys); if (keyFieldValue != null) { headerBits.add(keyFieldValue.toString()); } else { headerBits.add("-"); } // List<Object> keyFieldValues = // ClassKeyHelper.getKeyFieldValues((FastPathObject) object, this.classKeys); // for (Object key : keyFieldValues) { // if (key != null) { // headerBits.add(key.toString()); // } // } // here unionPathCollection is newPathCollection List<ResultElement> subRow = new ArrayList<ResultElement>(); if (newPathCollection != null && unionPathCollection != null && unionPathCollection.containsAll(newPathCollection)) { for (Path p : newPathCollection) { if (!p.toString().endsWith(".id")) { subRow.add(row.get(((List<Path>) unionPathCollection).indexOf(p))); } } } else { subRow = row; } // two instances if (object instanceof SequenceFeature) { // add the sequence location info at the second place in the header SequenceFeature feature = (SequenceFeature) object; Location loc = feature.getChromosomeLocation(); if (loc == null) { headerBits.add("-"); } else { // Assume if loc exits, the following information should be available String chr = loc.getLocatedOn().getPrimaryIdentifier(); Integer start = loc.getStart(); Integer end = loc.getEnd(); String locString = chr + ':' + start + '-' + end; headerBits.add(locString); } if (extension > 0) { headerBits.add("extension:" + 
extension + "bp"); } for (ResultElement re : subRow) { // to avoid failure in modmine when no experimental factors (sub 2745) if (re == null) { continue; } // Disable collection export until further bug diagnose if (re.getPath().containsCollections()) { continue; } Object fieldValue = re.getField(); if (fieldValue == null) { headerBits.add("-"); } else if (fieldValue.toString().equals(keyFieldValue) || (re.getObject() instanceof Location) || (re.getObject() instanceof Chromosome)) { // ignore the primaryIdentifier and Location in // ResultElement continue; } else { headerBits.add(fieldValue.toString()); } } } else if (object instanceof Protein) { for (ResultElement re : subRow) { if (re == null) { continue; } // Disable collection export until further bug diagnose if (re.getPath().containsCollections()) { continue; } Object fieldValue = re.getField(); if (fieldValue == null) { headerBits.add("-"); } else if (fieldValue.toString().equals(keyFieldValue)) { continue; } else { headerBits.add(fieldValue.toString()); } } } header.append(StringUtil.join(headerBits, " ")); } /** * {@inheritDoc} */ public boolean canExport(List<Class<?>> clazzes) { return canExportStatic(clazzes); } /* * Method must have different name than canExport because canExport() method * is inherited from Exporter interface */ /** * @param clazzes * classes of result * @return true if this exporter can export result composed of specified * classes */ public static boolean canExportStatic(List<Class<?>> clazzes) { return (ExportHelper.getClassIndex(clazzes, SequenceFeature.class) >= 0 || ExportHelper.getClassIndex(clazzes, Protein.class) >= 0 // || ExportHelper.getClassIndex(clazzes, Sequence.class) >= 0 ); } }