Java tutorial
/* * Copyright (C) 2010-2011 "Bio4j" * * This file is part of Bio4j * * Bio4j is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package com.bio4j.neo4jdb.model.util; import com.bio4j.neo4jdb.model.nodes.GoTermNode; import com.bio4j.neo4jdb.model.nodes.ProteinNode; import com.bio4j.neo4jdb.model.relationships.go.IsAGoRel; import com.bio4j.neo4jdb.model.relationships.protein.ProteinGoRel; import com.ohnosequences.xml.model.go.GOSlimXML; import com.ohnosequences.xml.model.go.GoAnnotationXML; import com.ohnosequences.xml.model.go.GoTermXML; import com.ohnosequences.xml.model.go.SlimSetXML; import com.ohnosequences.xml.model.uniprot.ProteinXML; import java.io.IOException; import java.util.*; import java.util.logging.FileHandler; import java.util.logging.Level; import java.util.logging.Logger; import java.util.logging.SimpleFormatter; import org.jdom2.Element; import org.neo4j.graphdb.Direction; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.index.Index; import org.neo4j.graphdb.index.IndexHits; import org.neo4j.graphdb.index.RelationshipIndex; /** * * @author Pablo Pareja Tobes <ppareja@era7.com> */ public class GoUtil { private static final Logger logger = Logger.getLogger("GoUtil"); static { try { FileHandler fh = new FileHandler("GoUtil.log", true); SimpleFormatter formatter = new SimpleFormatter(); fh.setFormatter(formatter); logger.addHandler(fh); logger.setLevel(Level.ALL); } catch (IOException ex) { Logger.getLogger(GoUtil.class.getName()).log(Level.SEVERE, null, ex); } catch (SecurityException ex) { Logger.getLogger(GoUtil.class.getName()).log(Level.SEVERE, null, ex); } } public static GoAnnotationXML getGoAnnotation(ArrayList<ProteinXML> proteins, Bio4jManager manager) { GoAnnotationXML annotationXML = new GoAnnotationXML(); HashMap<String, GoTermXML> goAnnotatorsMap = new HashMap<String, GoTermXML>(); HashMap<String, Integer> goCountsMap = new HashMap<String, Integer>(); // Transaction txn = manager.beginTransaction(); try { //IndexService indexService = manager.getIndexService(); Index<Node> proteinAccessionIndex = manager.getProteinAccessionIndex(); ProteinGoRel proteinGoRel = new ProteinGoRel(null); Iterator<Relationship> relIterator = null; for (ProteinXML proteinXML : proteins) { IndexHits<Node> protHits = proteinAccessionIndex.get(ProteinNode.PROTEIN_ACCESSION_INDEX, proteinXML.getId()); if (protHits.hasNext()) { ProteinNode proteinNode = new ProteinNode(protHits.getSingle()); relIterator = proteinNode.getNode().getRelationships(proteinGoRel, Direction.OUTGOING) .iterator(); while (relIterator.hasNext()) { proteinGoRel = new ProteinGoRel(relIterator.next()); GoTermNode goTermNode = new GoTermNode(proteinGoRel.getRelationship().getEndNode()); String goId = goTermNode.getId(); GoTermXML goXml = new GoTermXML(); goXml.setId(goId); goXml.setAspect(goTermNode.getNamespace()); goXml.setGoName(goTermNode.getName()); goXml.setEvidence(proteinGoRel.getEvidence()); proteinXML.addGoTerm(goXml, true); Integer goCount = goCountsMap.get(goId); if (goCount == null) { goCountsMap.put(goId, 1); goAnnotatorsMap.put(goId, new GoTermXML((Element) goXml.asJDomElement().clone())); } else { goCountsMap.put(goId, (goCount + 1)); } } proteinXML.detach(); annotationXML.addProteinAnnotation(proteinXML); } } Set<String> keySet = goAnnotatorsMap.keySet(); for (String currentKey : keySet) { GoTermXML tempGo = goAnnotatorsMap.get(currentKey); tempGo.setAnnotationsCount(goCountsMap.get(currentKey)); annotationXML.addAnnotatorGoTerm(tempGo); } // txn.success(); } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); // txn.failure(); annotationXML = null; } finally { // txn.finish(); } return annotationXML; } /** * * @param proteins * @param slimSetXML * @param manager * @param goAnnotationXML * @return GO Slim in xml format */ public static GOSlimXML getGoSlim(ArrayList<ProteinXML> proteins, SlimSetXML slimSetXML, Bio4jManager manager, GoAnnotationXML goAnnotationXML) { GOSlimXML goSlimXML = new GOSlimXML(); if (goAnnotationXML == null) { goAnnotationXML = GoUtil.getGoAnnotation(proteins, manager); } //int goTermsLostNotIncludedInSlimSet = 0; Index<Node> goTermIdIndex = manager.getGoTermIdIndex(); if (goAnnotationXML != null) { List<GoTermXML> goAnnotators = goAnnotationXML.getAnnotatorGoTerms(); //IndexService indexService = manager.getIndexService(); // in this hash map there is one entry for each annotator go term // the hash-set contains every slim-set go term including the annotator HashMap<String, HashSet<String>> goAnnotatorsIncludingSlimSetTermsMap = new HashMap<String, HashSet<String>>(); //Here are the xml elements of the Go terms from the slim set termid --> term xml HashMap<String, GoTermXML> slimSetGos = new HashMap<String, GoTermXML>(); //Here are the number of proteins annotated for each go term of the slim set termid --> number of proteins annotated HashMap<String, Integer> slimSetTermsAnnotationCounts = new HashMap<String, Integer>(); //Now I extract the ids of the SlimSet List<Element> slimElements = slimSetXML.asJDomElement().getChildren(GoTermXML.TAG_NAME); for (Element slimElement : slimElements) { GoTermXML tempGo = new GoTermXML(slimElement); //--completing data of slimset go terms----- GoTermNode tempGoNode = new GoTermNode( manager.getGoTermIdIndex().get(GoTermNode.GO_TERM_ID_INDEX, tempGo.getId()).getSingle()); tempGo.setAspect(tempGoNode.getNamespace()); tempGo.setGoName(tempGoNode.getName()); //------------------------ slimSetGos.put(tempGo.getId(), tempGo); //initializing annotation counts map slimSetTermsAnnotationCounts.put(tempGo.getId(), 0); } //-------------------------------------------- // logger.log(Level.INFO, "slimsetIds:"); // for (String slimId : slimSetIds) { // logger.log(Level.INFO, ("slimId:" + slimId)); // } //Now it is time for goAnnotatorsIncludingSlimSetTermsMap initialization for (GoTermXML goAnnotator : goAnnotators) { goAnnotatorsIncludingSlimSetTermsMap.put(goAnnotator.getId(), new HashSet<String>()); } //------------------------------------------------------ try { IsAGoRel goParentRel = new IsAGoRel(null); //Now I search the way up of every go Annotator and check if in the way I find //any of the terms included in the slim set. //logger.log(Level.INFO, "lalalala"); int callCounter = 0; for (GoTermXML goAnnotator : goAnnotators) { //this array includes the term own id and every ancestor id HashSet<String> ancestorsIds = new HashSet<String>(); GoTermNode goTermNode = new GoTermNode( goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, goAnnotator.getId()).getSingle()); //logger.log(Level.INFO, goTermNode.toString()); fillUpAncestorIds(goTermNode, ancestorsIds, goParentRel, manager.getIsAGoRelIndex(), callCounter); for (String ancestorId : ancestorsIds) { //If the ancestor is included in the slim set, it means that this term //from the slim set includes the goAnnotator if (slimSetGos.keySet().contains(ancestorId)) { HashSet<String> hashSet = goAnnotatorsIncludingSlimSetTermsMap.get(goAnnotator.getId()); hashSet.add(ancestorId); //ancestorId is actually one of the slim-set terms ids. } } callCounter++; } //So now I should have every goAnnotator with its corresponing slimSet terms List<Element> proteinList = goAnnotationXML.getProteinAnnotations() .getChildren(ProteinXML.TAG_NAME); Element proteinsElem = new Element(GOSlimXML.PROTEINS_TAG_NAME); int sampleAnnotatedGeneNumber = 0; for (Element currentElem : proteinList) { boolean annotated = false; //getting the protein ProteinXML currentProteinXML = new ProteinXML(currentElem); //initializing inductors map HashMap<String, String> currentProteinSlimTermInductors = new HashMap<String, String>(); //proteinSlimTermsAndInductorTermsMap.put(currentProteinXML.getId(), currentProteinSlimTermInductors); //System.out.println("currentProteinXML.getId() = " + currentProteinXML.getId()); //--------now we access to its go annotations------------- List<GoTermXML> proteinTerms = new ArrayList<GoTermXML>(); List<GoTermXML> bpTerms = currentProteinXML.getBiologicalProcessGoTerms(); List<GoTermXML> ccTerms = currentProteinXML.getCellularComponentGoTerms(); List<GoTermXML> mfTerms = currentProteinXML.getMolecularFunctionGoTerms(); if (bpTerms != null) { proteinTerms.addAll(bpTerms); } if (ccTerms != null) { proteinTerms.addAll(ccTerms); } if (mfTerms != null) { proteinTerms.addAll(mfTerms); } //------------------------------------------------------ //creating the result xml protein ProteinXML proteinResult = new ProteinXML(); proteinResult.setId(currentProteinXML.getId()); HashSet<String> proteinSlimTems = new HashSet<String>(); for (GoTermXML goTermXML : proteinTerms) { HashSet<String> hashSet = goAnnotatorsIncludingSlimSetTermsMap.get(goTermXML.getId()); System.out.println(""); if (hashSet != null) { if (hashSet.size() > 0) { proteinSlimTems.addAll(hashSet); for (String tempSlimTermId : hashSet) { currentProteinSlimTermInductors.put(tempSlimTermId, goTermXML.getId()); } annotated = true; } else { //-----The go term annotation lost is stored------- goSlimXML.addGoTermLostNotIncludedInSlimSet( new GoTermXML((Element) goTermXML.asJDomElement().clone())); System.out.println("holaaa!" + goTermXML.getId()); } } } //now we get the info from the slimset go terms for (String string : proteinSlimTems) { //logger.log(Level.INFO, ("string: " + string)); GoTermXML tempGoTerm = new GoTermXML( (Element) slimSetGos.get(string).asJDomElement().clone()); //------Adding protein annotation term leading to slim set term ----------- String termInductorId = currentProteinSlimTermInductors.get(tempGoTerm.getId()); //look for inductor info for (GoTermXML goTermXML : proteinTerms) { //System.out.println("goTermXML = " + goTermXML); if (goTermXML.getId().equals(termInductorId)) { tempGoTerm.setProteinAnnotationLeadingToSlimTerm( new GoTermXML((Element) goTermXML.asJDomElement().clone())); break; } } //-------------------------------------------------------------------------- //logger.log(Level.INFO, ("tempGoTerm: " + tempGoTerm)); proteinResult.addGoTerm(tempGoTerm, true); //updating annotation counts slimSetTermsAnnotationCounts.put(string, slimSetTermsAnnotationCounts.get(string) + 1); } proteinsElem.addContent(proteinResult.asJDomElement()); if (annotated) { sampleAnnotatedGeneNumber++; } } //updating slimset annotation counts List<Element> slimSetElems = slimSetXML.asJDomElement().getChildren(GoTermXML.TAG_NAME); for (Element slimSetElem : slimSetElems) { GoTermXML slimSetGo = new GoTermXML(slimSetElem); slimSetGo.setAnnotationsCount(slimSetTermsAnnotationCounts.get(slimSetGo.getId())); } goSlimXML.asJDomElement().addContent(proteinsElem); slimSetXML.detach(); goSlimXML.setSlimSet(slimSetXML); goSlimXML.setSampleGeneNumber(proteinList.size()); goSlimXML.setSampleAnnotatedGeneNumber(sampleAnnotatedGeneNumber); //goSlimXML.set } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); StackTraceElement[] trace = e.getStackTrace(); for (StackTraceElement stackTraceElement : trace) { logger.log(Level.SEVERE, stackTraceElement.toString()); } goSlimXML = null; } } else { goSlimXML = null; } return goSlimXML; } private static void fillUpAncestorIds(GoTermNode node, HashSet<String> ancestorsIds, IsAGoRel goParentRel, RelationshipIndex goParentRelIndex, int call) { ancestorsIds.add(node.getId()); //logger.log(Level.INFO, ("fillUpAncestorIds (v2) --> " + node.getId() + " call: " + call)); Iterator<Relationship> relIterator = goParentRelIndex .get(IsAGoRel.IS_A_REL_INDEX, String.valueOf(node.getNode().getId())).iterator(); if (relIterator.hasNext()) { node = new GoTermNode(relIterator.next().getEndNode()); fillUpAncestorIds(node, ancestorsIds, goParentRel, goParentRelIndex, call); while (relIterator.hasNext()) { node = new GoTermNode(relIterator.next().getEndNode()); //logger.log(Level.INFO, ("double parent --> " + node.getId() + " call: " + call)); fillUpAncestorIds(node, ancestorsIds, goParentRel, goParentRelIndex, call + 400000); } } } }