List of usage examples for org.jdom2 Element getText
public String getText()
From source file:com.athena.chameleon.engine.utils.PDFWriterUtil.java
License:Apache License
/** * /*from www . j a v a 2s . co m*/ * list * * @param section list section ? * @param e list element * @throws Exception */ public static void setList(Section section, Element e) throws Exception { List list = new List(false, 15); list.setIndentationLeft(23); for (Element e1 : e.getChildren()) { ListItem item = new ListItem(e1.getText(), fnNormal); if (e1.getChild("url") != null) { item.add(getUrl(e1.getChild("url"))); } item.setMultipliedLeading(1.8F); list.add(item); } list.getFirstItem().setSpacingBefore(-14); list.getLastItem().setSpacingAfter(14); section.add(list); }
From source file:com.athena.chameleon.engine.utils.PDFWriterUtil.java
License:Apache License
/**
 * Builds a clickable chunk from a url element.
 *
 * <p>The element's text is used both as the visible label (rendered with {@code fnURL})
 * and as the target of the link action attached to the chunk.
 *
 * @param e url element
 * @return chunk showing the URL text with a link action attached
 * @throws Exception if the element text cannot be parsed as a URL
 */
public static Chunk getUrl(Element e) throws Exception {
    final String address = e.getText();

    final Chunk link = new Chunk(address, fnURL);
    final URL target = new URL(address);
    link.setAction(new PdfAction(target));

    return link;
}
From source file:com.athena.chameleon.engine.utils.PDFWriterUtil.java
License:Apache License
/** * /*from ww w. j ava 2 s.c o m*/ * image * * @param section image section ? * @param e image element * @throws Exception */ public static void setImage(Section section, Element e) throws Exception { Image img = Image.getInstance(PDFDocGenerator.class.getResource(e.getText())); img.setAlignment(com.itextpdf.text.Element.ALIGN_CENTER); if (e.getAttributeValue("scale") != null) { float scale = Float.parseFloat(e.getAttributeValue("scale")); img.scalePercent(scale, scale); } section.add(img); }
From source file:com.athena.chameleon.engine.utils.PDFWriterUtil.java
License:Apache License
/** * /* w ww .j a va 2 s. c o m*/ * ?? header * * @param e table element * @param t header table ? * @param colCount column * @throws Exception */ public static void setTableHeader(Element e, PdfPTable t, int colCount) throws Exception { t.getDefaultCell().setBackgroundColor(new BaseColor(217, 217, 217)); t.getDefaultCell().setHorizontalAlignment(com.itextpdf.text.Element.ALIGN_CENTER); ArrayList<Integer> colWidth = new ArrayList<Integer>(); for (Element e1 : e.getChild("header").getChildren()) { t.addCell(new Phrase(e1.getText(), fnNormalBold)); if (e1.getAttributeValue("width") != null) colWidth.add(Integer.parseInt(e1.getAttributeValue("width"))); } if (colCount == colWidth.size()) { int[] col = new int[colCount]; for (int i = 0; i < colCount; i++) col[i] = colWidth.get(i); t.setWidths(col); } }
From source file:com.athena.chameleon.engine.utils.PDFWriterUtil.java
License:Apache License
/**
 * Adds the cells of the {@code row} child of a table element to a table.
 *
 * <p>The table's default cell is switched to a white background with left alignment
 * before one cell per child of {@code row} is added.
 *
 * @param e table element
 * @param t table that receives the row cells
 * @throws Exception declared for symmetry with the other table helpers
 */
public static void setTableRow(Element e, PdfPTable t) throws Exception {
    t.getDefaultCell().setBackgroundColor(new BaseColor(255, 255, 255));
    t.getDefaultCell().setHorizontalAlignment(com.itextpdf.text.Element.ALIGN_LEFT);

    Element row = e.getChild("row");
    for (Element cell : row.getChildren()) {
        Phrase content = new Phrase(cell.getText(), fnNormal);
        t.addCell(content);
    }
}
From source file:com.aurum.whitehole.ObjectDB.java
License:Open Source License
/**
 * Loads the object database from {@code objectdb.xml} in the working directory.
 *
 * <p>{@code fallback} is set to {@code true} on entry and only cleared after the whole
 * file has been read. If the file is missing, or reading/parsing fails with an
 * {@code IOException} or {@code JDOMException}, the method returns with {@code fallback}
 * still {@code true}; on a parse failure {@code timestamp} is reset to 0 and any entries
 * read so far remain in the maps.
 *
 * <p>Missing or empty {@code notes} and {@code preferredfile} values are replaced by
 * placeholder strings, and a missing {@code files} child yields an empty file list.
 */
public static void init() {
    fallback = true;
    timestamp = 0;
    categories = new LinkedHashMap<>();
    objects = new LinkedHashMap<>();

    File odbfile = new File("objectdb.xml");
    if (!(odbfile.exists() && odbfile.isFile())) {
        return;
    }

    try {
        Element root = new SAXBuilder().build(odbfile).getRootElement();
        timestamp = root.getAttribute("timestamp").getLongValue();

        List<Element> catelems = root.getChild("categories").getChildren("category");
        for (Element catelem : catelems) {
            categories.put(catelem.getAttribute("id").getIntValue(), catelem.getText());
        }

        List<Element> objelems = root.getChildren("object");
        for (Element objelem : objelems) {
            // NOTE: Object here is the project's own entry type (it exposes ID, name,
            // category, ... fields), not java.lang.Object.
            Object entry = new Object();
            entry.ID = objelem.getAttributeValue("id");
            entry.name = objelem.getChildText("name");
            entry.category = objelem.getChild("category").getAttribute("id").getIntValue();
            entry.type = objelem.getChild("preferredfile").getAttributeValue("name");
            entry.notes = objelem.getChildText("notes");

            Element flags = objelem.getChild("flags");
            entry.games = flags.getAttribute("games").getIntValue();
            entry.known = flags.getAttribute("known").getIntValue();
            entry.complete = flags.getAttribute("complete").getIntValue();

            // getChildText/getAttributeValue return null when the child/attribute is
            // absent, so treat null like an empty value.
            if (entry.notes == null || entry.notes.isEmpty()) {
                entry.notes = "(No description found for this objects.)";
            }
            // The original tested entry.notes here (copy-paste slip); the check belongs
            // to entry.type.
            if (entry.type == null || entry.type.isEmpty()) {
                entry.type = "Unknown";
            }

            entry.files = new ArrayList<>();
            String files = objelem.getChildText("files");
            if (files != null) {
                for (String file : files.split("\n")) {
                    entry.files.add(file);
                }
            }

            List<Element> fields = objelem.getChildren("field");
            entry.fields = new HashMap<>(fields.size());
            for (Element field : fields) {
                Object.Field fielddata = new Object.Field();
                fielddata.ID = field.getAttribute("id").getIntValue();
                fielddata.type = field.getAttributeValue("type");
                fielddata.name = field.getAttributeValue("name");
                fielddata.values = field.getAttributeValue("values");
                fielddata.notes = field.getAttributeValue("notes");

                entry.fields.put(fielddata.ID, fielddata);
            }

            objects.put(entry.ID, entry);
        }
    } catch (IOException | JDOMException ex) {
        // Unreadable or malformed database: stay in fallback mode.
        timestamp = 0;
        return;
    }

    fallback = false;
}
From source file:com.bc.ceres.site.util.ExclusionListBuilder.java
License:Open Source License
static void addPomToExclusionList(File exclusionList, URL pom) throws Exception { try (BufferedWriter writer = new BufferedWriter(new FileWriter(exclusionList, true))) { final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); final Document w3cDoc = builder.parse(pom.openStream()); final DOMBuilder domBuilder = new DOMBuilder(); final org.jdom2.Document doc = domBuilder.build(w3cDoc); final Element root = doc.getRootElement(); final Namespace namespace = root.getNamespace(); final List<Element> modules = root.getChildren(MODULES_NODE, namespace); if (modules != null) { // hard-coded index 0 is ok because xml-schema allows only one <modules>-node final Element modulesNode = modules.get(0); final List<Element> modulesList = modulesNode.getChildren(MODULE_NAME, namespace); for (Element module : modulesList) { addModuleToExclusionList(exclusionList, writer, module.getText()); }/*from ww w . j a v a2 s .c o m*/ } } catch (Exception e) { e.printStackTrace(); } }
From source file:com.bio4j.neo4jdb.programs.ImportGeneOntology.java
License:Open Source License
public static void main(String[] args) { if (args.length != 3) { System.out.println(/* w w w .j a v a 2 s .co m*/ "This program expects the following parameters: \n" + "1. Gene ontology xml filename \n" + "2. Bio4j DB folder \n" + "3. Batch inserter .properties file"); } else { long initTime = System.nanoTime(); File inFile = new File(args[0]); BatchInserter inserter = null; BatchInserterIndexProvider indexProvider = null; BatchInserterIndex goTermIdIndex; BatchInserterIndex isAGoRelIndex; BatchInserterIndex nodeTypeIndex; BufferedWriter statsBuff = null; int termCounter = 0; int limitForPrintingOut = 10000; try { // This block configures the logger with handler and formatter fh = new FileHandler("ImportGeneOntology.log", true); SimpleFormatter formatter = new SimpleFormatter(); fh.setFormatter(formatter); logger.addHandler(fh); logger.setLevel(Level.ALL); //---creating writer for stats file----- statsBuff = new BufferedWriter(new FileWriter(new File("ImportGeneOntologyStats.txt"))); // create the batch inserter inserter = BatchInserters.inserter(args[1], MapUtil.load(new File(args[2]))); // create the batch index service indexProvider = new LuceneBatchInserterIndexProvider(inserter); Map<String, String> indexProps = MapUtil.stringMap("provider", "lucene", "type", "exact"); goTermIdIndex = indexProvider.nodeIndex(GoTermNode.GO_TERM_ID_INDEX, indexProps); isAGoRelIndex = indexProvider.relationshipIndex(IsAGoRel.IS_A_REL_INDEX, indexProps); nodeTypeIndex = indexProvider.nodeIndex(Bio4jManager.NODE_TYPE_INDEX_NAME, indexProps); //------------------nodes properties maps----------------------------------- Map<String, Object> goProperties = new HashMap<String, Object>(); goProperties.put(GoTermNode.NODE_TYPE_PROPERTY, GoTermNode.NODE_TYPE); //-------------------------------------------------------------------------- //--------------------------------relationships------------------------------------------ IsAGoRel isAGoRel = new IsAGoRel(null); RegulatesGoRel 
regulatesGoRel = new RegulatesGoRel(null); NegativelyRegulatesGoRel negativelyRegulatesGoRel = new NegativelyRegulatesGoRel(null); PositivelyRegulatesGoRel positivelyRegulatesGoRel = new PositivelyRegulatesGoRel(null); PartOfGoRel partOfGoRel = new PartOfGoRel(null); HasPartOfGoRel hasPartGoRel = new HasPartOfGoRel(null); //-------------------------------------------------------------------------- Map<String, ArrayList<String>> termParentsMap = new HashMap<String, ArrayList<String>>(); Map<String, ArrayList<String>> regulatesMap = new HashMap<String, ArrayList<String>>(); Map<String, ArrayList<String>> negativelyRegulatesMap = new HashMap<String, ArrayList<String>>(); Map<String, ArrayList<String>> positivelyRegulatesMap = new HashMap<String, ArrayList<String>>(); Map<String, ArrayList<String>> partOfMap = new HashMap<String, ArrayList<String>>(); Map<String, ArrayList<String>> hasPartMap = new HashMap<String, ArrayList<String>>(); BufferedReader reader = new BufferedReader(new FileReader(inFile)); String line; StringBuilder termStBuilder = new StringBuilder(); logger.log(Level.INFO, "inserting nodes...."); //-----first I create all the elements whitout their relationships------------- while ((line = reader.readLine()) != null) { if (line.trim().startsWith("<" + TERM_TAG_NAME)) { while (!line.trim().startsWith("</" + TERM_TAG_NAME + ">")) { termStBuilder.append(line); line = reader.readLine(); } //linea final del organism termStBuilder.append(line); //System.out.println("organismStBuilder.toString() = " + organismStBuilder.toString()); XMLElement termXMLElement = new XMLElement(termStBuilder.toString()); termStBuilder.delete(0, termStBuilder.length()); String goId = termXMLElement.asJDomElement().getChildText(ID_TAG_NAME); String goName = termXMLElement.asJDomElement().getChildText(NAME_TAG_NAME); if (goName == null) { goName = ""; } String goNamespace = termXMLElement.asJDomElement().getChildText(NAMESPACE_TAG_NAME); if (goNamespace == null) { goNamespace = ""; } 
String goDefinition = ""; Element defElem = termXMLElement.asJDomElement().getChild(DEF_TAG_NAME); if (defElem != null) { Element defstrElem = defElem.getChild(DEFSTR_TAG_NAME); if (defstrElem != null) { goDefinition = defstrElem.getText(); } } String goComment = termXMLElement.asJDomElement().getChildText(COMMENT_TAG_NAME); if (goComment == null) { goComment = ""; } String goIsObsolete = termXMLElement.asJDomElement().getChildText(IS_OBSOLETE_TAG_NAME); if (goIsObsolete == null) { goIsObsolete = ""; } else { if (goIsObsolete.equals("1")) { goIsObsolete = "true"; } else { goIsObsolete = "false"; } } List<Element> altIdElems = termXMLElement.asJDomElement().getChildren("alt_id"); String[] alternativeIds = new String[altIdElems.size()]; for (int i = 0; i < altIdElems.size(); i++) { alternativeIds[i] = altIdElems.get(i).getText(); } //----term parents---- List<Element> termParentTerms = termXMLElement.asJDomElement() .getChildren(IsAGoRel.OBOXML_RELATIONSHIP_NAME); ArrayList<String> array = new ArrayList<String>(); for (Element elem : termParentTerms) { array.add(elem.getText().trim()); } termParentsMap.put(goId, array); //--------------------- //-------relationship tags----------- List<Element> relationshipTags = termXMLElement.asJDomElement() .getChildren(RELATIONSHIP_TAG_NAME); for (Element relationshipTag : relationshipTags) { String relType = relationshipTag.getChildText("type"); String toSt = relationshipTag.getChildText("to"); if (relType.equals(RegulatesGoRel.OBOXML_RELATIONSHIP_NAME)) { ArrayList<String> tempArray = regulatesMap.get(goId); if (tempArray == null) { tempArray = new ArrayList<String>(); regulatesMap.put(goId, tempArray); } tempArray.add(toSt); } else if (relType.equals(PositivelyRegulatesGoRel.OBOXML_RELATIONSHIP_NAME)) { ArrayList<String> tempArray = positivelyRegulatesMap.get(goId); if (tempArray == null) { tempArray = new ArrayList<String>(); positivelyRegulatesMap.put(goId, tempArray); } tempArray.add(toSt); } else if 
(relType.equals(NegativelyRegulatesGoRel.OBOXML_RELATIONSHIP_NAME)) { ArrayList<String> tempArray = negativelyRegulatesMap.get(goId); if (tempArray == null) { tempArray = new ArrayList<String>(); negativelyRegulatesMap.put(goId, tempArray); } tempArray.add(toSt); } else if (relType.equals(PartOfGoRel.OBOXML_RELATIONSHIP_NAME)) { ArrayList<String> tempArray = partOfMap.get(goId); if (tempArray == null) { tempArray = new ArrayList<String>(); partOfMap.put(goId, tempArray); } tempArray.add(toSt); } else if (relType.equals(HasPartOfGoRel.OBOXML_RELATIONSHIP_NAME)) { ArrayList<String> tempArray = hasPartMap.get(goId); if (tempArray == null) { tempArray = new ArrayList<String>(); hasPartMap.put(goId, tempArray); } tempArray.add(toSt); } } //------------------------------------- goProperties.put(GoTermNode.ID_PROPERTY, goId); goProperties.put(GoTermNode.NAME_PROPERTY, goName); goProperties.put(GoTermNode.DEFINITION_PROPERTY, goDefinition); goProperties.put(GoTermNode.NAMESPACE_PROPERTY, goNamespace); goProperties.put(GoTermNode.ALTERNATIVE_IDS_PROPERTY, alternativeIds); goProperties.put(GoTermNode.OBSOLETE_PROPERTY, goIsObsolete); goProperties.put(GoTermNode.COMMENT_PROPERTY, goComment); long currentGoTermId = inserter.createNode(goProperties); //--------indexing term by id (and alternative ids)---------- goTermIdIndex.add(currentGoTermId, MapUtil.map(GoTermNode.GO_TERM_ID_INDEX, goId)); for (int i = 0; i < alternativeIds.length; i++) { goTermIdIndex.add(currentGoTermId, MapUtil.map(GoTermNode.GO_TERM_ID_INDEX, alternativeIds[i])); } //--------indexing node by node_type index---------- nodeTypeIndex.add(currentGoTermId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, GoTermNode.NODE_TYPE)); } termCounter++; if ((termCounter % limitForPrintingOut) == 0) { logger.log(Level.INFO, (termCounter + " terms inserted!!")); } } reader.close(); //flushing index goTermIdIndex.flush(); //----------------------------------------------------------------------- logger.log(Level.INFO, 
"Inserting relationships...."); logger.log(Level.INFO, "'is_a' relationships...."); //-------------------'is_a' relationships----------------- Set<String> keys = termParentsMap.keySet(); for (String key : keys) { long currentNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, key).getSingle(); ArrayList<String> tempArray = termParentsMap.get(key); for (String string : tempArray) { long tempNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, string).getSingle(); long isAGorelId = inserter.createRelationship(currentNodeId, tempNodeId, isAGoRel, null); //System.out.println("key = " + key); isAGoRelIndex.add(isAGorelId, MapUtil.map(IsAGoRel.IS_A_REL_INDEX, String.valueOf(currentNodeId))); //System.out.println("indexing key = " + key); } } logger.log(Level.INFO, "'regulates' relationships...."); //-------------------'regulates' relationships---------------------- keys = regulatesMap.keySet(); for (String key : keys) { long currentNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, key).getSingle(); ArrayList<String> tempArray = regulatesMap.get(key); for (String string : tempArray) { long tempNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, string).getSingle(); inserter.createRelationship(currentNodeId, tempNodeId, regulatesGoRel, null); } } logger.log(Level.INFO, "'negatively_regulates' relationships...."); //-------------------'regulates' relationships---------------------- keys = negativelyRegulatesMap.keySet(); for (String key : keys) { long currentNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, key).getSingle(); ArrayList<String> tempArray = negativelyRegulatesMap.get(key); for (String string : tempArray) { long tempNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, string).getSingle(); inserter.createRelationship(currentNodeId, tempNodeId, negativelyRegulatesGoRel, null); } } logger.log(Level.INFO, "'positively_regulates' relationships...."); //-------------------'regulates' relationships---------------------- keys = 
positivelyRegulatesMap.keySet(); for (String key : keys) { long currentNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, key).getSingle(); ArrayList<String> tempArray = positivelyRegulatesMap.get(key); for (String string : tempArray) { long tempNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, string).getSingle(); inserter.createRelationship(currentNodeId, tempNodeId, positivelyRegulatesGoRel, null); } } logger.log(Level.INFO, "'part_of' relationships...."); //-------------------'regulates' relationships---------------------- keys = partOfMap.keySet(); for (String key : keys) { long currentNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, key).getSingle(); ArrayList<String> tempArray = partOfMap.get(key); for (String string : tempArray) { long tempNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, string).getSingle(); inserter.createRelationship(currentNodeId, tempNodeId, partOfGoRel, null); } } logger.log(Level.INFO, "'has_part' relationships...."); //-------------------'regulates' relationships---------------------- keys = hasPartMap.keySet(); for (String key : keys) { long currentNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, key).getSingle(); ArrayList<String> tempArray = hasPartMap.get(key); for (String string : tempArray) { long tempNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, string).getSingle(); inserter.createRelationship(currentNodeId, tempNodeId, hasPartGoRel, null); } } logger.log(Level.INFO, "Done! 
:)"); } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); StackTraceElement[] trace = e.getStackTrace(); for (StackTraceElement stackTraceElement : trace) { logger.log(Level.SEVERE, stackTraceElement.toString()); } } finally { try { //closing logger file handler fh.close(); logger.log(Level.INFO, "Closing up inserter and index service...."); // shutdown, makes sure all changes are written to disk indexProvider.shutdown(); inserter.shutdown(); //-----------------writing stats file--------------------- long elapsedTime = System.nanoTime() - initTime; long elapsedSeconds = Math.round((elapsedTime / 1000000000.0)); long hours = elapsedSeconds / 3600; long minutes = (elapsedSeconds % 3600) / 60; long seconds = (elapsedSeconds % 3600) % 60; statsBuff.write("Statistics for program ImportGeneOntology:\nInput file: " + inFile.getName() + "\nThere were " + termCounter + " terms inserted.\n" + "The elapsed time was: " + hours + "h " + minutes + "m " + seconds + "s\n"); //---closing stats writer--- statsBuff.close(); } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); StackTraceElement[] trace = e.getStackTrace(); for (StackTraceElement stackTraceElement : trace) { logger.log(Level.SEVERE, stackTraceElement.toString()); } } } } }
From source file:com.bio4j.neo4jdb.programs.ImportProteinInteractions.java
License:Open Source License
public static void main(String[] args) throws IOException { if (args.length != 3) { System.out.println("This program expects the following parameters: \n" + "1. Uniprot xml filename \n" + "2. Bio4j DB folder\n" + "3. Batch inserter .properties file"); } else {/*from w w w .j a v a 2 s. c om*/ long initTime = System.nanoTime(); File inFile = new File(args[0]); BatchInserter inserter = null; BatchInserterIndexProvider indexProvider = null; String accessionSt = ""; BufferedWriter statsBuff = null; int proteinCounter = 0; int limitForPrintingOut = 10000; try { // This block configure the logger with handler and formatter fh = new FileHandler("ImportProteinInteractions" + args[0].split("\\.")[0] + ".log", false); SimpleFormatter formatter = new SimpleFormatter(); fh.setFormatter(formatter); logger.addHandler(fh); logger.setLevel(Level.ALL); //--------------------------------- //---creating writer for stats file----- statsBuff = new BufferedWriter(new FileWriter( new File("ImportProteinInteractionsStats_" + inFile.getName().split("\\.")[0] + ".txt"))); // create the batch inserter inserter = BatchInserters.inserter(args[1], MapUtil.load(new File(args[2]))); // create the batch index service indexProvider = new LuceneBatchInserterIndexProvider(inserter); //------------------nodes properties maps----------------------------------- //--------------------------------------------------------------------- //-------------------relationships properties maps-------------------------- Map<String, Object> proteinProteinInteractionProperties = new HashMap<String, Object>(); Map<String, Object> proteinIsoformInteractionProperties = new HashMap<String, Object>(); //---------------------------------------------------------------------------- //--------------------------------relationships------------------------------------------ ProteinProteinInteractionRel proteinProteinInteractionRel = new ProteinProteinInteractionRel(null); ProteinIsoformInteractionRel proteinIsoformInteractionRel 
= new ProteinIsoformInteractionRel(null); //------------------------------------------------------------------------------------------------ //------------------indexes creation---------------------------------- BatchInserterIndex proteinAccessionIndex = indexProvider.nodeIndex( ProteinNode.PROTEIN_ACCESSION_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex isoformIdIndex = indexProvider.nodeIndex(IsoformNode.ISOFORM_ID_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); //-------------------------------------------------------------------- BufferedReader reader = new BufferedReader(new FileReader(inFile)); String line; StringBuilder entryStBuilder = new StringBuilder(); while ((line = reader.readLine()) != null) { if (line.trim().startsWith("<" + UniprotStuff.ENTRY_TAG_NAME)) { while (!line.trim().startsWith("</" + UniprotStuff.ENTRY_TAG_NAME + ">")) { entryStBuilder.append(line); line = reader.readLine(); } //linea final del organism entryStBuilder.append(line); //System.out.println("organismStBuilder.toString() = " + organismStBuilder.toString()); XMLElement entryXMLElem = new XMLElement(entryStBuilder.toString()); entryStBuilder.delete(0, entryStBuilder.length()); accessionSt = entryXMLElem.asJDomElement() .getChildText(UniprotStuff.ENTRY_ACCESSION_TAG_NAME); long currentProteinId = proteinAccessionIndex .get(ProteinNode.PROTEIN_ACCESSION_INDEX, accessionSt).getSingle(); List<Element> comments = entryXMLElem.asJDomElement() .getChildren(UniprotStuff.COMMENT_TAG_NAME); for (Element commentElem : comments) { String commentTypeSt = commentElem .getAttributeValue(UniprotStuff.COMMENT_TYPE_ATTRIBUTE); //----------interaction---------------- if (commentTypeSt.equals(ProteinProteinInteractionRel.UNIPROT_ATTRIBUTE_TYPE_VALUE)) { List<Element> interactants = commentElem.getChildren("interactant"); Element interactant1 = interactants.get(0); Element interactant2 = interactants.get(1); Element organismsDiffer = 
commentElem.getChild("organismsDiffer"); Element experiments = commentElem.getChild("experiments"); String intactId1St = interactant1.getAttributeValue("intactId"); String intactId2St = interactant2.getAttributeValue("intactId"); String organismsDifferSt = ""; String experimentsSt = ""; if (intactId1St == null) { intactId1St = ""; } if (intactId2St == null) { intactId2St = ""; } if (organismsDiffer != null) { organismsDifferSt = organismsDiffer.getText(); } if (experiments != null) { experimentsSt = experiments.getText(); } //----now we try to retrieve the interactant 2 accession-- String interactant2AccessionSt = interactant2.getChildText("id"); long protein2Id = -1; if (interactant2AccessionSt != null) { IndexHits<Long> protein2IdIndexHits = proteinAccessionIndex .get(ProteinNode.PROTEIN_ACCESSION_INDEX, interactant2AccessionSt); if (protein2IdIndexHits.hasNext()) { if (protein2IdIndexHits.size() == 1) { protein2Id = protein2IdIndexHits.getSingle(); } } if (protein2Id < 0) { //Since we did not find the protein we try to find a isoform instead long isoformId = -1; IndexHits<Long> isoformIdIndexHits = isoformIdIndex .get(IsoformNode.ISOFORM_ID_INDEX, interactant2AccessionSt); if (isoformIdIndexHits.hasNext()) { if (isoformIdIndexHits.size() == 1) { isoformId = isoformIdIndexHits.getSingle(); } } if (isoformId >= 0) { proteinIsoformInteractionProperties.put( ProteinIsoformInteractionRel.EXPERIMENTS_PROPERTY, experimentsSt); proteinIsoformInteractionProperties.put( ProteinIsoformInteractionRel.ORGANISMS_DIFFER_PROPERTY, organismsDifferSt); proteinIsoformInteractionProperties.put( ProteinIsoformInteractionRel.INTACT_ID_1_PROPERTY, intactId1St); proteinIsoformInteractionProperties.put( ProteinIsoformInteractionRel.INTACT_ID_2_PROPERTY, intactId2St); inserter.createRelationship(currentProteinId, isoformId, proteinIsoformInteractionRel, proteinIsoformInteractionProperties); } } else { proteinProteinInteractionProperties.put( 
ProteinProteinInteractionRel.EXPERIMENTS_PROPERTY, experimentsSt); proteinProteinInteractionProperties.put( ProteinProteinInteractionRel.ORGANISMS_DIFFER_PROPERTY, organismsDifferSt); proteinProteinInteractionProperties.put( ProteinProteinInteractionRel.INTACT_ID_1_PROPERTY, intactId1St); proteinProteinInteractionProperties.put( ProteinProteinInteractionRel.INTACT_ID_2_PROPERTY, intactId2St); inserter.createRelationship(currentProteinId, protein2Id, proteinProteinInteractionRel, proteinProteinInteractionProperties); } } } } proteinCounter++; if ((proteinCounter % limitForPrintingOut) == 0) { logger.log(Level.INFO, (proteinCounter + " proteins updated with interactions!!")); } } } reader.close(); } catch (Exception e) { logger.log(Level.SEVERE, ("Exception retrieving protein " + accessionSt)); logger.log(Level.SEVERE, e.getMessage()); StackTraceElement[] trace = e.getStackTrace(); for (StackTraceElement stackTraceElement : trace) { logger.log(Level.SEVERE, stackTraceElement.toString()); } } finally { //outbBuff.close(); try { // shutdown, makes sure all changes are written to disk indexProvider.shutdown(); inserter.shutdown(); //closing logger file handler fh.close(); //-----------------writing stats file--------------------- long elapsedTime = System.nanoTime() - initTime; long elapsedSeconds = Math.round((elapsedTime / 1000000000.0)); long hours = elapsedSeconds / 3600; long minutes = (elapsedSeconds % 3600) / 60; long seconds = (elapsedSeconds % 3600) % 60; statsBuff.write("Statistics for program ImportProteinInteractions:\nInput file: " + inFile.getName() + "\nThere were " + proteinCounter + " proteins analyzed.\n" + "The elapsed time was: " + hours + "h " + minutes + "m " + seconds + "s\n"); //---closing stats writer--- statsBuff.close(); } catch (Exception e) { logger.log(Level.SEVERE, ("Exception retrieving protein " + accessionSt)); logger.log(Level.SEVERE, e.getMessage()); StackTraceElement[] trace = e.getStackTrace(); for (StackTraceElement 
stackTraceElement : trace) { logger.log(Level.SEVERE, stackTraceElement.toString()); } //closing logger file handler fh.close(); } } } }
From source file:com.bio4j.neo4jdb.programs.ImportUniprot.java
License:Open Source License
public static void main(String[] args) { if (args.length != 4) { System.out.println("This program expects the following parameters: \n" + "1. Uniprot xml filename \n" + "2. Bio4j DB folder \n" + "3. batch inserter .properties file \n" + "4. Config XML file"); } else {// www. j a va2 s.c o m long initTime = System.nanoTime(); File inFile = new File(args[0]); File configFile = new File(args[3]); String currentAccessionId = ""; BatchInserter inserter = null; BatchInserterIndexProvider indexProvider = null; BufferedWriter enzymeIdsNotFoundBuff = null; BufferedWriter statsBuff = null; int proteinCounter = 0; int limitForPrintingOut = 10000; try { // This block configures the logger with handler and formatter fh = new FileHandler("ImportUniprot" + args[0].split("\\.")[0] + ".log", false); SimpleFormatter formatter = new SimpleFormatter(); fh.setFormatter(formatter); logger.addHandler(fh); logger.setLevel(Level.ALL); System.out.println("Reading conf file..."); BufferedReader reader = new BufferedReader(new FileReader(configFile)); String line; StringBuilder stBuilder = new StringBuilder(); while ((line = reader.readLine()) != null) { stBuilder.append(line); } reader.close(); UniprotDataXML uniprotDataXML = new UniprotDataXML(stBuilder.toString()); //---creating writer for enzymes not found file----- enzymeIdsNotFoundBuff = new BufferedWriter(new FileWriter(new File("EnzymeIdsNotFound.log"))); //---creating writer for stats file----- statsBuff = new BufferedWriter(new FileWriter( new File("ImportUniprotStats_" + inFile.getName().split("\\.")[0] + ".txt"))); // create the batch inserter inserter = BatchInserters.inserter(args[1], MapUtil.load(new File(args[2]))); // create the batch index service indexProvider = new LuceneBatchInserterIndexProvider(inserter); //-----------------create batch indexes---------------------------------- //---------------------------------------------------------------------- BatchInserterIndex proteinAccessionIndex = indexProvider.nodeIndex( 
ProteinNode.PROTEIN_ACCESSION_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex proteinFullNameFullTextIndex = indexProvider.nodeIndex( ProteinNode.PROTEIN_FULL_NAME_FULL_TEXT_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST)); BatchInserterIndex proteinGeneNamesFullTextIndex = indexProvider.nodeIndex( ProteinNode.PROTEIN_GENE_NAMES_FULL_TEXT_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST)); BatchInserterIndex proteinEnsemblPlantsIndex = indexProvider.nodeIndex( ProteinNode.PROTEIN_ENSEMBL_PLANTS_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex datasetNameIndex = indexProvider.nodeIndex(DatasetNode.DATASET_NAME_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex keywordIdIndex = indexProvider.nodeIndex(KeywordNode.KEYWORD_ID_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex keywordNameIndex = indexProvider.nodeIndex(KeywordNode.KEYWORD_NAME_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex interproIdIndex = indexProvider.nodeIndex(InterproNode.INTERPRO_ID_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex pfamIdIndex = indexProvider.nodeIndex(PfamNode.PFAM_ID_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex goTermIdIndex = indexProvider.nodeIndex(GoTermNode.GO_TERM_ID_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex organismScientificNameIndex = indexProvider.nodeIndex( OrganismNode.ORGANISM_SCIENTIFIC_NAME_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex organismNcbiTaxonomyIdIndex = indexProvider.nodeIndex( OrganismNode.ORGANISM_NCBI_TAXONOMY_ID_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex taxonNameIndex = 
indexProvider.nodeIndex(TaxonNode.TAXON_NAME_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex genomeElementVersionIndex = indexProvider.nodeIndex( GenomeElementNode.GENOME_ELEMENT_VERSION_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex reactomeTermIdIndex = indexProvider.nodeIndex( ReactomeTermNode.REACTOME_TERM_ID_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex enzymeIdIndex = indexProvider.nodeIndex(EnzymeNode.ENZYME_ID_INDEX, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex nodeTypeIndex = indexProvider.nodeIndex(Bio4jManager.NODE_TYPE_INDEX_NAME, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); BatchInserterIndex mainNodesIndex = indexProvider.nodeIndex(Bio4jManager.MAIN_NODES_INDEX_NAME, MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); //---------------------------------------------------------------------- //---------------------------------------------------------------------- reader = new BufferedReader(new FileReader(inFile)); StringBuilder entryStBuilder = new StringBuilder(); //---------------------------------------------------------------------- //------------------------looking up for main nodes--------------------- alternativeProductInitiationId = mainNodesIndex .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.ALTERNATIVE_PRODUCT_INITIATION) .getSingle(); alternativeProductPromoterId = mainNodesIndex .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.ALTERNATIVE_PRODUCT_PROMOTER) .getSingle(); alternativeProductSplicingId = mainNodesIndex .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.ALTERNATIVE_PRODUCT_SPLICING) .getSingle(); alternativeProductRibosomalFrameshiftingId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.ALTERNATIVE_PRODUCT_RIBOSOMAL_FRAMESHIFTING).getSingle(); seqCautionErroneousInitiationId = mainNodesIndex 
.get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_INITIATION) .getSingle(); seqCautionErroneousTranslationId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_TRANSLATION).getSingle(); seqCautionFrameshiftId = mainNodesIndex .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.SEQUENCE_CAUTION_FRAMESHIFT) .getSingle(); seqCautionErroneousTerminationId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_TERMINATION).getSingle(); seqCautionMiscellaneousDiscrepancyId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.SEQUENCE_CAUTION_MISCELLANEOUS_DISCREPANCY).getSingle(); seqCautionErroneousGeneModelPredictionId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_GENE_MODEL_PREDICTION).getSingle(); //---------------------------------------------------------------------- //---------------------------------------------------------------------------------- //---------------------initializing node type properties---------------------------- organismProperties.put(OrganismNode.NODE_TYPE_PROPERTY, OrganismNode.NODE_TYPE); proteinProperties.put(ProteinNode.NODE_TYPE_PROPERTY, ProteinNode.NODE_TYPE); keywordProperties.put(KeywordNode.NODE_TYPE_PROPERTY, KeywordNode.NODE_TYPE); subcellularLocationProperties.put(SubcellularLocationNode.NODE_TYPE_PROPERTY, SubcellularLocationNode.NODE_TYPE); interproProperties.put(InterproNode.NODE_TYPE_PROPERTY, InterproNode.NODE_TYPE); pfamProperties.put(PfamNode.NODE_TYPE_PROPERTY, PfamNode.NODE_TYPE); taxonProperties.put(TaxonNode.NODE_TYPE_PROPERTY, TaxonNode.NODE_TYPE); datasetProperties.put(DatasetNode.NODE_TYPE_PROPERTY, DatasetNode.NODE_TYPE); personProperties.put(PersonNode.NODE_TYPE_PROPERTY, PersonNode.NODE_TYPE); consortiumProperties.put(ConsortiumNode.NODE_TYPE_PROPERTY, ConsortiumNode.NODE_TYPE); 
instituteProperties.put(InstituteNode.NODE_TYPE_PROPERTY, InstituteNode.NODE_TYPE); thesisProperties.put(ThesisNode.NODE_TYPE_PROPERTY, ThesisNode.NODE_TYPE); bookProperties.put(BookNode.NODE_TYPE_PROPERTY, BookNode.NODE_TYPE); patentProperties.put(PatentNode.NODE_TYPE_PROPERTY, PatentNode.NODE_TYPE); articleProperties.put(ArticleNode.NODE_TYPE_PROPERTY, ArticleNode.NODE_TYPE); submissionProperties.put(SubmissionNode.NODE_TYPE_PROPERTY, SubmissionNode.NODE_TYPE); onlineArticleProperties.put(OnlineArticleNode.NODE_TYPE_PROPERTY, OnlineArticleNode.NODE_TYPE); unpublishedObservationProperties.put(UnpublishedObservationNode.NODE_TYPE_PROPERTY, UnpublishedObservationNode.NODE_TYPE); publisherProperties.put(PublisherNode.NODE_TYPE_PROPERTY, PublisherNode.NODE_TYPE); cityProperties.put(CityNode.NODE_TYPE_PROPERTY, CityNode.NODE_TYPE); journalProperties.put(JournalNode.NODE_TYPE_PROPERTY, JournalNode.NODE_TYPE); onlineJournalProperties.put(OnlineJournalNode.NODE_TYPE_PROPERTY, OnlineJournalNode.NODE_TYPE); countryProperties.put(CountryNode.NODE_TYPE_PROPERTY, CountryNode.NODE_TYPE); isoformProperties.put(IsoformNode.NODE_TYPE_PROPERTY, IsoformNode.NODE_TYPE); commentTypeProperties.put(CommentTypeNode.NODE_TYPE_PROPERTY, CommentTypeNode.NODE_TYPE); featureTypeProperties.put(FeatureTypeNode.NODE_TYPE_PROPERTY, FeatureTypeNode.NODE_TYPE); //----------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------- while ((line = reader.readLine()) != null) { if (line.trim().startsWith("<" + UniprotStuff.ENTRY_TAG_NAME)) { while (!line.trim().startsWith("</" + UniprotStuff.ENTRY_TAG_NAME + ">")) { entryStBuilder.append(line); line = reader.readLine(); } //linea final del organism entryStBuilder.append(line); //System.out.println("organismStBuilder.toString() = " + organismStBuilder.toString()); XMLElement entryXMLElem = new XMLElement(entryStBuilder.toString()); 
entryStBuilder.delete(0, entryStBuilder.length()); String modifiedDateSt = entryXMLElem.asJDomElement() .getAttributeValue(UniprotStuff.ENTRY_MODIFIED_DATE_ATTRIBUTE); String accessionSt = entryXMLElem.asJDomElement() .getChildText(UniprotStuff.ENTRY_ACCESSION_TAG_NAME); String nameSt = entryXMLElem.asJDomElement().getChildText(UniprotStuff.ENTRY_NAME_TAG_NAME); String fullNameSt = getProteinFullName( entryXMLElem.asJDomElement().getChild(UniprotStuff.PROTEIN_TAG_NAME)); String shortNameSt = getProteinShortName( entryXMLElem.asJDomElement().getChild(UniprotStuff.PROTEIN_TAG_NAME)); if (shortNameSt == null) { shortNameSt = ""; } if (fullNameSt == null) { fullNameSt = ""; } currentAccessionId = accessionSt; //-----------alternative accessions------------- ArrayList<String> alternativeAccessions = new ArrayList<>(); List<Element> altAccessionsList = entryXMLElem.asJDomElement() .getChildren(UniprotStuff.ENTRY_ACCESSION_TAG_NAME); for (int i = 1; i < altAccessionsList.size(); i++) { alternativeAccessions.add(altAccessionsList.get(i).getText()); } proteinProperties.put(ProteinNode.ALTERNATIVE_ACCESSIONS_PROPERTY, convertToStringArray(alternativeAccessions)); //-----db references------------- String pirIdSt = ""; String keggIdSt = ""; String ensemblIdSt = ""; String uniGeneIdSt = ""; String arrayExpressIdSt = ""; List<Element> dbReferenceList = entryXMLElem.asJDomElement() .getChildren(UniprotStuff.DB_REFERENCE_TAG_NAME); ArrayList<String> emblCrossReferences = new ArrayList<>(); ArrayList<String> refseqReferences = new ArrayList<>(); ArrayList<String> enzymeDBReferences = new ArrayList<>(); ArrayList<String> ensemblPlantsReferences = new ArrayList<>(); HashMap<String, String> reactomeReferences = new HashMap<>(); for (Element dbReferenceElem : dbReferenceList) { String refId = dbReferenceElem.getAttributeValue("id"); switch (dbReferenceElem.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE)) { case "Ensembl": ensemblIdSt = refId; break; case "PIR": pirIdSt = 
refId; break; case "UniGene": uniGeneIdSt = refId; break; case "KEGG": keggIdSt = refId; break; case "EMBL": emblCrossReferences.add(refId); break; case "EC": enzymeDBReferences.add(refId); break; case "ArrayExpress": arrayExpressIdSt = refId; break; case "RefSeq": //refseqReferences.add(refId); List<Element> children = dbReferenceElem.getChildren("property"); for (Element propertyElem : children) { if (propertyElem.getAttributeValue("type").equals("nucleotide sequence ID")) { refseqReferences.add(propertyElem.getAttributeValue("value")); } } break; case "Reactome": Element propertyElem = dbReferenceElem.getChild("property"); String pathwayName = ""; if (propertyElem.getAttributeValue("type").equals("pathway name")) { pathwayName = propertyElem.getAttributeValue("value"); } reactomeReferences.put(refId, pathwayName); break; case "EnsemblPlants": ensemblPlantsReferences.add(refId); break; } } Element sequenceElem = entryXMLElem.asJDomElement() .getChild(UniprotStuff.ENTRY_SEQUENCE_TAG_NAME); String sequenceSt = sequenceElem.getText(); int seqLength = Integer .parseInt(sequenceElem.getAttributeValue(UniprotStuff.SEQUENCE_LENGTH_ATTRIBUTE)); float seqMass = Float .parseFloat(sequenceElem.getAttributeValue(UniprotStuff.SEQUENCE_MASS_ATTRIBUTE)); //System.out.println("lalala " + seqMass); proteinProperties.put(ProteinNode.MODIFIED_DATE_PROPERTY, modifiedDateSt); proteinProperties.put(ProteinNode.ACCESSION_PROPERTY, accessionSt); proteinProperties.put(ProteinNode.NAME_PROPERTY, nameSt); proteinProperties.put(ProteinNode.FULL_NAME_PROPERTY, fullNameSt); proteinProperties.put(ProteinNode.SHORT_NAME_PROPERTY, shortNameSt); proteinProperties.put(ProteinNode.SEQUENCE_PROPERTY, sequenceSt); proteinProperties.put(ProteinNode.LENGTH_PROPERTY, seqLength); proteinProperties.put(ProteinNode.MASS_PROPERTY, seqMass); proteinProperties.put(ProteinNode.ARRAY_EXPRESS_ID_PROPERTY, arrayExpressIdSt); proteinProperties.put(ProteinNode.PIR_ID_PROPERTY, pirIdSt); 
proteinProperties.put(ProteinNode.KEGG_ID_PROPERTY, keggIdSt); proteinProperties.put(ProteinNode.EMBL_REFERENCES_PROPERTY, convertToStringArray(emblCrossReferences)); proteinProperties.put(ProteinNode.ENSEMBL_PLANTS_REFERENCES_PROPERTY, convertToStringArray(ensemblPlantsReferences)); proteinProperties.put(ProteinNode.ENSEMBL_ID_PROPERTY, ensemblIdSt); proteinProperties.put(ProteinNode.UNIGENE_ID_PROPERTY, uniGeneIdSt); //---------------gene-names------------------- Element geneElement = entryXMLElem.asJDomElement().getChild(UniprotStuff.GENE_TAG_NAME); ArrayList<String> geneNames = new ArrayList<>(); if (geneElement != null) { List<Element> genesList = geneElement.getChildren(UniprotStuff.GENE_NAME_TAG_NAME); for (Element geneNameElem : genesList) { geneNames.add(geneNameElem.getText()); } } proteinProperties.put(ProteinNode.GENE_NAMES_PROPERTY, convertToStringArray(geneNames)); //----------------------------------------- long currentProteinId = inserter.createNode(proteinProperties); proteinAccessionIndex.add(currentProteinId, MapUtil.map(ProteinNode.PROTEIN_ACCESSION_INDEX, accessionSt)); //indexing protein by alternative accessions for (String altAccessionSt : alternativeAccessions) { proteinAccessionIndex.add(currentProteinId, MapUtil.map(ProteinNode.PROTEIN_ACCESSION_INDEX, altAccessionSt)); } //---flushing protein accession index---- proteinAccessionIndex.flush(); //---adding protein node to node_type index---- nodeTypeIndex.add(currentProteinId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, ProteinNode.NODE_TYPE)); //indexing protein by full name if (!fullNameSt.isEmpty()) { proteinFullNameFullTextIndex.add(currentProteinId, MapUtil.map(ProteinNode.PROTEIN_FULL_NAME_FULL_TEXT_INDEX, fullNameSt)); //System.out.println(fullNameSt.toUpperCase() + " , " + currentProteinId); } //indexing protein by gene names String geneNamesStToBeIndexed = ""; for (String geneNameSt : geneNames) { geneNamesStToBeIndexed += geneNameSt + " "; } 
proteinGeneNamesFullTextIndex.add(currentProteinId, MapUtil .map(ProteinNode.PROTEIN_GENE_NAMES_FULL_TEXT_INDEX, geneNamesStToBeIndexed)); //indexing protein by Ensembl plants references for (String ensemblPlantRef : ensemblPlantsReferences) { proteinEnsemblPlantsIndex.add(currentProteinId, MapUtil.map(ProteinNode.PROTEIN_ENSEMBL_PLANTS_INDEX, ensemblPlantRef)); } //--------------refseq associations---------------- if (uniprotDataXML.getRefseq()) { for (String refseqReferenceSt : refseqReferences) { //System.out.println("refseqReferenceSt = " + refseqReferenceSt); IndexHits<Long> hits = genomeElementVersionIndex .get(GenomeElementNode.GENOME_ELEMENT_VERSION_INDEX, refseqReferenceSt); if (hits.hasNext()) { inserter.createRelationship(currentProteinId, hits.getSingle(), proteinGenomeElementRel, null); } else { logger.log(Level.INFO, ("GenomeElem not found for: " + currentAccessionId + " , " + refseqReferenceSt)); } } } //--------------reactome associations---------------- if (uniprotDataXML.getReactome()) { for (String reactomeId : reactomeReferences.keySet()) { long reactomeTermNodeId = -1; IndexHits<Long> reactomeTermIdIndexHits = reactomeTermIdIndex .get(ReactomeTermNode.REACTOME_TERM_ID_INDEX, reactomeId); if (reactomeTermIdIndexHits.hasNext()) { reactomeTermNodeId = reactomeTermIdIndexHits.getSingle(); } if (reactomeTermNodeId < 0) { reactomeTermProperties.put(ReactomeTermNode.ID_PROPERTY, reactomeId); reactomeTermProperties.put(ReactomeTermNode.PATHWAY_NAME_PROPERTY, reactomeReferences.get(reactomeId)); reactomeTermNodeId = inserter.createNode(reactomeTermProperties); reactomeTermIdIndex.add(reactomeTermNodeId, MapUtil.map(ReactomeTermNode.REACTOME_TERM_ID_INDEX, reactomeId)); //----flushing reactome index--- reactomeTermIdIndex.flush(); //---adding reactome term node to node_type index---- nodeTypeIndex.add(reactomeTermNodeId, MapUtil .map(Bio4jManager.NODE_TYPE_INDEX_NAME, ReactomeTermNode.NODE_TYPE)); } inserter.createRelationship(currentProteinId, 
reactomeTermNodeId, proteinReactomeRel, null); } } //------------------------------------------------------- //---------------enzyme db associations---------------------- if (uniprotDataXML.getEnzymeDb()) { for (String enzymeDBRef : enzymeDBReferences) { long enzymeNodeId; IndexHits<Long> enzymeIdIndexHits = enzymeIdIndex.get(EnzymeNode.ENZYME_ID_INDEX, enzymeDBRef); if (enzymeIdIndexHits.hasNext()) { enzymeNodeId = enzymeIdIndexHits.next(); inserter.createRelationship(currentProteinId, enzymeNodeId, proteinEnzymaticActivityRel, null); } else { enzymeIdsNotFoundBuff.write( "Enzyme term: " + enzymeDBRef + " not found.\t" + currentAccessionId); } } } //------------------------------------------------------------ //-----comments import--- if (uniprotDataXML.getComments()) { importProteinComments(entryXMLElem, inserter, indexProvider, currentProteinId, sequenceSt, uniprotDataXML); } //-----features import---- if (uniprotDataXML.getFeatures()) { importProteinFeatures(entryXMLElem, inserter, indexProvider, currentProteinId); } //--------------------------------datasets-------------------------------------------------- String proteinDataSetSt = entryXMLElem.asJDomElement() .getAttributeValue(UniprotStuff.ENTRY_DATASET_ATTRIBUTE); //long datasetId = indexService.getSingleNode(DatasetNode.DATASET_NAME_INDEX, proteinDataSetSt); long datasetId = -1; IndexHits<Long> datasetNameIndexHits = datasetNameIndex.get(DatasetNode.DATASET_NAME_INDEX, proteinDataSetSt); if (datasetNameIndexHits.hasNext()) { datasetId = datasetNameIndexHits.getSingle(); } if (datasetId < 0) { datasetProperties.put(DatasetNode.NAME_PROPERTY, proteinDataSetSt); datasetId = inserter.createNode(datasetProperties); datasetNameIndex.add(datasetId, MapUtil.map(DatasetNode.DATASET_NAME_INDEX, proteinDataSetSt)); //----flushing dataset name index--- datasetNameIndex.flush(); //---adding dataset node to node_type index---- nodeTypeIndex.add(datasetId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, 
DatasetNode.NODE_TYPE)); } inserter.createRelationship(currentProteinId, datasetId, proteinDatasetRel, null); //--------------------------------------------------------------------------------------------- if (uniprotDataXML.getCitations()) { importProteinCitations(entryXMLElem, inserter, indexProvider, currentProteinId, uniprotDataXML); } //-------------------------------keywords------------------------------------------------------ if (uniprotDataXML.getKeywords()) { List<Element> keywordsList = entryXMLElem.asJDomElement() .getChildren(UniprotStuff.KEYWORD_TAG_NAME); for (Element keywordElem : keywordsList) { String keywordId = keywordElem.getAttributeValue(UniprotStuff.KEYWORD_ID_ATTRIBUTE); String keywordName = keywordElem.getText(); long keywordNodeId = -1; IndexHits<Long> keyworIdIndexHits = keywordIdIndex.get(KeywordNode.KEYWORD_ID_INDEX, keywordId); if (keyworIdIndexHits.hasNext()) { keywordNodeId = keyworIdIndexHits.getSingle(); } if (keywordNodeId < 0) { keywordProperties.put(KeywordNode.ID_PROPERTY, keywordId); keywordProperties.put(KeywordNode.NAME_PROPERTY, keywordName); keywordNodeId = inserter.createNode(keywordProperties); keywordIdIndex.add(keywordNodeId, MapUtil.map(KeywordNode.KEYWORD_ID_INDEX, keywordId)); keywordNameIndex.add(keywordNodeId, MapUtil.map(KeywordNode.KEYWORD_NAME_INDEX, keywordName)); //---flushing keyword id index---- keywordIdIndex.flush(); //---adding keyword node to node_type index---- nodeTypeIndex.add(keywordNodeId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, KeywordNode.NODE_TYPE)); } inserter.createRelationship(currentProteinId, keywordNodeId, proteinKeywordRel, null); } } //--------------------------------------------------------------------------------------- for (Element dbReferenceElem : dbReferenceList) { //-------------------------------INTERPRO------------------------------------------------------ if (dbReferenceElem.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE) 
.equals(UniprotStuff.INTERPRO_DB_REFERENCE_TYPE)) { if (uniprotDataXML.getInterpro()) { String interproId = dbReferenceElem .getAttributeValue(UniprotStuff.DB_REFERENCE_ID_ATTRIBUTE); //long interproNodeId = indexService.getSingleNode(InterproNode.INTERPRO_ID_INDEX, interproId); long interproNodeId = -1; IndexHits<Long> interproIdIndexHits = interproIdIndex .get(InterproNode.INTERPRO_ID_INDEX, interproId); if (interproIdIndexHits.hasNext()) { interproNodeId = interproIdIndexHits.getSingle(); } if (interproNodeId < 0) { String interproEntryNameSt = ""; List<Element> properties = dbReferenceElem .getChildren(UniprotStuff.DB_REFERENCE_PROPERTY_TAG_NAME); for (Element prop : properties) { if (prop.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE) .equals(UniprotStuff.INTERPRO_ENTRY_NAME)) { interproEntryNameSt = prop.getAttributeValue( UniprotStuff.DB_REFERENCE_VALUE_ATTRIBUTE); break; } } interproProperties.put(InterproNode.ID_PROPERTY, interproId); interproProperties.put(InterproNode.NAME_PROPERTY, interproEntryNameSt); interproNodeId = inserter.createNode(interproProperties); interproIdIndex.add(interproNodeId, MapUtil.map(InterproNode.INTERPRO_ID_INDEX, interproId)); //flushing interpro id index interproIdIndex.flush(); //---adding interpro node to node_type index---- nodeTypeIndex.add(interproNodeId, MapUtil .map(Bio4jManager.NODE_TYPE_INDEX_NAME, InterproNode.NODE_TYPE)); } inserter.createRelationship(currentProteinId, interproNodeId, proteinInterproRel, null); } } //-------------------------------PFAM------------------------------------------------------ else if (dbReferenceElem.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE) .equals("Pfam")) { if (uniprotDataXML.getPfam()) { String pfamId = dbReferenceElem .getAttributeValue(UniprotStuff.DB_REFERENCE_ID_ATTRIBUTE); long pfamNodeId = -1; IndexHits<Long> pfamIdIndexHits = pfamIdIndex.get(PfamNode.PFAM_ID_INDEX, pfamId); if (pfamIdIndexHits.hasNext()) { pfamNodeId = 
pfamIdIndexHits.getSingle(); } if (pfamNodeId < 0) { String pfamEntryNameSt = ""; List<Element> properties = dbReferenceElem .getChildren(UniprotStuff.DB_REFERENCE_PROPERTY_TAG_NAME); for (Element prop : properties) { if (prop.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE) .equals("entry name")) { pfamEntryNameSt = prop.getAttributeValue( UniprotStuff.DB_REFERENCE_VALUE_ATTRIBUTE); break; } } pfamProperties.put(PfamNode.ID_PROPERTY, pfamId); pfamProperties.put(PfamNode.NAME_PROPERTY, pfamEntryNameSt); pfamNodeId = inserter.createNode(pfamProperties); pfamIdIndex.add(pfamNodeId, MapUtil.map(PfamNode.PFAM_ID_INDEX, pfamId)); //flushing pfam id index pfamIdIndex.flush(); //---adding pfam node to node_type index---- nodeTypeIndex.add(pfamNodeId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, PfamNode.NODE_TYPE)); } inserter.createRelationship(currentProteinId, pfamNodeId, proteinPfamRel, null); } } //-------------------GO ----------------------------- else if (dbReferenceElem.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE) .toUpperCase().equals(UniprotStuff.GO_DB_REFERENCE_TYPE)) { if (uniprotDataXML.getGeneOntology()) { String goId = dbReferenceElem .getAttributeValue(UniprotStuff.DB_REFERENCE_ID_ATTRIBUTE); String evidenceSt = ""; List<Element> props = dbReferenceElem .getChildren(UniprotStuff.DB_REFERENCE_PROPERTY_TAG_NAME); for (Element element : props) { if (element.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE) .equals(UniprotStuff.EVIDENCE_TYPE_ATTRIBUTE)) { evidenceSt = element.getAttributeValue("value"); if (evidenceSt == null) { evidenceSt = ""; } break; } } long goTermNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, goId) .getSingle(); proteinGoProperties.put(ProteinGoRel.EVIDENCE_PROPERTY, evidenceSt); inserter.createRelationship(currentProteinId, goTermNodeId, proteinGoRel, proteinGoProperties); } } } //--------------------------------------------------------------------------------------- 
//--------------------------------------------------------------------------------------- //--------------------------------organism----------------------------------------------- String scName, commName, synName; scName = ""; commName = ""; synName = ""; Element organismElem = entryXMLElem.asJDomElement() .getChild(UniprotStuff.ORGANISM_TAG_NAME); List<Element> organismNames = organismElem.getChildren(UniprotStuff.ORGANISM_NAME_TAG_NAME); for (Element element : organismNames) { String type = element.getAttributeValue(UniprotStuff.ORGANISM_NAME_TYPE_ATTRIBUTE); switch (type) { case UniprotStuff.ORGANISM_SCIENTIFIC_NAME_TYPE: scName = element.getText(); break; case UniprotStuff.ORGANISM_COMMON_NAME_TYPE: commName = element.getText(); break; case UniprotStuff.ORGANISM_SYNONYM_NAME_TYPE: synName = element.getText(); break; } } //long organismNodeId = indexService.getSingleNode(OrganismNode.ORGANISM_SCIENTIFIC_NAME_INDEX, scName); long organismNodeId = -1; IndexHits<Long> organismScientifiNameIndexHits = organismScientificNameIndex .get(OrganismNode.ORGANISM_SCIENTIFIC_NAME_INDEX, scName); if (organismScientifiNameIndexHits.hasNext()) { organismNodeId = organismScientifiNameIndexHits.getSingle(); } if (organismNodeId < 0) { organismProperties.put(OrganismNode.COMMON_NAME_PROPERTY, commName); organismProperties.put(OrganismNode.SCIENTIFIC_NAME_PROPERTY, scName); organismProperties.put(OrganismNode.SYNONYM_NAME_PROPERTY, synName); List<Element> organismDbRefElems = organismElem .getChildren(UniprotStuff.DB_REFERENCE_TAG_NAME); boolean ncbiIdFound = false; if (organismDbRefElems != null) { for (Element dbRefElem : organismDbRefElems) { String t = dbRefElem.getAttributeValue("type"); if (t.equals("NCBI Taxonomy")) { organismProperties.put(OrganismNode.NCBI_TAXONOMY_ID_PROPERTY, dbRefElem.getAttributeValue("id")); ncbiIdFound = true; break; } } } if (!ncbiIdFound) { organismProperties.put(OrganismNode.NCBI_TAXONOMY_ID_PROPERTY, ""); } organismNodeId = 
inserter.createNode(organismProperties); organismScientificNameIndex.add(organismNodeId, MapUtil.map(OrganismNode.ORGANISM_SCIENTIFIC_NAME_INDEX, scName)); organismNcbiTaxonomyIdIndex.add(organismNodeId, MapUtil.map(OrganismNode.NCBI_TAXONOMY_ID_PROPERTY, organismProperties.get(OrganismNode.NCBI_TAXONOMY_ID_PROPERTY))); //flushing organism scientifica name index organismScientificNameIndex.flush(); //---adding organism node to node_type index---- nodeTypeIndex.add(organismNodeId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, OrganismNode.NODE_TYPE)); Element lineage = entryXMLElem.asJDomElement().getChild("organism").getChild("lineage"); List<Element> taxons = lineage.getChildren("taxon"); Element firstTaxonElem = taxons.get(0); //long firstTaxonId = indexService.getSingleNode(TaxonNode.TAXON_NAME_INDEX, firstTaxonElem.getText()); long firstTaxonId = -1; IndexHits<Long> firstTaxonIndexHits = taxonNameIndex.get(TaxonNode.TAXON_NAME_INDEX, firstTaxonElem.getText()); if (firstTaxonIndexHits.hasNext()) { firstTaxonId = firstTaxonIndexHits.getSingle(); } if (firstTaxonId < 0) { String firstTaxonName = firstTaxonElem.getText(); taxonProperties.put(TaxonNode.NAME_PROPERTY, firstTaxonName); firstTaxonId = createTaxonNode(taxonProperties, inserter, taxonNameIndex, nodeTypeIndex); //flushing taxon name index-- taxonNameIndex.flush(); } long lastTaxonId = firstTaxonId; for (int i = 1; i < taxons.size(); i++) { String taxonName = taxons.get(i).getText(); long currentTaxonId = -1; IndexHits<Long> currentTaxonIndexHits = taxonNameIndex .get(TaxonNode.TAXON_NAME_INDEX, taxonName); if (currentTaxonIndexHits.hasNext()) { currentTaxonId = currentTaxonIndexHits.getSingle(); } if (currentTaxonId < 0) { taxonProperties.put(TaxonNode.NAME_PROPERTY, taxonName); currentTaxonId = createTaxonNode(taxonProperties, inserter, taxonNameIndex, nodeTypeIndex); //flushing taxon name index-- taxonNameIndex.flush(); inserter.createRelationship(lastTaxonId, currentTaxonId, taxonParentRel, null); } 
lastTaxonId = currentTaxonId; } inserter.createRelationship(lastTaxonId, organismNodeId, taxonParentRel, null); } //--------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------- inserter.createRelationship(currentProteinId, organismNodeId, proteinOrganismRel, null); proteinCounter++; if ((proteinCounter % limitForPrintingOut) == 0) { String countProteinsSt = proteinCounter + " proteins inserted!!"; logger.log(Level.INFO, countProteinsSt); } } } } catch (Exception e) { logger.log(Level.SEVERE, ("Exception retrieving protein " + currentAccessionId)); logger.log(Level.SEVERE, e.getMessage()); StackTraceElement[] trace = e.getStackTrace(); for (StackTraceElement stackTraceElement : trace) { logger.log(Level.SEVERE, stackTraceElement.toString()); } } finally { try { //------closing writers------- enzymeIdsNotFoundBuff.close(); // shutdown, makes sure all changes are written to disk indexProvider.shutdown(); inserter.shutdown(); // closing logger file handler fh.close(); //-----------------writing stats file--------------------- long elapsedTime = System.nanoTime() - initTime; long elapsedSeconds = Math.round((elapsedTime / 1000000000.0)); long hours = elapsedSeconds / 3600; long minutes = (elapsedSeconds % 3600) / 60; long seconds = (elapsedSeconds % 3600) % 60; statsBuff.write("Statistics for program ImportUniprot:\nInput file: " + inFile.getName() + "\nThere were " + proteinCounter + " proteins inserted.\n" + "The elapsed time was: " + hours + "h " + minutes + "m " + seconds + "s\n"); //---closing stats writer--- statsBuff.close(); } catch (IOException ex) { Logger.getLogger(ImportUniprot.class.getName()).log(Level.SEVERE, null, ex); } } } }