Example usage for org.jdom2 Element getAttributeValue

List of usage examples for org.jdom2 Element getAttributeValue

Introduction

On this page you can find example usages of org.jdom2 Element getAttributeValue.

Prototype

public String getAttributeValue(final String attname) 

Source Link

Document

Returns the value of the attribute with the given name that is in no namespace. The result is null if there is no such attribute, and the empty string if the attribute exists but its value is empty.

Usage

From source file: com.bio4j.neo4jdb.programs.ImportUniprot.java

License:Open Source License

public static void main(String[] args) {

    if (args.length != 4) {
        System.out.println("This program expects the following parameters: \n" + "1. Uniprot xml filename \n"
                + "2. Bio4j DB folder \n" + "3. batch inserter .properties file \n" + "4. Config XML file");
    } else {/*w  w w  .  j  a v a  2s.co  m*/

        long initTime = System.nanoTime();

        File inFile = new File(args[0]);
        File configFile = new File(args[3]);

        String currentAccessionId = "";

        BatchInserter inserter = null;
        BatchInserterIndexProvider indexProvider = null;

        BufferedWriter enzymeIdsNotFoundBuff = null;
        BufferedWriter statsBuff = null;

        int proteinCounter = 0;
        int limitForPrintingOut = 10000;

        try {

            // This block configures the logger with handler and formatter
            fh = new FileHandler("ImportUniprot" + args[0].split("\\.")[0] + ".log", false);

            SimpleFormatter formatter = new SimpleFormatter();
            fh.setFormatter(formatter);
            logger.addHandler(fh);
            logger.setLevel(Level.ALL);

            System.out.println("Reading conf file...");
            BufferedReader reader = new BufferedReader(new FileReader(configFile));
            String line;
            StringBuilder stBuilder = new StringBuilder();
            while ((line = reader.readLine()) != null) {
                stBuilder.append(line);
            }
            reader.close();

            UniprotDataXML uniprotDataXML = new UniprotDataXML(stBuilder.toString());

            //---creating writer for enzymes not found file-----
            enzymeIdsNotFoundBuff = new BufferedWriter(new FileWriter(new File("EnzymeIdsNotFound.log")));

            //---creating writer for stats file-----
            statsBuff = new BufferedWriter(new FileWriter(
                    new File("ImportUniprotStats_" + inFile.getName().split("\\.")[0] + ".txt")));

            // create the batch inserter
            inserter = BatchInserters.inserter(args[1], MapUtil.load(new File(args[2])));

            // create the batch index service
            indexProvider = new LuceneBatchInserterIndexProvider(inserter);

            //-----------------create batch indexes----------------------------------
            //----------------------------------------------------------------------
            BatchInserterIndex proteinAccessionIndex = indexProvider.nodeIndex(
                    ProteinNode.PROTEIN_ACCESSION_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex proteinFullNameFullTextIndex = indexProvider.nodeIndex(
                    ProteinNode.PROTEIN_FULL_NAME_FULL_TEXT_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST));
            BatchInserterIndex proteinGeneNamesFullTextIndex = indexProvider.nodeIndex(
                    ProteinNode.PROTEIN_GENE_NAMES_FULL_TEXT_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST));
            BatchInserterIndex proteinEnsemblPlantsIndex = indexProvider.nodeIndex(
                    ProteinNode.PROTEIN_ENSEMBL_PLANTS_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex datasetNameIndex = indexProvider.nodeIndex(DatasetNode.DATASET_NAME_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex keywordIdIndex = indexProvider.nodeIndex(KeywordNode.KEYWORD_ID_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex keywordNameIndex = indexProvider.nodeIndex(KeywordNode.KEYWORD_NAME_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex interproIdIndex = indexProvider.nodeIndex(InterproNode.INTERPRO_ID_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex pfamIdIndex = indexProvider.nodeIndex(PfamNode.PFAM_ID_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex goTermIdIndex = indexProvider.nodeIndex(GoTermNode.GO_TERM_ID_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex organismScientificNameIndex = indexProvider.nodeIndex(
                    OrganismNode.ORGANISM_SCIENTIFIC_NAME_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex organismNcbiTaxonomyIdIndex = indexProvider.nodeIndex(
                    OrganismNode.ORGANISM_NCBI_TAXONOMY_ID_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex taxonNameIndex = indexProvider.nodeIndex(TaxonNode.TAXON_NAME_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex genomeElementVersionIndex = indexProvider.nodeIndex(
                    GenomeElementNode.GENOME_ELEMENT_VERSION_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex reactomeTermIdIndex = indexProvider.nodeIndex(
                    ReactomeTermNode.REACTOME_TERM_ID_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex enzymeIdIndex = indexProvider.nodeIndex(EnzymeNode.ENZYME_ID_INDEX,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex nodeTypeIndex = indexProvider.nodeIndex(Bio4jManager.NODE_TYPE_INDEX_NAME,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            BatchInserterIndex mainNodesIndex = indexProvider.nodeIndex(Bio4jManager.MAIN_NODES_INDEX_NAME,
                    MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
            //----------------------------------------------------------------------
            //----------------------------------------------------------------------

            reader = new BufferedReader(new FileReader(inFile));
            StringBuilder entryStBuilder = new StringBuilder();

            //----------------------------------------------------------------------
            //------------------------looking up for main nodes---------------------
            alternativeProductInitiationId = mainNodesIndex
                    .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.ALTERNATIVE_PRODUCT_INITIATION)
                    .getSingle();
            alternativeProductPromoterId = mainNodesIndex
                    .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.ALTERNATIVE_PRODUCT_PROMOTER)
                    .getSingle();
            alternativeProductSplicingId = mainNodesIndex
                    .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.ALTERNATIVE_PRODUCT_SPLICING)
                    .getSingle();
            alternativeProductRibosomalFrameshiftingId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME,
                    Bio4jManager.ALTERNATIVE_PRODUCT_RIBOSOMAL_FRAMESHIFTING).getSingle();
            seqCautionErroneousInitiationId = mainNodesIndex
                    .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_INITIATION)
                    .getSingle();
            seqCautionErroneousTranslationId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME,
                    Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_TRANSLATION).getSingle();
            seqCautionFrameshiftId = mainNodesIndex
                    .get(Bio4jManager.MAIN_NODES_INDEX_NAME, Bio4jManager.SEQUENCE_CAUTION_FRAMESHIFT)
                    .getSingle();
            seqCautionErroneousTerminationId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME,
                    Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_TERMINATION).getSingle();
            seqCautionMiscellaneousDiscrepancyId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME,
                    Bio4jManager.SEQUENCE_CAUTION_MISCELLANEOUS_DISCREPANCY).getSingle();
            seqCautionErroneousGeneModelPredictionId = mainNodesIndex.get(Bio4jManager.MAIN_NODES_INDEX_NAME,
                    Bio4jManager.SEQUENCE_CAUTION_ERRONEOUS_GENE_MODEL_PREDICTION).getSingle();
            //----------------------------------------------------------------------

            //----------------------------------------------------------------------------------
            //---------------------initializing node type properties----------------------------
            organismProperties.put(OrganismNode.NODE_TYPE_PROPERTY, OrganismNode.NODE_TYPE);
            proteinProperties.put(ProteinNode.NODE_TYPE_PROPERTY, ProteinNode.NODE_TYPE);
            keywordProperties.put(KeywordNode.NODE_TYPE_PROPERTY, KeywordNode.NODE_TYPE);
            subcellularLocationProperties.put(SubcellularLocationNode.NODE_TYPE_PROPERTY,
                    SubcellularLocationNode.NODE_TYPE);
            interproProperties.put(InterproNode.NODE_TYPE_PROPERTY, InterproNode.NODE_TYPE);
            pfamProperties.put(PfamNode.NODE_TYPE_PROPERTY, PfamNode.NODE_TYPE);
            taxonProperties.put(TaxonNode.NODE_TYPE_PROPERTY, TaxonNode.NODE_TYPE);
            datasetProperties.put(DatasetNode.NODE_TYPE_PROPERTY, DatasetNode.NODE_TYPE);
            personProperties.put(PersonNode.NODE_TYPE_PROPERTY, PersonNode.NODE_TYPE);
            consortiumProperties.put(ConsortiumNode.NODE_TYPE_PROPERTY, ConsortiumNode.NODE_TYPE);
            instituteProperties.put(InstituteNode.NODE_TYPE_PROPERTY, InstituteNode.NODE_TYPE);
            thesisProperties.put(ThesisNode.NODE_TYPE_PROPERTY, ThesisNode.NODE_TYPE);
            bookProperties.put(BookNode.NODE_TYPE_PROPERTY, BookNode.NODE_TYPE);
            patentProperties.put(PatentNode.NODE_TYPE_PROPERTY, PatentNode.NODE_TYPE);
            articleProperties.put(ArticleNode.NODE_TYPE_PROPERTY, ArticleNode.NODE_TYPE);
            submissionProperties.put(SubmissionNode.NODE_TYPE_PROPERTY, SubmissionNode.NODE_TYPE);
            onlineArticleProperties.put(OnlineArticleNode.NODE_TYPE_PROPERTY, OnlineArticleNode.NODE_TYPE);
            unpublishedObservationProperties.put(UnpublishedObservationNode.NODE_TYPE_PROPERTY,
                    UnpublishedObservationNode.NODE_TYPE);
            publisherProperties.put(PublisherNode.NODE_TYPE_PROPERTY, PublisherNode.NODE_TYPE);
            cityProperties.put(CityNode.NODE_TYPE_PROPERTY, CityNode.NODE_TYPE);
            journalProperties.put(JournalNode.NODE_TYPE_PROPERTY, JournalNode.NODE_TYPE);
            onlineJournalProperties.put(OnlineJournalNode.NODE_TYPE_PROPERTY, OnlineJournalNode.NODE_TYPE);
            countryProperties.put(CountryNode.NODE_TYPE_PROPERTY, CountryNode.NODE_TYPE);
            isoformProperties.put(IsoformNode.NODE_TYPE_PROPERTY, IsoformNode.NODE_TYPE);
            commentTypeProperties.put(CommentTypeNode.NODE_TYPE_PROPERTY, CommentTypeNode.NODE_TYPE);
            featureTypeProperties.put(FeatureTypeNode.NODE_TYPE_PROPERTY, FeatureTypeNode.NODE_TYPE);
            //-----------------------------------------------------------------------------------------
            //-----------------------------------------------------------------------------------------

            while ((line = reader.readLine()) != null) {
                if (line.trim().startsWith("<" + UniprotStuff.ENTRY_TAG_NAME)) {

                    while (!line.trim().startsWith("</" + UniprotStuff.ENTRY_TAG_NAME + ">")) {
                        entryStBuilder.append(line);
                        line = reader.readLine();
                    }
                    //linea final del organism
                    entryStBuilder.append(line);
                    //System.out.println("organismStBuilder.toString() = " + organismStBuilder.toString());
                    XMLElement entryXMLElem = new XMLElement(entryStBuilder.toString());
                    entryStBuilder.delete(0, entryStBuilder.length());

                    String modifiedDateSt = entryXMLElem.asJDomElement()
                            .getAttributeValue(UniprotStuff.ENTRY_MODIFIED_DATE_ATTRIBUTE);

                    String accessionSt = entryXMLElem.asJDomElement()
                            .getChildText(UniprotStuff.ENTRY_ACCESSION_TAG_NAME);
                    String nameSt = entryXMLElem.asJDomElement().getChildText(UniprotStuff.ENTRY_NAME_TAG_NAME);
                    String fullNameSt = getProteinFullName(
                            entryXMLElem.asJDomElement().getChild(UniprotStuff.PROTEIN_TAG_NAME));
                    String shortNameSt = getProteinShortName(
                            entryXMLElem.asJDomElement().getChild(UniprotStuff.PROTEIN_TAG_NAME));

                    if (shortNameSt == null) {
                        shortNameSt = "";
                    }
                    if (fullNameSt == null) {
                        fullNameSt = "";
                    }

                    currentAccessionId = accessionSt;

                    //-----------alternative accessions-------------
                    ArrayList<String> alternativeAccessions = new ArrayList<>();
                    List<Element> altAccessionsList = entryXMLElem.asJDomElement()
                            .getChildren(UniprotStuff.ENTRY_ACCESSION_TAG_NAME);
                    for (int i = 1; i < altAccessionsList.size(); i++) {
                        alternativeAccessions.add(altAccessionsList.get(i).getText());
                    }
                    proteinProperties.put(ProteinNode.ALTERNATIVE_ACCESSIONS_PROPERTY,
                            convertToStringArray(alternativeAccessions));

                    //-----db references-------------
                    String pirIdSt = "";
                    String keggIdSt = "";
                    String ensemblIdSt = "";
                    String uniGeneIdSt = "";
                    String arrayExpressIdSt = "";

                    List<Element> dbReferenceList = entryXMLElem.asJDomElement()
                            .getChildren(UniprotStuff.DB_REFERENCE_TAG_NAME);
                    ArrayList<String> emblCrossReferences = new ArrayList<>();
                    ArrayList<String> refseqReferences = new ArrayList<>();
                    ArrayList<String> enzymeDBReferences = new ArrayList<>();
                    ArrayList<String> ensemblPlantsReferences = new ArrayList<>();
                    HashMap<String, String> reactomeReferences = new HashMap<>();

                    for (Element dbReferenceElem : dbReferenceList) {
                        String refId = dbReferenceElem.getAttributeValue("id");
                        switch (dbReferenceElem.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE)) {
                        case "Ensembl":
                            ensemblIdSt = refId;
                            break;
                        case "PIR":
                            pirIdSt = refId;
                            break;
                        case "UniGene":
                            uniGeneIdSt = refId;
                            break;
                        case "KEGG":
                            keggIdSt = refId;
                            break;
                        case "EMBL":
                            emblCrossReferences.add(refId);
                            break;
                        case "EC":
                            enzymeDBReferences.add(refId);
                            break;
                        case "ArrayExpress":
                            arrayExpressIdSt = refId;
                            break;
                        case "RefSeq":
                            //refseqReferences.add(refId);
                            List<Element> children = dbReferenceElem.getChildren("property");
                            for (Element propertyElem : children) {
                                if (propertyElem.getAttributeValue("type").equals("nucleotide sequence ID")) {
                                    refseqReferences.add(propertyElem.getAttributeValue("value"));
                                }
                            }
                            break;
                        case "Reactome":
                            Element propertyElem = dbReferenceElem.getChild("property");
                            String pathwayName = "";
                            if (propertyElem.getAttributeValue("type").equals("pathway name")) {
                                pathwayName = propertyElem.getAttributeValue("value");
                            }
                            reactomeReferences.put(refId, pathwayName);
                            break;
                        case "EnsemblPlants":
                            ensemblPlantsReferences.add(refId);
                            break;
                        }

                    }

                    Element sequenceElem = entryXMLElem.asJDomElement()
                            .getChild(UniprotStuff.ENTRY_SEQUENCE_TAG_NAME);
                    String sequenceSt = sequenceElem.getText();
                    int seqLength = Integer
                            .parseInt(sequenceElem.getAttributeValue(UniprotStuff.SEQUENCE_LENGTH_ATTRIBUTE));
                    float seqMass = Float
                            .parseFloat(sequenceElem.getAttributeValue(UniprotStuff.SEQUENCE_MASS_ATTRIBUTE));

                    //System.out.println("lalala " + seqMass);
                    proteinProperties.put(ProteinNode.MODIFIED_DATE_PROPERTY, modifiedDateSt);
                    proteinProperties.put(ProteinNode.ACCESSION_PROPERTY, accessionSt);
                    proteinProperties.put(ProteinNode.NAME_PROPERTY, nameSt);
                    proteinProperties.put(ProteinNode.FULL_NAME_PROPERTY, fullNameSt);
                    proteinProperties.put(ProteinNode.SHORT_NAME_PROPERTY, shortNameSt);
                    proteinProperties.put(ProteinNode.SEQUENCE_PROPERTY, sequenceSt);
                    proteinProperties.put(ProteinNode.LENGTH_PROPERTY, seqLength);
                    proteinProperties.put(ProteinNode.MASS_PROPERTY, seqMass);
                    proteinProperties.put(ProteinNode.ARRAY_EXPRESS_ID_PROPERTY, arrayExpressIdSt);
                    proteinProperties.put(ProteinNode.PIR_ID_PROPERTY, pirIdSt);
                    proteinProperties.put(ProteinNode.KEGG_ID_PROPERTY, keggIdSt);
                    proteinProperties.put(ProteinNode.EMBL_REFERENCES_PROPERTY,
                            convertToStringArray(emblCrossReferences));
                    proteinProperties.put(ProteinNode.ENSEMBL_PLANTS_REFERENCES_PROPERTY,
                            convertToStringArray(ensemblPlantsReferences));
                    proteinProperties.put(ProteinNode.ENSEMBL_ID_PROPERTY, ensemblIdSt);
                    proteinProperties.put(ProteinNode.UNIGENE_ID_PROPERTY, uniGeneIdSt);

                    //---------------gene-names-------------------
                    Element geneElement = entryXMLElem.asJDomElement().getChild(UniprotStuff.GENE_TAG_NAME);
                    ArrayList<String> geneNames = new ArrayList<>();
                    if (geneElement != null) {
                        List<Element> genesList = geneElement.getChildren(UniprotStuff.GENE_NAME_TAG_NAME);
                        for (Element geneNameElem : genesList) {
                            geneNames.add(geneNameElem.getText());
                        }
                    }
                    proteinProperties.put(ProteinNode.GENE_NAMES_PROPERTY, convertToStringArray(geneNames));
                    //-----------------------------------------

                    long currentProteinId = inserter.createNode(proteinProperties);
                    proteinAccessionIndex.add(currentProteinId,
                            MapUtil.map(ProteinNode.PROTEIN_ACCESSION_INDEX, accessionSt));

                    //indexing protein by alternative accessions
                    for (String altAccessionSt : alternativeAccessions) {
                        proteinAccessionIndex.add(currentProteinId,
                                MapUtil.map(ProteinNode.PROTEIN_ACCESSION_INDEX, altAccessionSt));
                    }
                    //---flushing protein accession index----
                    proteinAccessionIndex.flush();

                    //---adding protein node to node_type index----
                    nodeTypeIndex.add(currentProteinId,
                            MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, ProteinNode.NODE_TYPE));

                    //indexing protein by full name
                    if (!fullNameSt.isEmpty()) {
                        proteinFullNameFullTextIndex.add(currentProteinId,
                                MapUtil.map(ProteinNode.PROTEIN_FULL_NAME_FULL_TEXT_INDEX, fullNameSt));

                        //System.out.println(fullNameSt.toUpperCase() + " , " + currentProteinId);
                    }

                    //indexing protein by gene names
                    String geneNamesStToBeIndexed = "";
                    for (String geneNameSt : geneNames) {
                        geneNamesStToBeIndexed += geneNameSt + " ";
                    }

                    proteinGeneNamesFullTextIndex.add(currentProteinId, MapUtil
                            .map(ProteinNode.PROTEIN_GENE_NAMES_FULL_TEXT_INDEX, geneNamesStToBeIndexed));

                    //indexing protein by Ensembl plants references
                    for (String ensemblPlantRef : ensemblPlantsReferences) {
                        proteinEnsemblPlantsIndex.add(currentProteinId,
                                MapUtil.map(ProteinNode.PROTEIN_ENSEMBL_PLANTS_INDEX, ensemblPlantRef));
                    }

                    //--------------refseq associations----------------
                    if (uniprotDataXML.getRefseq()) {
                        for (String refseqReferenceSt : refseqReferences) {
                            //System.out.println("refseqReferenceSt = " + refseqReferenceSt);
                            IndexHits<Long> hits = genomeElementVersionIndex
                                    .get(GenomeElementNode.GENOME_ELEMENT_VERSION_INDEX, refseqReferenceSt);
                            if (hits.hasNext()) {
                                inserter.createRelationship(currentProteinId, hits.getSingle(),
                                        proteinGenomeElementRel, null);
                            } else {
                                logger.log(Level.INFO, ("GenomeElem not found for: " + currentAccessionId
                                        + " , " + refseqReferenceSt));
                            }

                        }
                    }

                    //--------------reactome associations----------------
                    if (uniprotDataXML.getReactome()) {
                        for (String reactomeId : reactomeReferences.keySet()) {
                            long reactomeTermNodeId = -1;
                            IndexHits<Long> reactomeTermIdIndexHits = reactomeTermIdIndex
                                    .get(ReactomeTermNode.REACTOME_TERM_ID_INDEX, reactomeId);
                            if (reactomeTermIdIndexHits.hasNext()) {
                                reactomeTermNodeId = reactomeTermIdIndexHits.getSingle();
                            }
                            if (reactomeTermNodeId < 0) {
                                reactomeTermProperties.put(ReactomeTermNode.ID_PROPERTY, reactomeId);
                                reactomeTermProperties.put(ReactomeTermNode.PATHWAY_NAME_PROPERTY,
                                        reactomeReferences.get(reactomeId));
                                reactomeTermNodeId = inserter.createNode(reactomeTermProperties);
                                reactomeTermIdIndex.add(reactomeTermNodeId,
                                        MapUtil.map(ReactomeTermNode.REACTOME_TERM_ID_INDEX, reactomeId));
                                //----flushing reactome index---
                                reactomeTermIdIndex.flush();
                                //---adding reactome term node to node_type index----
                                nodeTypeIndex.add(reactomeTermNodeId, MapUtil
                                        .map(Bio4jManager.NODE_TYPE_INDEX_NAME, ReactomeTermNode.NODE_TYPE));
                            }
                            inserter.createRelationship(currentProteinId, reactomeTermNodeId,
                                    proteinReactomeRel, null);
                        }
                    }
                    //-------------------------------------------------------

                    //---------------enzyme db associations----------------------
                    if (uniprotDataXML.getEnzymeDb()) {
                        for (String enzymeDBRef : enzymeDBReferences) {
                            long enzymeNodeId;
                            IndexHits<Long> enzymeIdIndexHits = enzymeIdIndex.get(EnzymeNode.ENZYME_ID_INDEX,
                                    enzymeDBRef);
                            if (enzymeIdIndexHits.hasNext()) {
                                enzymeNodeId = enzymeIdIndexHits.next();
                                inserter.createRelationship(currentProteinId, enzymeNodeId,
                                        proteinEnzymaticActivityRel, null);
                            } else {
                                enzymeIdsNotFoundBuff.write(
                                        "Enzyme term: " + enzymeDBRef + " not found.\t" + currentAccessionId);
                            }
                        }
                    }
                    //------------------------------------------------------------

                    //-----comments import---
                    if (uniprotDataXML.getComments()) {
                        importProteinComments(entryXMLElem, inserter, indexProvider, currentProteinId,
                                sequenceSt, uniprotDataXML);
                    }

                    //-----features import----
                    if (uniprotDataXML.getFeatures()) {
                        importProteinFeatures(entryXMLElem, inserter, indexProvider, currentProteinId);
                    }

                    //--------------------------------datasets--------------------------------------------------
                    String proteinDataSetSt = entryXMLElem.asJDomElement()
                            .getAttributeValue(UniprotStuff.ENTRY_DATASET_ATTRIBUTE);
                    //long datasetId = indexService.getSingleNode(DatasetNode.DATASET_NAME_INDEX, proteinDataSetSt);
                    long datasetId = -1;
                    IndexHits<Long> datasetNameIndexHits = datasetNameIndex.get(DatasetNode.DATASET_NAME_INDEX,
                            proteinDataSetSt);
                    if (datasetNameIndexHits.hasNext()) {
                        datasetId = datasetNameIndexHits.getSingle();
                    }
                    if (datasetId < 0) {
                        datasetProperties.put(DatasetNode.NAME_PROPERTY, proteinDataSetSt);
                        datasetId = inserter.createNode(datasetProperties);
                        datasetNameIndex.add(datasetId,
                                MapUtil.map(DatasetNode.DATASET_NAME_INDEX, proteinDataSetSt));
                        //----flushing dataset name index---
                        datasetNameIndex.flush();
                        //---adding dataset node to node_type index----
                        nodeTypeIndex.add(datasetId,
                                MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, DatasetNode.NODE_TYPE));
                    }
                    inserter.createRelationship(currentProteinId, datasetId, proteinDatasetRel, null);
                    //---------------------------------------------------------------------------------------------

                    if (uniprotDataXML.getCitations()) {
                        importProteinCitations(entryXMLElem, inserter, indexProvider, currentProteinId,
                                uniprotDataXML);
                    }

                    //-------------------------------keywords------------------------------------------------------
                    if (uniprotDataXML.getKeywords()) {
                        List<Element> keywordsList = entryXMLElem.asJDomElement()
                                .getChildren(UniprotStuff.KEYWORD_TAG_NAME);
                        for (Element keywordElem : keywordsList) {
                            String keywordId = keywordElem.getAttributeValue(UniprotStuff.KEYWORD_ID_ATTRIBUTE);
                            String keywordName = keywordElem.getText();
                            long keywordNodeId = -1;
                            IndexHits<Long> keyworIdIndexHits = keywordIdIndex.get(KeywordNode.KEYWORD_ID_INDEX,
                                    keywordId);
                            if (keyworIdIndexHits.hasNext()) {
                                keywordNodeId = keyworIdIndexHits.getSingle();
                            }
                            if (keywordNodeId < 0) {

                                keywordProperties.put(KeywordNode.ID_PROPERTY, keywordId);
                                keywordProperties.put(KeywordNode.NAME_PROPERTY, keywordName);

                                keywordNodeId = inserter.createNode(keywordProperties);

                                keywordIdIndex.add(keywordNodeId,
                                        MapUtil.map(KeywordNode.KEYWORD_ID_INDEX, keywordId));
                                keywordNameIndex.add(keywordNodeId,
                                        MapUtil.map(KeywordNode.KEYWORD_NAME_INDEX, keywordName));

                                //---flushing keyword id index----
                                keywordIdIndex.flush();

                                //---adding keyword node to node_type index----
                                nodeTypeIndex.add(keywordNodeId,
                                        MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, KeywordNode.NODE_TYPE));
                            }
                            inserter.createRelationship(currentProteinId, keywordNodeId, proteinKeywordRel,
                                    null);
                        }
                    }
                    //---------------------------------------------------------------------------------------

                    for (Element dbReferenceElem : dbReferenceList) {

                        //-------------------------------INTERPRO------------------------------------------------------  
                        if (dbReferenceElem.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE)
                                .equals(UniprotStuff.INTERPRO_DB_REFERENCE_TYPE)) {

                            if (uniprotDataXML.getInterpro()) {
                                String interproId = dbReferenceElem
                                        .getAttributeValue(UniprotStuff.DB_REFERENCE_ID_ATTRIBUTE);
                                //long interproNodeId = indexService.getSingleNode(InterproNode.INTERPRO_ID_INDEX, interproId);
                                long interproNodeId = -1;
                                IndexHits<Long> interproIdIndexHits = interproIdIndex
                                        .get(InterproNode.INTERPRO_ID_INDEX, interproId);
                                if (interproIdIndexHits.hasNext()) {
                                    interproNodeId = interproIdIndexHits.getSingle();
                                }

                                if (interproNodeId < 0) {
                                    String interproEntryNameSt = "";
                                    List<Element> properties = dbReferenceElem
                                            .getChildren(UniprotStuff.DB_REFERENCE_PROPERTY_TAG_NAME);
                                    for (Element prop : properties) {
                                        if (prop.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE)
                                                .equals(UniprotStuff.INTERPRO_ENTRY_NAME)) {
                                            interproEntryNameSt = prop.getAttributeValue(
                                                    UniprotStuff.DB_REFERENCE_VALUE_ATTRIBUTE);
                                            break;
                                        }
                                    }

                                    interproProperties.put(InterproNode.ID_PROPERTY, interproId);
                                    interproProperties.put(InterproNode.NAME_PROPERTY, interproEntryNameSt);
                                    interproNodeId = inserter.createNode(interproProperties);

                                    interproIdIndex.add(interproNodeId,
                                            MapUtil.map(InterproNode.INTERPRO_ID_INDEX, interproId));
                                    //flushing interpro id index
                                    interproIdIndex.flush();

                                    //---adding interpro node to node_type index----
                                    nodeTypeIndex.add(interproNodeId, MapUtil
                                            .map(Bio4jManager.NODE_TYPE_INDEX_NAME, InterproNode.NODE_TYPE));
                                }

                                inserter.createRelationship(currentProteinId, interproNodeId,
                                        proteinInterproRel, null);
                            }

                        } //-------------------------------PFAM------------------------------------------------------  
                        else if (dbReferenceElem.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE)
                                .equals("Pfam")) {

                            if (uniprotDataXML.getPfam()) {
                                String pfamId = dbReferenceElem
                                        .getAttributeValue(UniprotStuff.DB_REFERENCE_ID_ATTRIBUTE);
                                long pfamNodeId = -1;
                                IndexHits<Long> pfamIdIndexHits = pfamIdIndex.get(PfamNode.PFAM_ID_INDEX,
                                        pfamId);
                                if (pfamIdIndexHits.hasNext()) {
                                    pfamNodeId = pfamIdIndexHits.getSingle();
                                }

                                if (pfamNodeId < 0) {
                                    String pfamEntryNameSt = "";
                                    List<Element> properties = dbReferenceElem
                                            .getChildren(UniprotStuff.DB_REFERENCE_PROPERTY_TAG_NAME);
                                    for (Element prop : properties) {
                                        if (prop.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE)
                                                .equals("entry name")) {
                                            pfamEntryNameSt = prop.getAttributeValue(
                                                    UniprotStuff.DB_REFERENCE_VALUE_ATTRIBUTE);
                                            break;
                                        }
                                    }

                                    pfamProperties.put(PfamNode.ID_PROPERTY, pfamId);
                                    pfamProperties.put(PfamNode.NAME_PROPERTY, pfamEntryNameSt);
                                    pfamNodeId = inserter.createNode(pfamProperties);

                                    pfamIdIndex.add(pfamNodeId, MapUtil.map(PfamNode.PFAM_ID_INDEX, pfamId));
                                    //flushing pfam id index
                                    pfamIdIndex.flush();

                                    //---adding pfam node to node_type index----
                                    nodeTypeIndex.add(pfamNodeId,
                                            MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, PfamNode.NODE_TYPE));
                                }

                                inserter.createRelationship(currentProteinId, pfamNodeId, proteinPfamRel, null);
                            }

                        } //-------------------GO -----------------------------
                        else if (dbReferenceElem.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE)
                                .toUpperCase().equals(UniprotStuff.GO_DB_REFERENCE_TYPE)) {

                            if (uniprotDataXML.getGeneOntology()) {
                                String goId = dbReferenceElem
                                        .getAttributeValue(UniprotStuff.DB_REFERENCE_ID_ATTRIBUTE);
                                String evidenceSt = "";
                                List<Element> props = dbReferenceElem
                                        .getChildren(UniprotStuff.DB_REFERENCE_PROPERTY_TAG_NAME);
                                for (Element element : props) {
                                    if (element.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE)
                                            .equals(UniprotStuff.EVIDENCE_TYPE_ATTRIBUTE)) {
                                        evidenceSt = element.getAttributeValue("value");
                                        if (evidenceSt == null) {
                                            evidenceSt = "";
                                        }
                                        break;
                                    }
                                }
                                long goTermNodeId = goTermIdIndex.get(GoTermNode.GO_TERM_ID_INDEX, goId)
                                        .getSingle();
                                proteinGoProperties.put(ProteinGoRel.EVIDENCE_PROPERTY, evidenceSt);
                                inserter.createRelationship(currentProteinId, goTermNodeId, proteinGoRel,
                                        proteinGoProperties);
                            }

                        }

                    }
                    //---------------------------------------------------------------------------------------

                    //---------------------------------------------------------------------------------------
                    //--------------------------------organism-----------------------------------------------

                    String scName, commName, synName;
                    scName = "";
                    commName = "";
                    synName = "";

                    Element organismElem = entryXMLElem.asJDomElement()
                            .getChild(UniprotStuff.ORGANISM_TAG_NAME);

                    List<Element> organismNames = organismElem.getChildren(UniprotStuff.ORGANISM_NAME_TAG_NAME);
                    for (Element element : organismNames) {
                        String type = element.getAttributeValue(UniprotStuff.ORGANISM_NAME_TYPE_ATTRIBUTE);
                        switch (type) {
                        case UniprotStuff.ORGANISM_SCIENTIFIC_NAME_TYPE:
                            scName = element.getText();
                            break;
                        case UniprotStuff.ORGANISM_COMMON_NAME_TYPE:
                            commName = element.getText();
                            break;
                        case UniprotStuff.ORGANISM_SYNONYM_NAME_TYPE:
                            synName = element.getText();
                            break;
                        }
                    }

                    //long organismNodeId = indexService.getSingleNode(OrganismNode.ORGANISM_SCIENTIFIC_NAME_INDEX, scName);
                    long organismNodeId = -1;
                    IndexHits<Long> organismScientifiNameIndexHits = organismScientificNameIndex
                            .get(OrganismNode.ORGANISM_SCIENTIFIC_NAME_INDEX, scName);
                    if (organismScientifiNameIndexHits.hasNext()) {
                        organismNodeId = organismScientifiNameIndexHits.getSingle();
                    }
                    if (organismNodeId < 0) {

                        organismProperties.put(OrganismNode.COMMON_NAME_PROPERTY, commName);
                        organismProperties.put(OrganismNode.SCIENTIFIC_NAME_PROPERTY, scName);
                        organismProperties.put(OrganismNode.SYNONYM_NAME_PROPERTY, synName);

                        List<Element> organismDbRefElems = organismElem
                                .getChildren(UniprotStuff.DB_REFERENCE_TAG_NAME);
                        boolean ncbiIdFound = false;
                        if (organismDbRefElems != null) {
                            for (Element dbRefElem : organismDbRefElems) {
                                String t = dbRefElem.getAttributeValue("type");
                                if (t.equals("NCBI Taxonomy")) {
                                    organismProperties.put(OrganismNode.NCBI_TAXONOMY_ID_PROPERTY,
                                            dbRefElem.getAttributeValue("id"));
                                    ncbiIdFound = true;
                                    break;
                                }
                            }
                        }
                        if (!ncbiIdFound) {
                            organismProperties.put(OrganismNode.NCBI_TAXONOMY_ID_PROPERTY, "");
                        }
                        organismNodeId = inserter.createNode(organismProperties);

                        organismScientificNameIndex.add(organismNodeId,
                                MapUtil.map(OrganismNode.ORGANISM_SCIENTIFIC_NAME_INDEX, scName));
                        organismNcbiTaxonomyIdIndex.add(organismNodeId,
                                MapUtil.map(OrganismNode.NCBI_TAXONOMY_ID_PROPERTY,
                                        organismProperties.get(OrganismNode.NCBI_TAXONOMY_ID_PROPERTY)));

                        //flushing organism scientifica name index
                        organismScientificNameIndex.flush();

                        //---adding organism node to node_type index----
                        nodeTypeIndex.add(organismNodeId,
                                MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, OrganismNode.NODE_TYPE));

                        Element lineage = entryXMLElem.asJDomElement().getChild("organism").getChild("lineage");
                        List<Element> taxons = lineage.getChildren("taxon");

                        Element firstTaxonElem = taxons.get(0);

                        //long firstTaxonId = indexService.getSingleNode(TaxonNode.TAXON_NAME_INDEX, firstTaxonElem.getText());
                        long firstTaxonId = -1;
                        IndexHits<Long> firstTaxonIndexHits = taxonNameIndex.get(TaxonNode.TAXON_NAME_INDEX,
                                firstTaxonElem.getText());
                        if (firstTaxonIndexHits.hasNext()) {
                            firstTaxonId = firstTaxonIndexHits.getSingle();
                        }

                        if (firstTaxonId < 0) {

                            String firstTaxonName = firstTaxonElem.getText();
                            taxonProperties.put(TaxonNode.NAME_PROPERTY, firstTaxonName);
                            firstTaxonId = createTaxonNode(taxonProperties, inserter, taxonNameIndex,
                                    nodeTypeIndex);
                            //flushing taxon name index--
                            taxonNameIndex.flush();

                        }

                        long lastTaxonId = firstTaxonId;
                        for (int i = 1; i < taxons.size(); i++) {
                            String taxonName = taxons.get(i).getText();
                            long currentTaxonId = -1;
                            IndexHits<Long> currentTaxonIndexHits = taxonNameIndex
                                    .get(TaxonNode.TAXON_NAME_INDEX, taxonName);
                            if (currentTaxonIndexHits.hasNext()) {
                                currentTaxonId = currentTaxonIndexHits.getSingle();
                            }
                            if (currentTaxonId < 0) {

                                taxonProperties.put(TaxonNode.NAME_PROPERTY, taxonName);
                                currentTaxonId = createTaxonNode(taxonProperties, inserter, taxonNameIndex,
                                        nodeTypeIndex);
                                //flushing taxon name index--
                                taxonNameIndex.flush();
                                inserter.createRelationship(lastTaxonId, currentTaxonId, taxonParentRel, null);

                            }
                            lastTaxonId = currentTaxonId;
                        }

                        inserter.createRelationship(lastTaxonId, organismNodeId, taxonParentRel, null);

                    }

                    //---------------------------------------------------------------------------------------
                    //---------------------------------------------------------------------------------------

                    inserter.createRelationship(currentProteinId, organismNodeId, proteinOrganismRel, null);

                    proteinCounter++;
                    if ((proteinCounter % limitForPrintingOut) == 0) {
                        String countProteinsSt = proteinCounter + " proteins inserted!!";
                        logger.log(Level.INFO, countProteinsSt);
                    }

                }
            }

        } catch (Exception e) {
            logger.log(Level.SEVERE, ("Exception retrieving protein " + currentAccessionId));
            logger.log(Level.SEVERE, e.getMessage());
            StackTraceElement[] trace = e.getStackTrace();
            for (StackTraceElement stackTraceElement : trace) {
                logger.log(Level.SEVERE, stackTraceElement.toString());
            }
        } finally {

            try {
                //------closing writers-------
                enzymeIdsNotFoundBuff.close();

                // shutdown, makes sure all changes are written to disk
                indexProvider.shutdown();
                inserter.shutdown();

                // closing logger file handler
                fh.close();

                //-----------------writing stats file---------------------
                long elapsedTime = System.nanoTime() - initTime;
                long elapsedSeconds = Math.round((elapsedTime / 1000000000.0));
                long hours = elapsedSeconds / 3600;
                long minutes = (elapsedSeconds % 3600) / 60;
                long seconds = (elapsedSeconds % 3600) % 60;

                statsBuff.write("Statistics for program ImportUniprot:\nInput file: " + inFile.getName()
                        + "\nThere were " + proteinCounter + " proteins inserted.\n" + "The elapsed time was: "
                        + hours + "h " + minutes + "m " + seconds + "s\n");

                //---closing stats writer---
                statsBuff.close();

            } catch (IOException ex) {
                Logger.getLogger(ImportUniprot.class.getName()).log(Level.SEVERE, null, ex);
            }

        }
    }

}

From source file:com.bio4j.neo4jdb.programs.ImportUniprot.java

License:Open Source License

/**
 * Imports every feature element of one Uniprot entry as a relationship going from the protein
 * node to the node representing the feature's type.
 *
 * <p>For each feature the feature-type node is looked up by name in the feature type name index;
 * when no node is found one is created, added to the feature type name index (which is flushed
 * right away so the next lookup can see it) and added to the node type index. The feature's
 * description, id, evidence, status, begin/end positions, original/variation texts and ref are
 * then stored as properties of the relationship. Any of those values missing from the XML is
 * stored as the empty string.
 *
 * <p>The relationship type is chosen from the feature's type attribute. A feature whose type
 * matches none of the known values still gets its feature-type node created, but no
 * relationship is created for it. The type attribute itself is assumed to be present; a
 * missing type is not handled here.
 *
 * @param entryXMLElem     the Uniprot entry whose feature children are imported
 * @param inserter         batch inserter used to create nodes and relationships
 * @param indexProvider    provider from which the batch indexes are obtained
 * @param currentProteinId id of the protein node the feature relationships start from
 */
private static void importProteinFeatures(XMLElement entryXMLElem, BatchInserter inserter,
        BatchInserterIndexProvider indexProvider, long currentProteinId) {

    //-----------------create batch indexes----------------------------------
    //----------------------------------------------------------------------
    BatchInserterIndex featureTypeNameIndex = indexProvider.nodeIndex(FeatureTypeNode.FEATURE_TYPE_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex nodeTypeIndex = indexProvider.nodeIndex(Bio4jManager.NODE_TYPE_INDEX_NAME,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    //------------------------------------------------------------------------

    //--------------------------------features----------------------------------------------------
    List<Element> featuresList = entryXMLElem.asJDomElement().getChildren(UniprotStuff.FEATURE_TAG_NAME);

    for (Element featureElem : featuresList) {

        String featureTypeSt = featureElem.getAttributeValue(UniprotStuff.FEATURE_TYPE_ATTRIBUTE);

        //---looking up the feature type node (-1 means "not found")----
        long featureTypeNodeId = -1;
        IndexHits<Long> featureTypeNameIndexHits = featureTypeNameIndex
                .get(FeatureTypeNode.FEATURE_TYPE_NAME_INDEX, featureTypeSt);
        try {
            if (featureTypeNameIndexHits.hasNext()) {
                featureTypeNodeId = featureTypeNameIndexHits.getSingle();
            }
        } finally {
            // released even when the lookup throws
            featureTypeNameIndexHits.close();
        }

        if (featureTypeNodeId < 0) {

            featureTypeProperties.put(FeatureTypeNode.NAME_PROPERTY, featureTypeSt);
            featureTypeNodeId = inserter.createNode(featureTypeProperties);
            featureTypeNameIndex.add(featureTypeNodeId,
                    MapUtil.map(FeatureTypeNode.FEATURE_TYPE_NAME_INDEX, featureTypeSt));
            //---flushing feature type name index----
            featureTypeNameIndex.flush();

            //---adding feature type node to node_type index----
            nodeTypeIndex.add(featureTypeNodeId,
                    MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, FeatureTypeNode.NODE_TYPE));

        }

        String featureDescSt = nullToEmpty(
                featureElem.getAttributeValue(UniprotStuff.FEATURE_DESCRIPTION_ATTRIBUTE));
        String featureIdSt = nullToEmpty(featureElem.getAttributeValue(UniprotStuff.FEATURE_ID_ATTRIBUTE));
        String featureStatusSt = nullToEmpty(featureElem.getAttributeValue(UniprotStuff.STATUS_ATTRIBUTE));
        String featureEvidenceSt = nullToEmpty(featureElem.getAttributeValue(UniprotStuff.EVIDENCE_ATTRIBUTE));

        //---location: either a single position or a begin/end pair----
        // getChild returns null for a missing child, so every level is checked before use;
        // anything absent ends up as "" like the other optional values.
        String beginFeatureSt = null;
        String endFeatureSt = null;
        Element locationElem = featureElem.getChild(UniprotStuff.FEATURE_LOCATION_TAG_NAME);
        if (locationElem != null) {
            Element positionElem = locationElem.getChild(UniprotStuff.FEATURE_POSITION_TAG_NAME);
            if (positionElem != null) {
                // a single position is used as both begin and end
                beginFeatureSt = positionElem
                        .getAttributeValue(UniprotStuff.FEATURE_POSITION_POSITION_ATTRIBUTE);
                endFeatureSt = beginFeatureSt;
            } else {
                Element beginElem = locationElem.getChild(UniprotStuff.FEATURE_LOCATION_BEGIN_TAG_NAME);
                if (beginElem != null) {
                    beginFeatureSt = beginElem
                            .getAttributeValue(UniprotStuff.FEATURE_LOCATION_POSITION_ATTRIBUTE);
                }
                Element endElem = locationElem.getChild(UniprotStuff.FEATURE_LOCATION_END_TAG_NAME);
                if (endElem != null) {
                    endFeatureSt = endElem
                            .getAttributeValue(UniprotStuff.FEATURE_LOCATION_POSITION_ATTRIBUTE);
                }
            }
        }
        beginFeatureSt = nullToEmpty(beginFeatureSt);
        endFeatureSt = nullToEmpty(endFeatureSt);

        String originalSt = nullToEmpty(featureElem.getChildText(UniprotStuff.FEATURE_ORIGINAL_TAG_NAME));
        String variationSt = nullToEmpty(featureElem.getChildText(UniprotStuff.FEATURE_VARIATION_TAG_NAME));
        String featureRefSt = nullToEmpty(featureElem.getAttributeValue(UniprotStuff.FEATURE_REF_ATTRIBUTE));

        featureProperties.put(BasicFeatureRel.DESCRIPTION_PROPERTY, featureDescSt);
        featureProperties.put(BasicFeatureRel.ID_PROPERTY, featureIdSt);
        featureProperties.put(BasicFeatureRel.EVIDENCE_PROPERTY, featureEvidenceSt);
        featureProperties.put(BasicFeatureRel.STATUS_PROPERTY, featureStatusSt);
        featureProperties.put(BasicFeatureRel.BEGIN_PROPERTY, beginFeatureSt);
        featureProperties.put(BasicFeatureRel.END_PROPERTY, endFeatureSt);
        featureProperties.put(BasicFeatureRel.ORIGINAL_PROPERTY, originalSt);
        featureProperties.put(BasicFeatureRel.VARIATION_PROPERTY, variationSt);
        featureProperties.put(BasicFeatureRel.REF_PROPERTY, featureRefSt);

        //---one relationship type per known feature type; unknown types create no relationship----
        switch (featureTypeSt) {
        case ActiveSiteFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, activeSiteFeatureRel,
                    featureProperties);
            break;
        case BindingSiteFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, bindingSiteFeatureRel,
                    featureProperties);
            break;
        case CrossLinkFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, crossLinkFeatureRel,
                    featureProperties);
            break;
        case GlycosylationSiteFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, glycosylationSiteFeatureRel,
                    featureProperties);
            break;
        case InitiatorMethionineFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, initiatorMethionineFeatureRel,
                    featureProperties);
            break;
        case LipidMoietyBindingRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, lipidMoietyBindingRegionFeatureRel,
                    featureProperties);
            break;
        case MetalIonBindingSiteFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, metalIonBindingSiteFeatureRel,
                    featureProperties);
            break;
        case ModifiedResidueFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, modifiedResidueFeatureRel,
                    featureProperties);
            break;
        case NonStandardAminoAcidFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, nonStandardAminoAcidFeatureRel,
                    featureProperties);
            break;
        case NonTerminalResidueFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, nonTerminalResidueFeatureRel,
                    featureProperties);
            break;
        case PeptideFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, peptideFeatureRel,
                    featureProperties);
            break;
        case UnsureResidueFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, unsureResidueFeatureRel,
                    featureProperties);
            break;
        case MutagenesisSiteFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, mutagenesisSiteFeatureRel,
                    featureProperties);
            break;
        case SequenceVariantFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, sequenceVariantFeatureRel,
                    featureProperties);
            break;
        case CalciumBindingRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, calciumBindingRegionFeatureRel,
                    featureProperties);
            break;
        case ChainFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, chainFeatureRel,
                    featureProperties);
            break;
        case CoiledCoilRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, coiledCoilRegionFeatureRel,
                    featureProperties);
            break;
        case CompositionallyBiasedRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId,
                    compositionallyBiasedRegionFeatureRel, featureProperties);
            break;
        case DisulfideBondFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, disulfideBondFeatureRel,
                    featureProperties);
            break;
        case DnaBindingRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, dnaBindingRegionFeatureRel,
                    featureProperties);
            break;
        case DomainFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, domainFeatureRel,
                    featureProperties);
            break;
        case HelixFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, helixFeatureRel,
                    featureProperties);
            break;
        case IntramembraneRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, intramembraneRegionFeatureRel,
                    featureProperties);
            break;
        case NonConsecutiveResiduesFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, nonConsecutiveResiduesFeatureRel,
                    featureProperties);
            break;
        case NucleotidePhosphateBindingRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId,
                    nucleotidePhosphateBindingRegionFeatureRel, featureProperties);
            break;
        case PropeptideFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, propeptideFeatureRel,
                    featureProperties);
            break;
        case RegionOfInterestFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, regionOfInterestFeatureRel,
                    featureProperties);
            break;
        case RepeatFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, repeatFeatureRel,
                    featureProperties);
            break;
        case ShortSequenceMotifFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, shortSequenceMotifFeatureRel,
                    featureProperties);
            break;
        case SignalPeptideFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, signalPeptideFeatureRel,
                    featureProperties);
            break;
        case SpliceVariantFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, spliceVariantFeatureRel,
                    featureProperties);
            break;
        case StrandFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, strandFeatureRel,
                    featureProperties);
            break;
        case TopologicalDomainFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, topologicalDomainFeatureRel,
                    featureProperties);
            break;
        case TransitPeptideFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, transitPeptideFeatureRel,
                    featureProperties);
            break;
        case TransmembraneRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, transmembraneRegionFeatureRel,
                    featureProperties);
            break;
        case ZincFingerRegionFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, zincFingerRegionFeatureRel,
                    featureProperties);
            break;
        case SiteFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, siteFeatureRel, featureProperties);
            break;
        case TurnFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, turnFeatureRel, featureProperties);
            break;
        case SequenceConflictFeatureRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, featureTypeNodeId, sequenceConflictFeatureRel,
                    featureProperties);
            break;
        }

    }

}

/**
 * Returns {@code value} unchanged, or the empty string when {@code value} is {@code null}.
 *
 * @param value the possibly-null text read from the XML
 * @return {@code value}, or {@code ""} if it was {@code null}
 */
private static String nullToEmpty(String value) {
    return value == null ? "" : value;
}

From source file:com.bio4j.neo4jdb.programs.ImportUniprot.java

License:Open Source License

/**
 * Imports every {@code comment} child element of a Uniprot entry.
 *
 * For each comment the matching comment type node is looked up in the comment type name
 * index (and created plus indexed when it is not there yet); afterwards a relationship
 * whose kind depends on the comment type is created from the protein node.
 * Subcellular location, alternative products and sequence caution comments do not link
 * to the comment type node: they are delegated to dedicated helpers that link the protein
 * to location nodes, isoform nodes and sequence caution nodes respectively.
 *
 * Comments lacking the type attribute are skipped, since they cannot be classified
 * (switching on a null String would throw a NullPointerException).
 *
 * @param entryXMLElem     Uniprot entry whose comments are imported
 * @param inserter         batch inserter used for creating nodes and relationships
 * @param indexProvider    provider of the batch indexes used for node lookups
 * @param currentProteinId id of the protein node the relationships start from
 * @param proteinSequence  sequence stored in isoforms whose sequence type is "displayed"
 * @param uniprotDataXML   configuration; its getSubcellularLocations() and getIsoforms()
 *                         flags switch the respective imports on or off
 */
private static void importProteinComments(XMLElement entryXMLElem, BatchInserter inserter,
        BatchInserterIndexProvider indexProvider, long currentProteinId, String proteinSequence,
        UniprotDataXML uniprotDataXML) {

    //---------------indexes declaration---------------------------
    BatchInserterIndex commentTypeNameIndex = indexProvider.nodeIndex(CommentTypeNode.COMMENT_TYPE_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex subcellularLocationNameIndex = indexProvider.nodeIndex(
            SubcellularLocationNode.SUBCELLULAR_LOCATION_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex isoformIdIndex = indexProvider.nodeIndex(IsoformNode.ISOFORM_ID_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex nodeTypeIndex = indexProvider.nodeIndex(Bio4jManager.NODE_TYPE_INDEX_NAME,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    //-----------------------------------------------------------

    List<Element> comments = entryXMLElem.asJDomElement().getChildren(UniprotStuff.COMMENT_TAG_NAME);

    for (Element commentElem : comments) {

        String commentTypeSt = commentElem.getAttributeValue(UniprotStuff.COMMENT_TYPE_ATTRIBUTE);
        if (commentTypeSt == null) {
            // a comment without type cannot be classified; switch(null) would throw
            continue;
        }

        //-----------------text, status and evidence of the comment----------------------
        String commentTextSt = "";
        String commentStatusSt = "";
        String commentEvidenceSt = "";
        Element textElem = commentElem.getChild("text");
        if (textElem != null) {
            commentTextSt = textElem.getText();
            commentStatusSt = nullToEmptySt(textElem.getAttributeValue("status"));
            commentEvidenceSt = nullToEmptySt(textElem.getAttributeValue("evidence"));
        }

        commentProperties.put(BasicCommentRel.TEXT_PROPERTY, commentTextSt);
        commentProperties.put(BasicCommentRel.STATUS_PROPERTY, commentStatusSt);
        commentProperties.put(BasicCommentRel.EVIDENCE_PROPERTY, commentEvidenceSt);

        //-----------------COMMENT TYPE NODE RETRIEVING/CREATION----------------------
        long commentTypeId = findOrCreateCommentTypeNode(commentTypeSt, inserter, commentTypeNameIndex,
                nodeTypeIndex);

        //-----------------relationship creation depending on the comment type--------
        switch (commentTypeSt) {
        case ToxicDoseCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, toxicDoseCommentRel,
                    commentProperties);
            break;
        case CautionCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, cautionCommentRel, commentProperties);
            break;
        case CofactorCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, cofactorCommentRel, commentProperties);
            break;
        case DiseaseCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, diseaseCommentRel, commentProperties);
            break;
        case OnlineInformationCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            importOnlineInformationComment(commentElem, inserter, currentProteinId, commentTypeId,
                    commentTextSt, commentStatusSt, commentEvidenceSt);
            break;
        case TissueSpecificityCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, tissueSpecificityCommentRel,
                    commentProperties);
            break;
        case FunctionCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, functionCommentRel, commentProperties);
            break;
        case BiotechnologyCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, biotechnologyCommentRel,
                    commentProperties);
            break;
        case SubunitCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, subunitCommentRel, commentProperties);
            break;
        case PolymorphismCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, polymorphismCommentRel,
                    commentProperties);
            break;
        case DomainCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, domainCommentRel, commentProperties);
            break;
        case PostTranslationalModificationCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId,
                    postTranslationalModificationCommentRel, commentProperties);
            break;
        case CatalyticActivityCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, catalyticActivityCommentRel,
                    commentProperties);
            break;
        case DisruptionPhenotypeCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, disruptionPhenotypeCommentRel,
                    commentProperties);
            break;
        case BioPhysicoChemicalPropertiesCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            importBioPhysicoChemicalPropertiesComment(commentElem, inserter, currentProteinId, commentTypeId,
                    commentTextSt, commentStatusSt, commentEvidenceSt);
            break;
        case AllergenCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, allergenCommentRel, commentProperties);
            break;
        case PathwayCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, pathwayCommentRel, commentProperties);
            break;
        case InductionCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, inductionCommentRel,
                    commentProperties);
            break;
        case ProteinSubcellularLocationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            if (uniprotDataXML.getSubcellularLocations()) {
                importSubcellularLocationComment(commentElem, inserter, subcellularLocationNameIndex,
                        nodeTypeIndex, currentProteinId);
            }
            break;
        case UniprotStuff.COMMENT_ALTERNATIVE_PRODUCTS_TYPE:
            if (uniprotDataXML.getIsoforms()) {
                importAlternativeProductsComment(commentElem, inserter, isoformIdIndex, nodeTypeIndex,
                        currentProteinId, proteinSequence);
            }
            break;
        case UniprotStuff.COMMENT_SEQUENCE_CAUTION_TYPE:
            importSequenceCautionComment(commentElem, inserter, currentProteinId, commentTextSt,
                    commentStatusSt, commentEvidenceSt);
            break;
        case DevelopmentalStageCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, developmentalStageCommentRel,
                    commentProperties);
            break;
        case MiscellaneousCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, miscellaneousCommentRel,
                    commentProperties);
            break;
        case SimilarityCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, similarityCommentRel,
                    commentProperties);
            break;
        case RnaEditingCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            importRnaEditingComment(commentElem, inserter, currentProteinId, commentTypeId, commentTextSt,
                    commentStatusSt, commentEvidenceSt);
            break;
        case PharmaceuticalCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, pharmaceuticalCommentRel,
                    commentProperties);
            break;
        case EnzymeRegulationCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, commentTypeId, enzymeRegulationCommentRel,
                    commentProperties);
            break;
        case MassSpectrometryCommentRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            importMassSpectrometryComment(commentElem, inserter, currentProteinId, commentTypeId,
                    commentTextSt, commentStatusSt, commentEvidenceSt);
            break;
        }

    }

}

/** Returns the given string, or the empty string when it is null. */
private static String nullToEmptySt(String value) {
    return (value == null) ? "" : value;
}

/** Returns the id of the comment type node with the given name, creating and indexing it when missing. */
private static long findOrCreateCommentTypeNode(String commentTypeSt, BatchInserter inserter,
        BatchInserterIndex commentTypeNameIndex, BatchInserterIndex nodeTypeIndex) {

    long commentTypeId = -1;
    IndexHits<Long> commentTypeNameIndexHits = commentTypeNameIndex
            .get(CommentTypeNode.COMMENT_TYPE_NAME_INDEX, commentTypeSt);
    if (commentTypeNameIndexHits.hasNext()) {
        commentTypeId = commentTypeNameIndexHits.getSingle();
    }
    commentTypeNameIndexHits.close();

    if (commentTypeId < 0) {
        commentTypeProperties.put(CommentTypeNode.NAME_PROPERTY, commentTypeSt);
        commentTypeId = inserter.createNode(commentTypeProperties);
        commentTypeNameIndex.add(commentTypeId,
                MapUtil.map(CommentTypeNode.COMMENT_TYPE_NAME_INDEX, commentTypeSt));

        //----flushing the indexation so that the next lookup finds the new node----
        commentTypeNameIndex.flush();

        //---adding comment type node to node_type index----
        nodeTypeIndex.add(commentTypeId,
                MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, CommentTypeNode.NODE_TYPE));
    }
    return commentTypeId;
}

/** Creates the online information relationship, including the name attribute and the link uri. */
private static void importOnlineInformationComment(Element commentElem, BatchInserter inserter,
        long currentProteinId, long commentTypeId, String commentTextSt, String commentStatusSt,
        String commentEvidenceSt) {

    String nameSt = nullToEmptySt(commentElem.getAttributeValue("name"));
    String linkSt = "";
    Element linkElem = commentElem.getChild("link");
    if (linkElem != null) {
        linkSt = nullToEmptySt(linkElem.getAttributeValue("uri"));
    }

    onlineInformationCommentProperties.put(OnlineInformationCommentRel.STATUS_PROPERTY, commentStatusSt);
    onlineInformationCommentProperties.put(OnlineInformationCommentRel.EVIDENCE_PROPERTY, commentEvidenceSt);
    onlineInformationCommentProperties.put(OnlineInformationCommentRel.TEXT_PROPERTY, commentTextSt);
    onlineInformationCommentProperties.put(OnlineInformationCommentRel.NAME_PROPERTY, nameSt);
    onlineInformationCommentProperties.put(OnlineInformationCommentRel.LINK_PROPERTY, linkSt);
    inserter.createRelationship(currentProteinId, commentTypeId, onlineInformationCommentRel,
            onlineInformationCommentProperties);
}

/** Creates the biophysicochemical properties relationship (absorption, kinetics, redox potential, ...). */
private static void importBioPhysicoChemicalPropertiesComment(Element commentElem, BatchInserter inserter,
        long currentProteinId, long commentTypeId, String commentTextSt, String commentStatusSt,
        String commentEvidenceSt) {

    String phDependenceSt = nullToEmptySt(commentElem.getChildText("phDependence"));
    String temperatureDependenceSt = nullToEmptySt(commentElem.getChildText("temperatureDependence"));

    String absorptionMaxSt = "";
    String absorptionTextSt = "";
    Element absorptionElem = commentElem.getChild("absorption");
    if (absorptionElem != null) {
        absorptionMaxSt = nullToEmptySt(absorptionElem.getChildText("max"));
        absorptionTextSt = nullToEmptySt(absorptionElem.getChildText("text"));
    }

    // the kinetics element is stored as a whole, in its XML string form
    String kineticsSt = "";
    Element kineticsElem = commentElem.getChild("kinetics");
    if (kineticsElem != null) {
        kineticsSt = new XMLElement(kineticsElem).toString();
    }

    String redoxPotentialSt = "";
    String redoxPotentialEvidenceSt = "";
    Element redoxPotentialElem = commentElem.getChild("redoxPotential");
    if (redoxPotentialElem != null) {
        redoxPotentialSt = nullToEmptySt(redoxPotentialElem.getText());
        redoxPotentialEvidenceSt = nullToEmptySt(redoxPotentialElem.getAttributeValue("evidence"));
    }

    biophysicochemicalCommentProperties.put(BioPhysicoChemicalPropertiesCommentRel.STATUS_PROPERTY,
            commentStatusSt);
    biophysicochemicalCommentProperties.put(BioPhysicoChemicalPropertiesCommentRel.EVIDENCE_PROPERTY,
            commentEvidenceSt);
    biophysicochemicalCommentProperties.put(BioPhysicoChemicalPropertiesCommentRel.TEXT_PROPERTY,
            commentTextSt);
    biophysicochemicalCommentProperties.put(
            BioPhysicoChemicalPropertiesCommentRel.TEMPERATURE_DEPENDENCE_PROPERTY,
            temperatureDependenceSt);
    biophysicochemicalCommentProperties
            .put(BioPhysicoChemicalPropertiesCommentRel.PH_DEPENDENCE_PROPERTY, phDependenceSt);
    biophysicochemicalCommentProperties
            .put(BioPhysicoChemicalPropertiesCommentRel.KINETICS_XML_PROPERTY, kineticsSt);
    biophysicochemicalCommentProperties
            .put(BioPhysicoChemicalPropertiesCommentRel.ABSORPTION_MAX_PROPERTY, absorptionMaxSt);
    biophysicochemicalCommentProperties
            .put(BioPhysicoChemicalPropertiesCommentRel.ABSORPTION_TEXT_PROPERTY, absorptionTextSt);
    biophysicochemicalCommentProperties.put(
            BioPhysicoChemicalPropertiesCommentRel.REDOX_POTENTIAL_EVIDENCE_PROPERTY,
            redoxPotentialEvidenceSt);
    biophysicochemicalCommentProperties
            .put(BioPhysicoChemicalPropertiesCommentRel.REDOX_POTENTIAL_PROPERTY, redoxPotentialSt);
    inserter.createRelationship(currentProteinId, commentTypeId, bioPhysicoChemicalPropertiesCommentRel,
            biophysicochemicalCommentProperties);
}

/** Chains the location nodes of every subcellular location and links the protein to the last one. */
private static void importSubcellularLocationComment(Element commentElem, BatchInserter inserter,
        BatchInserterIndex subcellularLocationNameIndex, BatchInserterIndex nodeTypeIndex,
        long currentProteinId) {

    List<Element> subcLocations = commentElem.getChildren(UniprotStuff.SUBCELLULAR_LOCATION_TAG_NAME);

    for (Element subcLocation : subcLocations) {

        List<Element> locations = subcLocation.getChildren(UniprotStuff.LOCATION_TAG_NAME);
        if (locations.isEmpty()) {
            // nothing to link the protein to
            continue;
        }

        long lastLocationId = -1;
        for (int i = 0; i < locations.size(); i++) {

            String locationNameSt = locations.get(i).getTextTrim();

            long locationId = -1;
            IndexHits<Long> locationIndexHits = subcellularLocationNameIndex
                    .get(SubcellularLocationNode.SUBCELLULAR_LOCATION_NAME_INDEX, locationNameSt);
            if (locationIndexHits.hasNext()) {
                locationId = locationIndexHits.getSingle();
            }
            // closed on every path, also when the node has to be created
            locationIndexHits.close();

            if (locationId < 0) {
                subcellularLocationProperties.put(SubcellularLocationNode.NAME_PROPERTY, locationNameSt);
                locationId = createSubcellularLocationNode(subcellularLocationProperties, inserter,
                        subcellularLocationNameIndex, nodeTypeIndex);
                //---flushing subcellular location name index---
                subcellularLocationNameIndex.flush();
            }

            // every location but the first one gets a parent relationship to its predecessor
            if (i > 0) {
                inserter.createRelationship(locationId, lastLocationId, subcellularLocationParentRel, null);
            }
            lastLocationId = locationId;
        }

        Element lastLocation = locations.get(locations.size() - 1);
        String evidenceSt = nullToEmptySt(lastLocation.getAttributeValue(UniprotStuff.EVIDENCE_ATTRIBUTE));
        String statusSt = nullToEmptySt(lastLocation.getAttributeValue(UniprotStuff.STATUS_ATTRIBUTE));
        String topologyStatusSt = "";
        String topologySt = "";
        Element topologyElem = subcLocation.getChild("topology");
        if (topologyElem != null) {
            topologySt = nullToEmptySt(topologyElem.getText());
            topologyStatusSt = nullToEmptySt(topologyElem.getAttributeValue("status"));
        }

        proteinSubcellularLocationProperties.put(ProteinSubcellularLocationRel.EVIDENCE_PROPERTY,
                evidenceSt);
        proteinSubcellularLocationProperties.put(ProteinSubcellularLocationRel.STATUS_PROPERTY, statusSt);
        proteinSubcellularLocationProperties.put(ProteinSubcellularLocationRel.TOPOLOGY_PROPERTY,
                topologySt);
        proteinSubcellularLocationProperties.put(ProteinSubcellularLocationRel.TOPOLOGY_STATUS_PROPERTY,
                topologyStatusSt);
        inserter.createRelationship(currentProteinId, lastLocationId, proteinSubcellularLocationRel,
                proteinSubcellularLocationProperties);
    }
}

/** Finds or creates the isoform nodes of the comment and links them to their events and to the protein. */
private static void importAlternativeProductsComment(Element commentElem, BatchInserter inserter,
        BatchInserterIndex isoformIdIndex, BatchInserterIndex nodeTypeIndex, long currentProteinId,
        String proteinSequence) {

    List<Element> eventList = commentElem.getChildren("event");
    List<Element> isoformList = commentElem.getChildren("isoform");

    for (Element isoformElem : isoformList) {
        String isoformIdSt = isoformElem.getChildText("id");
        String isoformNoteSt = nullToEmptySt(isoformElem.getChildText("note"));
        String isoformNameSt = nullToEmptySt(isoformElem.getChildText("name"));

        // only the "displayed" isoform carries the sequence of the protein itself
        String isoformSeqSt = "";
        Element isoSeqElem = isoformElem.getChild("sequence");
        if (isoSeqElem != null && "displayed".equals(isoSeqElem.getAttributeValue("type"))) {
            isoformSeqSt = proteinSequence;
        }

        isoformProperties.put(IsoformNode.ID_PROPERTY, isoformIdSt);
        isoformProperties.put(IsoformNode.NOTE_PROPERTY, isoformNoteSt);
        isoformProperties.put(IsoformNode.NAME_PROPERTY, isoformNameSt);
        isoformProperties.put(IsoformNode.SEQUENCE_PROPERTY, isoformSeqSt);

        //--------------------------------------------------------
        long isoformId = -1;
        IndexHits<Long> isoformIdIndexHits = isoformIdIndex.get(IsoformNode.ISOFORM_ID_INDEX, isoformIdSt);
        if (isoformIdIndexHits.hasNext()) {
            isoformId = isoformIdIndexHits.getSingle();
        }
        isoformIdIndexHits.close();
        if (isoformId < 0) {
            isoformId = createIsoformNode(isoformProperties, inserter, isoformIdIndex, nodeTypeIndex);
        }

        for (Element eventElem : eventList) {

            String eventTypeSt = eventElem.getAttributeValue("type");
            if (eventTypeSt == null) {
                // an event without type cannot be mapped to any event node
                continue;
            }
            switch (eventTypeSt) {
            case AlternativeProductInitiationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                inserter.createRelationship(isoformId, alternativeProductInitiationId,
                        isoformEventGeneratorRel, null);
                break;
            case AlternativeProductPromoterRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                inserter.createRelationship(isoformId, alternativeProductPromoterId,
                        isoformEventGeneratorRel, null);
                break;
            case AlternativeProductRibosomalFrameshiftingRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                inserter.createRelationship(isoformId, alternativeProductRibosomalFrameshiftingId,
                        isoformEventGeneratorRel, null);
                break;
            case AlternativeProductSplicingRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                inserter.createRelationship(isoformId, alternativeProductSplicingId,
                        isoformEventGeneratorRel, null);
                break;
            }
        }

        //protein isoform relationship
        inserter.createRelationship(currentProteinId, isoformId, proteinIsoformRel, null);
    }
}

/** Links the protein to the sequence caution node matching the conflict type, once per position. */
private static void importSequenceCautionComment(Element commentElem, BatchInserter inserter,
        long currentProteinId, String commentTextSt, String commentStatusSt, String commentEvidenceSt) {

    sequenceCautionProperties.put(BasicProteinSequenceCautionRel.EVIDENCE_PROPERTY, commentEvidenceSt);
    sequenceCautionProperties.put(BasicProteinSequenceCautionRel.STATUS_PROPERTY, commentStatusSt);
    sequenceCautionProperties.put(BasicProteinSequenceCautionRel.TEXT_PROPERTY, commentTextSt);

    Element conflictElem = commentElem.getChild("conflict");
    if (conflictElem == null) {
        return;
    }

    String conflictTypeSt = conflictElem.getAttributeValue("type");
    String resourceSt = "";
    String idSt = "";
    String versionSt = "";

    Element sequenceElem = conflictElem.getChild("sequence");
    if (sequenceElem != null) {
        resourceSt = nullToEmptySt(sequenceElem.getAttributeValue("resource"));
        idSt = nullToEmptySt(sequenceElem.getAttributeValue("id"));
        versionSt = nullToEmptySt(sequenceElem.getAttributeValue("version"));
    }

    List<String> positionsList = new ArrayList<>();
    Element locationElem = commentElem.getChild("location");
    if (locationElem != null) {
        Element positionElem = locationElem.getChild("position");
        if (positionElem != null) {
            String tempPos = positionElem.getAttributeValue("position");
            if (tempPos != null) {
                positionsList.add(tempPos);
            }
        }
    }
    if (positionsList.isEmpty()) {
        // no position available: a single relationship with an empty position is still created
        positionsList.add("");
    }

    sequenceCautionProperties.put(BasicProteinSequenceCautionRel.RESOURCE_PROPERTY, resourceSt);
    sequenceCautionProperties.put(BasicProteinSequenceCautionRel.ID_PROPERTY, idSt);
    sequenceCautionProperties.put(BasicProteinSequenceCautionRel.VERSION_PROPERTY, versionSt);

    if (conflictTypeSt == null) {
        // a conflict without type cannot be mapped to any sequence caution node
        return;
    }

    for (String tempPosition : positionsList) {
        sequenceCautionProperties.put(BasicProteinSequenceCautionRel.POSITION_PROPERTY, tempPosition);
        switch (conflictTypeSt) {
        case ProteinErroneousGeneModelPredictionRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, seqCautionErroneousGeneModelPredictionId,
                    proteinErroneousGeneModelPredictionRel, sequenceCautionProperties);
            break;
        case ProteinErroneousInitiationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, seqCautionErroneousInitiationId,
                    proteinErroneousInitiationRel, sequenceCautionProperties);
            break;
        case ProteinErroneousTranslationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, seqCautionErroneousTranslationId,
                    proteinErroneousTranslationRel, sequenceCautionProperties);
            break;
        case ProteinErroneousTerminationRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, seqCautionErroneousTerminationId,
                    proteinErroneousTerminationRel, sequenceCautionProperties);
            break;
        case ProteinFrameshiftRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, seqCautionFrameshiftId, proteinFrameshiftRel,
                    sequenceCautionProperties);
            break;
        case ProteinMiscellaneousDiscrepancyRel.UNIPROT_ATTRIBUTE_TYPE_VALUE:
            inserter.createRelationship(currentProteinId, seqCautionMiscellaneousDiscrepancyId,
                    proteinMiscellaneousDiscrepancyRel, sequenceCautionProperties);
            break;
        }
    }
}

/** Creates one RNA editing relationship per location element of the comment. */
private static void importRnaEditingComment(Element commentElem, BatchInserter inserter,
        long currentProteinId, long commentTypeId, String commentTextSt, String commentStatusSt,
        String commentEvidenceSt) {

    rnaEditingCommentProperties.put(RnaEditingCommentRel.STATUS_PROPERTY, commentStatusSt);
    rnaEditingCommentProperties.put(RnaEditingCommentRel.EVIDENCE_PROPERTY, commentEvidenceSt);
    rnaEditingCommentProperties.put(RnaEditingCommentRel.TEXT_PROPERTY, commentTextSt);

    List<Element> locationsList = commentElem.getChildren("location");
    for (Element tempLoc : locationsList) {
        // a location without position child (or attribute) is stored with an empty position
        String positionSt = "";
        Element positionElem = tempLoc.getChild("position");
        if (positionElem != null) {
            positionSt = nullToEmptySt(positionElem.getAttributeValue("position"));
        }
        rnaEditingCommentProperties.put(RnaEditingCommentRel.POSITION_PROPERTY, positionSt);
        inserter.createRelationship(currentProteinId, commentTypeId, rnaEditingCommentRel,
                rnaEditingCommentProperties);
    }
}

/** Creates the mass spectrometry relationship, including method, mass and the begin/end positions. */
private static void importMassSpectrometryComment(Element commentElem, BatchInserter inserter,
        long currentProteinId, long commentTypeId, String commentTextSt, String commentStatusSt,
        String commentEvidenceSt) {

    String methodSt = nullToEmptySt(commentElem.getAttributeValue("method"));
    String massSt = nullToEmptySt(commentElem.getAttributeValue("mass"));

    String beginSt = "";
    String endSt = "";
    Element locationElem = commentElem.getChild("location");
    if (locationElem != null) {
        // begin/end are looked up inside the location element, not in the comment itself
        Element beginElem = locationElem.getChild("begin");
        Element endElem = locationElem.getChild("end");
        if (beginElem != null) {
            beginSt = nullToEmptySt(beginElem.getAttributeValue("position"));
        }
        if (endElem != null) {
            endSt = nullToEmptySt(endElem.getAttributeValue("position"));
        }
    }

    massSpectrometryCommentProperties.put(MassSpectrometryCommentRel.STATUS_PROPERTY, commentStatusSt);
    massSpectrometryCommentProperties.put(MassSpectrometryCommentRel.EVIDENCE_PROPERTY, commentEvidenceSt);
    massSpectrometryCommentProperties.put(MassSpectrometryCommentRel.TEXT_PROPERTY, commentTextSt);
    massSpectrometryCommentProperties.put(MassSpectrometryCommentRel.METHOD_PROPERTY, methodSt);
    massSpectrometryCommentProperties.put(MassSpectrometryCommentRel.MASS_PROPERTY, massSt);
    massSpectrometryCommentProperties.put(MassSpectrometryCommentRel.BEGIN_PROPERTY, beginSt);
    massSpectrometryCommentProperties.put(MassSpectrometryCommentRel.END_PROPERTY, endSt);
    inserter.createRelationship(currentProteinId, commentTypeId, massSpectrometryCommentRel,
            massSpectrometryCommentProperties);
}

From source file:com.bio4j.neo4jdb.programs.ImportUniprot.java

License:Open Source License

private static void importProteinCitations(XMLElement entryXMLElem, BatchInserter inserter,
        BatchInserterIndexProvider indexProvider, long currentProteinId, UniprotDataXML uniprotDataXML) {

    //-----------------create batch indexes----------------------------------
    //----------------------------------------------------------------------
    BatchInserterIndex personNameIndex = indexProvider.nodeIndex(PersonNode.PERSON_NAME_FULL_TEXT_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST));
    BatchInserterIndex consortiumNameIndex = indexProvider.nodeIndex(ConsortiumNode.CONSORTIUM_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex thesisTitleIndex = indexProvider.nodeIndex(ThesisNode.THESIS_TITLE_FULL_TEXT_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST));
    BatchInserterIndex instituteNameIndex = indexProvider.nodeIndex(InstituteNode.INSTITUTE_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex countryNameIndex = indexProvider.nodeIndex(CountryNode.COUNTRY_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex cityNameIndex = indexProvider.nodeIndex(CityNode.CITY_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex patentNumberIndex = indexProvider.nodeIndex(PatentNode.PATENT_NUMBER_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex bookNameIndex = indexProvider.nodeIndex(BookNode.BOOK_NAME_FULL_TEXT_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST));
    BatchInserterIndex publisherNameIndex = indexProvider.nodeIndex(PublisherNode.PUBLISHER_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex onlineArticleTitleIndex = indexProvider.nodeIndex(
            OnlineArticleNode.ONLINE_ARTICLE_TITLE_FULL_TEXT_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST));
    BatchInserterIndex onlineJournalNameIndex = indexProvider.nodeIndex(
            OnlineJournalNode.ONLINE_JOURNAL_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex submissionTitleIndex = indexProvider.nodeIndex(SubmissionNode.SUBMISSION_TITLE_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST));
    BatchInserterIndex articleTitleIndex = indexProvider.nodeIndex(ArticleNode.ARTICLE_TITLE_FULL_TEXT_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, FULL_TEXT_ST));
    BatchInserterIndex articleDoiIdIndex = indexProvider.nodeIndex(ArticleNode.ARTICLE_DOI_ID_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex articlePubmedIdIndex = indexProvider.nodeIndex(ArticleNode.ARTICLE_PUBMED_ID_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex articleMedlineIdIndex = indexProvider.nodeIndex(ArticleNode.ARTICLE_MEDLINE_ID_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex journalNameIndex = indexProvider.nodeIndex(JournalNode.JOURNAL_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex nodeTypeIndex = indexProvider.nodeIndex(Bio4jManager.NODE_TYPE_INDEX_NAME,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    BatchInserterIndex dbNameIndex = indexProvider.nodeIndex(DBNode.DB_NAME_INDEX,
            MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST));
    //----------------------------------------------------------------------
    //----------------------------------------------------------------------

    List<Element> referenceList = entryXMLElem.asJDomElement().getChildren(UniprotStuff.REFERENCE_TAG_NAME);

    for (Element reference : referenceList) {
        List<Element> citationsList = reference.getChildren(UniprotStuff.CITATION_TAG_NAME);
        for (Element citation : citationsList) {

            String citationType = citation.getAttributeValue(UniprotStuff.DB_REFERENCE_TYPE_ATTRIBUTE);

            List<Long> authorsPersonNodesIds = new ArrayList<>();
            List<Long> authorsConsortiumNodesIds = new ArrayList<>();

            List<Element> authorPersonElems = citation.getChild("authorList").getChildren("person");
            List<Element> authorConsortiumElems = citation.getChild("authorList").getChildren("consortium");

            for (Element person : authorPersonElems) {
                //long personId = indexService.getSingleNode(PersonNode.PERSON_NAME_INDEX, person.getAttributeValue("name"));
                long personId = -1;
                IndexHits<Long> personNameIndexHits = personNameIndex
                        .get(PersonNode.PERSON_NAME_FULL_TEXT_INDEX, person.getAttributeValue("name"));
                if (personNameIndexHits.hasNext()) {
                    personId = personNameIndexHits.getSingle();
                }
                personNameIndexHits.close();
                if (personId < 0) {
                    personProperties.put(PersonNode.NAME_PROPERTY, person.getAttributeValue("name"));
                    personId = createPersonNode(personProperties, inserter, personNameIndex, nodeTypeIndex);
                    //flushing person name index
                    personNameIndex.flush();
                }
                authorsPersonNodesIds.add(personId);
            }

            for (Element consortium : authorConsortiumElems) {

                long consortiumId = -1;
                IndexHits<Long> consortiumIdIndexHits = consortiumNameIndex
                        .get(ConsortiumNode.CONSORTIUM_NAME_INDEX, consortium.getAttributeValue("name"));
                if (consortiumIdIndexHits.hasNext()) {
                    consortiumId = consortiumIdIndexHits.getSingle();
                }
                consortiumIdIndexHits.close();
                if (consortiumId < 0) {
                    consortiumProperties.put(ConsortiumNode.NAME_PROPERTY,
                            consortium.getAttributeValue("name"));
                    consortiumId = createConsortiumNode(consortiumProperties, inserter, consortiumNameIndex,
                            nodeTypeIndex);
                    //---flushing consortium name index--
                    consortiumNameIndex.flush();
                }
                authorsConsortiumNodesIds.add(consortiumId);
            }
            //----------------------------------------------------------------------------
            //-----------------------------THESIS-----------------------------------------
            switch (citationType) {
            case ThesisNode.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                if (uniprotDataXML.getThesis()) {
                    String dateSt = citation.getAttributeValue("date");
                    String titleSt = citation.getChildText("title");
                    if (dateSt == null) {
                        dateSt = "";
                    }
                    if (titleSt == null) {
                        titleSt = "";
                    }

                    long thesisId = -1;
                    IndexHits<Long> thesisTitleIndexHits = thesisTitleIndex
                            .get(ThesisNode.THESIS_TITLE_FULL_TEXT_INDEX, titleSt);
                    if (thesisTitleIndexHits.hasNext()) {
                        thesisId = thesisTitleIndexHits.getSingle();
                    }
                    thesisTitleIndexHits.close();
                    if (thesisId < 0) {
                        thesisProperties.put(ThesisNode.DATE_PROPERTY, dateSt);
                        thesisProperties.put(ThesisNode.TITLE_PROPERTY, titleSt);
                        //---thesis node creation and indexing
                        thesisId = inserter.createNode(thesisProperties);
                        nodeTypeIndex.add(thesisId,
                                MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, ThesisNode.NODE_TYPE));
                        thesisTitleIndex.add(thesisId,
                                MapUtil.map(ThesisNode.THESIS_TITLE_FULL_TEXT_INDEX, titleSt));
                        //flushing thesis title index
                        thesisTitleIndex.flush();
                        //---authors association-----
                        for (long personId : authorsPersonNodesIds) {
                            inserter.createRelationship(thesisId, personId, thesisAuthorRel, null);
                        }

                        //-----------institute-----------------------------
                        String instituteSt = citation.getAttributeValue("institute");
                        String countrySt = citation.getAttributeValue("country");
                        if (instituteSt != null) {

                            long instituteId = -1;
                            IndexHits<Long> instituteNameIndexHits = instituteNameIndex
                                    .get(InstituteNode.INSTITUTE_NAME_INDEX, instituteSt);
                            if (instituteNameIndexHits.hasNext()) {
                                instituteId = instituteNameIndexHits.getSingle();
                            }
                            instituteNameIndexHits.close();
                            if (instituteId < 0) {
                                instituteProperties.put(InstituteNode.NAME_PROPERTY, instituteSt);
                                instituteId = createInstituteNode(instituteProperties, inserter,
                                        instituteNameIndex, nodeTypeIndex);
                                //flushing institute name index
                                instituteNameIndex.flush();
                            }
                            if (countrySt != null) {
                                //long countryId = indexService.getSingleNode(CountryNode.COUNTRY_NAME_INDEX, countrySt);
                                long countryId = -1;
                                IndexHits<Long> countryNameIndexHits = countryNameIndex
                                        .get(CountryNode.COUNTRY_NAME_INDEX, countrySt);
                                if (countryNameIndexHits.hasNext()) {
                                    countryId = countryNameIndexHits.getSingle();
                                }
                                countryNameIndexHits.close();
                                if (countryId < 0) {
                                    countryProperties.put(CountryNode.NAME_PROPERTY, countrySt);
                                    countryId = createCountryNode(countryProperties, inserter, countryNameIndex,
                                            nodeTypeIndex);
                                    //flushing country name index
                                    countryNameIndex.flush();
                                }
                                inserter.createRelationship(instituteId, countryId, instituteCountryRel, null);
                            }
                            inserter.createRelationship(thesisId, instituteId, thesisInstituteRel, null);
                        }
                    }

                    //--protein citation relationship
                    inserter.createRelationship(thesisId, currentProteinId, thesisProteinCitationRel, null);

                }

                //----------------------------------------------------------------------------
                //-----------------------------PATENT-----------------------------------------
                break;
            case PatentNode.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                if (uniprotDataXML.getPatents()) {
                    String numberSt = citation.getAttributeValue("number");
                    String dateSt = citation.getAttributeValue("date");
                    String titleSt = citation.getChildText("title");
                    if (dateSt == null) {
                        dateSt = "";
                    }
                    if (titleSt == null) {
                        titleSt = "";
                    }
                    if (numberSt == null) {
                        numberSt = "";
                    }

                    if (!numberSt.equals("")) {
                        long patentId = -1;
                        IndexHits<Long> patentNumberIndexHits = patentNumberIndex
                                .get(PatentNode.PATENT_NUMBER_INDEX, numberSt);
                        if (patentNumberIndexHits.hasNext()) {
                            patentId = patentNumberIndexHits.getSingle();
                        }
                        patentNumberIndexHits.close();

                        if (patentId < 0) {
                            patentProperties.put(PatentNode.NUMBER_PROPERTY, numberSt);
                            patentProperties.put(PatentNode.DATE_PROPERTY, dateSt);
                            patentProperties.put(PatentNode.TITLE_PROPERTY, titleSt);
                            //---patent node creation and indexing
                            patentId = inserter.createNode(patentProperties);
                            patentNumberIndex.add(patentId,
                                    MapUtil.map(PatentNode.PATENT_NUMBER_INDEX, numberSt));
                            nodeTypeIndex.add(patentId,
                                    MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, PatentNode.NODE_TYPE));
                            //---flushing patent number index---
                            patentNumberIndex.flush();
                            //---authors association-----
                            for (long personId : authorsPersonNodesIds) {
                                inserter.createRelationship(patentId, personId, patentAuthorRel, null);
                            }
                        }

                        //--protein citation relationship
                        inserter.createRelationship(patentId, currentProteinId, patentProteinCitationRel, null);
                    }
                }

                //----------------------------------------------------------------------------
                //-----------------------------SUBMISSION-----------------------------------------
                break;
            case SubmissionNode.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                if (uniprotDataXML.getSubmissions()) {
                    String dateSt = citation.getAttributeValue("date");
                    String titleSt = citation.getChildText("title");
                    String dbSt = citation.getAttributeValue("db");
                    if (dateSt == null) {
                        dateSt = "";
                    }
                    if (titleSt == null) {
                        titleSt = "";
                    }

                    submissionProperties.put(SubmissionNode.DATE_PROPERTY, dateSt);
                    submissionProperties.put(SubmissionNode.TITLE_PROPERTY, titleSt);

                    long submissionId;
                    IndexHits<Long> submissionTitleIndexHits = submissionTitleIndex
                            .get(SubmissionNode.SUBMISSION_TITLE_INDEX, titleSt);
                    if (submissionTitleIndexHits.hasNext()) {
                        submissionId = submissionTitleIndexHits.getSingle();
                        submissionTitleIndexHits.close();
                    } else {
                        //---submission node creation and indexing
                        submissionId = inserter.createNode(submissionProperties);
                        //--indexing node by type---
                        nodeTypeIndex.add(submissionId,
                                MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, SubmissionNode.NODE_TYPE));
                        if (!titleSt.isEmpty()) {
                            //--indexing node by title---
                            submissionTitleIndex.add(submissionId,
                                    MapUtil.map(SubmissionNode.SUBMISSION_TITLE_INDEX, titleSt));
                            submissionTitleIndex.flush();
                        }
                    }

                    //---authors association-----
                    for (long personId : authorsPersonNodesIds) {
                        inserter.createRelationship(submissionId, personId, submissionAuthorRel, null);
                    }
                    //---authors consortium association-----
                    for (long consortiumId : authorsConsortiumNodesIds) {
                        inserter.createRelationship(submissionId, consortiumId, submissionAuthorRel, null);
                    }

                    if (dbSt != null) {
                        long dbId = -1;
                        IndexHits<Long> dbNameIndexHits = dbNameIndex.get(DBNode.DB_NAME_INDEX, dbSt);
                        if (dbNameIndexHits.hasNext()) {
                            dbId = dbNameIndexHits.getSingle();
                        }
                        dbNameIndexHits.close();
                        if (dbId < 0) {
                            dbProperties.put(DBNode.NODE_TYPE_PROPERTY, DBNode.NODE_TYPE);
                            dbProperties.put(DBNode.NAME_PROPERTY, dbSt);
                            dbId = createDbNode(dbProperties, inserter, dbNameIndex, nodeTypeIndex);
                            dbNameIndex.flush();
                        }
                        //-----submission db relationship-----
                        inserter.createRelationship(submissionId, dbId, submissionDbRel, null);
                    }

                    //--protein citation relationship
                    inserter.createRelationship(submissionId, currentProteinId, submissionProteinCitationRel,
                            null);

                }

                //----------------------------------------------------------------------------
                //-----------------------------BOOK-----------------------------------------
                break;
            case BookNode.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                if (uniprotDataXML.getBooks()) {
                    String nameSt = citation.getAttributeValue("name");
                    String dateSt = citation.getAttributeValue("date");
                    String titleSt = citation.getChildText("title");
                    String publisherSt = citation.getAttributeValue("publisher");
                    String firstSt = citation.getAttributeValue("first");
                    String lastSt = citation.getAttributeValue("last");
                    String citySt = citation.getAttributeValue("city");
                    String volumeSt = citation.getAttributeValue("volume");
                    if (nameSt == null) {
                        nameSt = "";
                    }
                    if (dateSt == null) {
                        dateSt = "";
                    }
                    if (titleSt == null) {
                        titleSt = "";
                    }
                    if (publisherSt == null) {
                        publisherSt = "";
                    }
                    if (firstSt == null) {
                        firstSt = "";
                    }
                    if (lastSt == null) {
                        lastSt = "";
                    }
                    if (citySt == null) {
                        citySt = "";
                    }
                    if (volumeSt == null) {
                        volumeSt = "";
                    }

                    long bookId = -1;
                    IndexHits<Long> bookNameIndexHits = bookNameIndex.get(BookNode.BOOK_NAME_FULL_TEXT_INDEX,
                            nameSt);
                    if (bookNameIndexHits.hasNext()) {
                        bookId = bookNameIndexHits.getSingle();
                    }
                    bookNameIndexHits.close();

                    if (bookId < 0) {
                        bookProperties.put(BookNode.NAME_PROPERTY, nameSt);
                        bookProperties.put(BookNode.DATE_PROPERTY, dateSt);
                        //---book node creation and indexing
                        bookId = inserter.createNode(bookProperties);

                        bookNameIndex.add(bookId, MapUtil.map(BookNode.BOOK_NAME_FULL_TEXT_INDEX, nameSt));
                        //--indexing node by type---
                        nodeTypeIndex.add(bookId,
                                MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, BookNode.NODE_TYPE));

                        //--flushing book name index---
                        bookNameIndex.flush();
                        //---authors association-----
                        for (long personId : authorsPersonNodesIds) {
                            inserter.createRelationship(bookId, personId, bookAuthorRel, null);
                        }

                        //---editor association-----
                        Element editorListElem = citation.getChild("editorList");
                        if (editorListElem != null) {
                            List<Element> editorsElems = editorListElem.getChildren("person");
                            for (Element person : editorsElems) {
                                //long editorId = indexService.getSingleNode(PersonNode.PERSON_NAME_INDEX, person.getAttributeValue("name"));
                                long editorId = -1;
                                IndexHits<Long> personNameIndexHits = personNameIndex.get(
                                        PersonNode.PERSON_NAME_FULL_TEXT_INDEX,
                                        person.getAttributeValue("name"));
                                if (personNameIndexHits.hasNext()) {
                                    editorId = personNameIndexHits.getSingle();
                                }
                                personNameIndexHits.close();
                                if (editorId < 0) {
                                    personProperties.put(PersonNode.NAME_PROPERTY,
                                            person.getAttributeValue("name"));
                                    editorId = createPersonNode(personProperties, inserter, personNameIndex,
                                            nodeTypeIndex);
                                }
                                //---flushing person name index---
                                personNameIndex.flush();
                                //editor association
                                inserter.createRelationship(bookId, editorId, bookEditorRel, null);
                            }
                        }

                        //----publisher--
                        if (!publisherSt.equals("")) {
                            //long publisherId = indexService.getSingleNode(PublisherNode.PUBLISHER_NAME_INDEX, publisherSt);
                            long publisherId = -1;
                            IndexHits<Long> publisherNameIndexHits = publisherNameIndex
                                    .get(PublisherNode.PUBLISHER_NAME_INDEX, publisherSt);
                            if (publisherNameIndexHits.hasNext()) {
                                publisherId = publisherNameIndexHits.getSingle();
                            }
                            publisherNameIndexHits.close();
                            if (publisherId < 0) {
                                publisherProperties.put(PublisherNode.NAME_PROPERTY, publisherSt);
                                publisherId = inserter.createNode(publisherProperties);
                                //--indexing node by type---
                                nodeTypeIndex.add(publisherId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME,
                                        PublisherNode.NODE_TYPE));
                                publisherNameIndex.add(publisherId,
                                        MapUtil.map(PublisherNode.PUBLISHER_NAME_INDEX, publisherSt));
                                //--flushing publisher name index--
                                publisherNameIndex.flush();
                            }
                            inserter.createRelationship(bookId, publisherId, bookPublisherRel, null);
                        }

                        //-----city-----
                        if (!citySt.equals("")) {
                            //long cityId = indexService.getSingleNode(CityNode.CITY_NAME_INDEX, citySt);
                            long cityId = -1;
                            IndexHits<Long> cityNameIndexHits = cityNameIndex.get(CityNode.CITY_NAME_INDEX,
                                    citySt);
                            if (cityNameIndexHits.hasNext()) {
                                cityId = cityNameIndexHits.getSingle();
                            }
                            cityNameIndexHits.close();
                            if (cityId < 0) {
                                cityProperties.put(CityNode.NAME_PROPERTY, citySt);
                                cityId = createCityNode(cityProperties, inserter, cityNameIndex, nodeTypeIndex);
                                //-----flushing city name index---
                                cityNameIndex.flush();
                            }
                            inserter.createRelationship(bookId, cityId, bookCityRel, null);
                        }
                    }

                    bookProteinCitationProperties.put(BookProteinCitationRel.FIRST_PROPERTY, firstSt);
                    bookProteinCitationProperties.put(BookProteinCitationRel.LAST_PROPERTY, lastSt);
                    bookProteinCitationProperties.put(BookProteinCitationRel.VOLUME_PROPERTY, volumeSt);
                    bookProteinCitationProperties.put(BookProteinCitationRel.TITLE_PROPERTY, titleSt);
                    //--protein citation relationship
                    inserter.createRelationship(bookId, currentProteinId, bookProteinCitationRel,
                            bookProteinCitationProperties);

                }

                //----------------------------------------------------------------------------
                //-----------------------------ONLINE ARTICLE-----------------------------------------
                break;
            case OnlineArticleNode.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                if (uniprotDataXML.getOnlineArticles()) {
                    String locatorSt = citation.getChildText("locator");
                    String nameSt = citation.getAttributeValue("name");
                    String titleSt = citation.getChildText("title");

                    if (titleSt == null) {
                        titleSt = "";
                    }
                    if (nameSt == null) {
                        nameSt = "";
                    }
                    if (locatorSt == null) {
                        locatorSt = "";
                    }

                    long onlineArticleId = -1;
                    IndexHits<Long> onlineArticleTitleIndexHits = onlineArticleTitleIndex
                            .get(OnlineArticleNode.ONLINE_ARTICLE_TITLE_FULL_TEXT_INDEX, titleSt);
                    if (onlineArticleTitleIndexHits.hasNext()) {
                        onlineArticleId = onlineArticleTitleIndexHits.getSingle();
                    }
                    onlineArticleTitleIndexHits.close();
                    if (onlineArticleId < 0) {
                        onlineArticleProperties.put(OnlineArticleNode.TITLE_PROPERTY, titleSt);
                        onlineArticleId = inserter.createNode(onlineArticleProperties);
                        //--indexing node by type---
                        nodeTypeIndex.add(onlineArticleId,
                                MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, OnlineArticleNode.NODE_TYPE));

                        if (!titleSt.equals("")) {
                            onlineArticleTitleIndex.add(onlineArticleId, MapUtil
                                    .map(OnlineArticleNode.ONLINE_ARTICLE_TITLE_FULL_TEXT_INDEX, titleSt));
                            //-----flushing online article title index---
                            onlineArticleTitleIndex.flush();
                        }

                        //---authors person association-----
                        for (long personId : authorsPersonNodesIds) {
                            inserter.createRelationship(onlineArticleId, personId, onlineArticleAuthorRel,
                                    null);
                        }
                        //---authors consortium association-----
                        for (long consortiumId : authorsConsortiumNodesIds) {
                            inserter.createRelationship(onlineArticleId, consortiumId, onlineArticleAuthorRel,
                                    null);
                        }

                        //------online journal-----------
                        if (!nameSt.equals("")) {

                            long onlineJournalId = -1;
                            IndexHits<Long> onlineJournalNameIndexHits = onlineJournalNameIndex
                                    .get(OnlineJournalNode.ONLINE_JOURNAL_NAME_INDEX, nameSt);
                            if (onlineJournalNameIndexHits.hasNext()) {
                                onlineJournalId = onlineJournalNameIndexHits.getSingle();
                            }
                            onlineJournalNameIndexHits.close();
                            if (onlineJournalId < 0) {
                                onlineJournalProperties.put(OnlineJournalNode.NAME_PROPERTY, nameSt);
                                onlineJournalId = inserter.createNode(onlineJournalProperties);
                                //--indexing node by type---
                                nodeTypeIndex.add(onlineJournalId, MapUtil
                                        .map(Bio4jManager.NODE_TYPE_INDEX_NAME, OnlineJournalNode.NODE_TYPE));
                                onlineJournalNameIndex.add(onlineJournalId,
                                        MapUtil.map(OnlineJournalNode.ONLINE_JOURNAL_NAME_INDEX, nameSt));

                                //---flushing online journal name index---
                                onlineJournalNameIndex.flush();
                            }

                            onlineArticleJournalProperties.put(OnlineArticleJournalRel.LOCATOR_PROPERTY,
                                    locatorSt);
                            inserter.createRelationship(onlineArticleId, onlineJournalId,
                                    onlineArticleJournalRel, onlineArticleJournalProperties);
                        }
                        //----------------------------
                    }
                    //protein citation
                    inserter.createRelationship(onlineArticleId, currentProteinId,
                            onlineArticleProteinCitationRel, null);

                }

                //----------------------------------------------------------------------------
                //-----------------------------ARTICLE-----------------------------------------
                break;
            case ArticleNode.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                if (uniprotDataXML.getArticles()) {
                    String journalNameSt = citation.getAttributeValue("name");
                    String dateSt = citation.getAttributeValue("date");
                    String titleSt = citation.getChildText("title");
                    String firstSt = citation.getAttributeValue("first");
                    String lastSt = citation.getAttributeValue("last");
                    String volumeSt = citation.getAttributeValue("volume");
                    String doiSt = "";
                    String medlineSt = "";
                    String pubmedSt = "";

                    if (journalNameSt == null) {
                        journalNameSt = "";
                    }
                    if (dateSt == null) {
                        dateSt = "";
                    }
                    if (firstSt == null) {
                        firstSt = "";
                    }
                    if (lastSt == null) {
                        lastSt = "";
                    }
                    if (volumeSt == null) {
                        volumeSt = "";
                    }
                    if (titleSt == null) {
                        titleSt = "";
                    }

                    List<Element> dbReferences = citation.getChildren("dbReference");
                    for (Element tempDbRef : dbReferences) {
                        switch (tempDbRef.getAttributeValue("type")) {
                        case "DOI":
                            doiSt = tempDbRef.getAttributeValue("id");
                            break;
                        case "MEDLINE":
                            medlineSt = tempDbRef.getAttributeValue("id");
                            break;
                        case "PubMed":
                            pubmedSt = tempDbRef.getAttributeValue("id");
                            break;
                        }
                    }

                    //long articleId = indexService.getSingleNode(ArticleNode.ARTICLE_TITLE_FULL_TEXT_INDEX, titleSt);
                    long articleId = -1;
                    IndexHits<Long> articleTitleIndexHits = articleTitleIndex
                            .get(ArticleNode.ARTICLE_TITLE_FULL_TEXT_INDEX, titleSt);
                    if (articleTitleIndexHits.hasNext()) {
                        articleId = articleTitleIndexHits.getSingle();
                    }
                    articleTitleIndexHits.close();
                    if (articleId < 0) {
                        articleProperties.put(ArticleNode.TITLE_PROPERTY, titleSt);
                        articleProperties.put(ArticleNode.DOI_ID_PROPERTY, doiSt);
                        articleProperties.put(ArticleNode.MEDLINE_ID_PROPERTY, medlineSt);
                        articleProperties.put(ArticleNode.PUBMED_ID_PROPERTY, pubmedSt);
                        articleId = inserter.createNode(articleProperties);
                        //--indexing node by type---
                        nodeTypeIndex.add(articleId,
                                MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, ArticleNode.NODE_TYPE));
                        if (!titleSt.equals("")) {
                            articleTitleIndex.add(articleId,
                                    MapUtil.map(ArticleNode.ARTICLE_TITLE_FULL_TEXT_INDEX, titleSt));
                            //--flushing article title index---
                            articleTitleIndex.flush();
                        }

                        //---indexing by medline, doi and pubmed--
                        if (!doiSt.isEmpty()) {
                            articleDoiIdIndex.add(articleId,
                                    MapUtil.map(ArticleNode.ARTICLE_DOI_ID_INDEX, doiSt));
                        }
                        if (!medlineSt.isEmpty()) {
                            articleMedlineIdIndex.add(articleId,
                                    MapUtil.map(ArticleNode.ARTICLE_MEDLINE_ID_INDEX, medlineSt));
                        }
                        if (!pubmedSt.isEmpty()) {
                            articlePubmedIdIndex.add(articleId,
                                    MapUtil.map(ArticleNode.ARTICLE_PUBMED_ID_INDEX, pubmedSt));
                        }

                        //---authors person association-----
                        for (long personId : authorsPersonNodesIds) {
                            inserter.createRelationship(articleId, personId, articleAuthorRel, null);
                        }
                        //---authors consortium association-----
                        for (long consortiumId : authorsConsortiumNodesIds) {
                            inserter.createRelationship(articleId, consortiumId, articleAuthorRel, null);
                        }

                        //------journal-----------
                        if (!journalNameSt.equals("")) {
                            //long journalId = indexService.getSingleNode(JournalNode.JOURNAL_NAME_INDEX, journalNameSt);
                            long journalId = -1;
                            IndexHits<Long> journalNameIndexHits = journalNameIndex
                                    .get(JournalNode.JOURNAL_NAME_INDEX, journalNameSt);
                            if (journalNameIndexHits.hasNext()) {
                                journalId = journalNameIndexHits.getSingle();
                            }
                            journalNameIndexHits.close();
                            if (journalId < 0) {
                                journalProperties.put(JournalNode.NAME_PROPERTY, journalNameSt);
                                journalId = inserter.createNode(journalProperties);
                                //--indexing node by type---
                                nodeTypeIndex.add(journalId,
                                        MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME, JournalNode.NODE_TYPE));
                                journalNameIndex.add(journalId,
                                        MapUtil.map(JournalNode.JOURNAL_NAME_INDEX, journalNameSt));
                                //----flushing journal name index----
                                journalNameIndex.flush();
                            }

                            articleJournalProperties.put(ArticleJournalRel.DATE_PROPERTY, dateSt);
                            articleJournalProperties.put(ArticleJournalRel.FIRST_PROPERTY, firstSt);
                            articleJournalProperties.put(ArticleJournalRel.LAST_PROPERTY, lastSt);
                            articleJournalProperties.put(ArticleJournalRel.VOLUME_PROPERTY, volumeSt);
                            inserter.createRelationship(articleId, journalId, articleJournalRel,
                                    articleJournalProperties);
                        }
                        //----------------------------
                    }
                    //protein citation
                    inserter.createRelationship(articleId, currentProteinId, articleProteinCitationRel, null);

                }

                //----------------------------------------------------------------------------
                //----------------------UNPUBLISHED OBSERVATIONS-----------------------------------------
                break;
            case UnpublishedObservationNode.UNIPROT_ATTRIBUTE_TYPE_VALUE:
                if (uniprotDataXML.getUnpublishedObservations()) {
                    String dateSt = citation.getAttributeValue("date");
                    if (dateSt == null) {
                        dateSt = "";
                    }

                    unpublishedObservationProperties.put(UnpublishedObservationNode.DATE_PROPERTY, dateSt);
                    long unpublishedObservationId = inserter.createNode(unpublishedObservationProperties);
                    //--indexing node by type---
                    nodeTypeIndex.add(unpublishedObservationId, MapUtil.map(Bio4jManager.NODE_TYPE_INDEX_NAME,
                            UnpublishedObservationNode.NODE_TYPE));

                    //---authors person association-----
                    for (long personId : authorsPersonNodesIds) {
                        inserter.createRelationship(unpublishedObservationId, personId,
                                unpublishedObservationAuthorRel, null);
                    }

                    inserter.createRelationship(unpublishedObservationId, currentProteinId,
                            unpublishedObservationProteinCitationRel, null);
                }
                break;
            }
        }
    }

}

From source file:com.bio4j.neo4jdb.programs.ImportUniref.java

License:Open Source License

/**
 * Extracts the UniProtKB accession from the {@code dbReference} child of the given element.
 *
 * The {@code property} children of the first {@code dbReference} child are scanned for those
 * whose {@code type} attribute equals {@code "UniProtKB accession"}; when several match, the
 * {@code value} of the last one wins.
 *
 * @param elem element expected to hold a {@code dbReference} child; may be {@code null}
 * @return the {@code value} attribute of the last matching property, or {@code null} when
 *         {@code elem}, its {@code dbReference} child or a matching property is missing
 */
private static String getRepresentantAccession(Element elem) {
    if (elem == null) {
        return null;
    }
    Element dbReference = elem.getChild("dbReference");
    if (dbReference == null) {
        // getChild returns null when there is no such child; report "not found" instead of failing.
        return null;
    }

    String result = null;
    for (Element prop : dbReference.getChildren("property")) {
        // Constant-first equals: getAttributeValue returns null for a property without "type".
        if ("UniProtKB accession".equals(prop.getAttributeValue("type"))) {
            result = prop.getAttributeValue("value");
        }
    }
    return result;
}

From source file:com.bio4j.neo4jdb.programs.ImportUniref.java

License:Open Source License

/**
 * Reads the given file entry by entry and, for each entry, creates one relationship from the
 * node of its representative member to the node of every {@code member} that can be resolved.
 *
 * An entry starts on a line beginning with {@code "<" + UniprotStuff.ENTRY_TAG_NAME} and ends on
 * the line beginning with the matching closing tag. Accessions containing {@code '-'} are looked
 * up in {@code isoformIdIndex}, all others in {@code proteinAccessionIndex}; an accession is only
 * used when the index returns exactly one hit. Entries without a representative accession are
 * logged at SEVERE level and skipped.
 *
 * @param inserter              batch inserter used to create the relationships
 * @param proteinAccessionIndex index queried with {@code ProteinNode.PROTEIN_ACCESSION_INDEX}
 * @param isoformIdIndex        index queried with {@code IsoformNode.ISOFORM_ID_INDEX}
 * @param unirefFile            file to read
 * @param relationship          relationship type for representative-to-member relationships
 * @return the number of entries processed
 * @throws Exception if the file cannot be read, ends in the middle of an entry, or an entry
 *                   cannot be parsed
 */
private static int importUnirefFile(BatchInserter inserter, BatchInserterIndex proteinAccessionIndex,
        BatchInserterIndex isoformIdIndex, File unirefFile, BasicRelationship relationship) throws Exception {

    final String entryOpeningPrefix = "<" + UniprotStuff.ENTRY_TAG_NAME;
    final String entryClosingTag = "</" + UniprotStuff.ENTRY_TAG_NAME + ">";
    final int limitForPrintingOut = 10000;

    StringBuilder entryStBuilder = new StringBuilder();
    int entryCounter = 0;

    // try-with-resources: the reader is released even when parsing or inserting fails.
    try (BufferedReader reader = new BufferedReader(new FileReader(unirefFile))) {
        String line;
        while ((line = reader.readLine()) != null) {
            //----only lines opening an entry are of interest-----
            if (!line.trim().startsWith(entryOpeningPrefix)) {
                continue;
            }

            // Accumulate the whole entry, up to (and including) its closing tag.
            while (!line.trim().startsWith(entryClosingTag)) {
                entryStBuilder.append(line);
                line = reader.readLine();
                if (line == null) {
                    throw new java.io.IOException(
                            "Unexpected end of file inside an entry of " + unirefFile);
                }
            }
            entryStBuilder.append(line);

            XMLElement entryXMLElem = new XMLElement(entryStBuilder.toString());
            entryStBuilder.setLength(0);
            Element entryElem = entryXMLElem.asJDomElement();

            Element representativeMember = entryElem.getChild("representativeMember");
            String representantAccession = (representativeMember == null) ? null
                    : getRepresentantAccession(representativeMember);

            List<String> membersAccessionList = new ArrayList<String>();
            for (Element member : entryElem.getChildren("member")) {
                Element memberDbReference = member.getChild("dbReference");
                if (memberDbReference == null) {
                    continue;
                }
                for (Element prop : memberDbReference.getChildren("property")) {
                    if ("UniProtKB accession".equals(prop.getAttributeValue("type"))) {
                        String memberAccession = prop.getAttributeValue("value");
                        if (memberAccession != null) {
                            membersAccessionList.add(memberAccession);
                        }
                    }
                }
            }

            if (representantAccession != null) {
                long representantId = findSingleNodeIdForAccession(proteinAccessionIndex, isoformIdIndex,
                        representantAccession);

                //----we only create the relationships in the case where we found
                // a valid representant id-----
                if (representantId >= 0) {
                    for (String memberAccession : membersAccessionList) {
                        long memberId = findSingleNodeIdForAccession(proteinAccessionIndex, isoformIdIndex,
                                memberAccession);
                        if (memberId >= 0) {
                            inserter.createRelationship(representantId, memberId, relationship, null);
                        }
                    }
                }
            } else {
                logger.log(Level.SEVERE, ("null representant accession for entry: "
                        + entryElem.getAttributeValue("id")));
            }

            // Counted per entry (not per line read) so the log message and return value are accurate.
            entryCounter++;
            if ((entryCounter % limitForPrintingOut) == 0) {
                logger.log(Level.INFO, (entryCounter + " entries parsed!!"));
            }
        }
    }

    return entryCounter;
}

/**
 * Looks up the node id for an accession: the isoform index is used when the accession contains
 * {@code '-'}, the protein accession index otherwise.
 *
 * @return the node id when the index returns exactly one hit, {@code -1} otherwise
 */
private static long findSingleNodeIdForAccession(BatchInserterIndex proteinAccessionIndex,
        BatchInserterIndex isoformIdIndex, String accession) {
    IndexHits<Long> hits = accession.contains("-")
            ? isoformIdIndex.get(IsoformNode.ISOFORM_ID_INDEX, accession)
            : proteinAccessionIndex.get(ProteinNode.PROTEIN_ACCESSION_INDEX, accession);
    try {
        return (hits.size() == 1) ? hits.getSingle() : -1L;
    } finally {
        hits.close();
    }
}

From source file:com.c4om.autoconf.ulysses.extra.svinchangesetgenerator.SVINChangesetGenerator.java

License:Apache License

/**
 * This method generates a changeset document, which describes what nodes
 * must be added and replaced. It generates it from the SVRLInterpreter
 * report passed at constructor./* ww  w  .ja  v a  2 s.  c o  m*/
 * 
 * @param pathToConfiguration path to the runtime configuration.
 * 
 * @param reportDocument the report document (objective solution description).
 * 
 * @return The generated changeset document.
 * 
 * @throws JDOMException
 *             If there are problems at JDOM2 XML parsings
 * @throws IOException
 *             I/O problems
 * @throws SaxonApiException
 *             problems with Saxon API while transforming metamodel
 *             suggestions into partial autocomplete nodes
 * @throws ParserConfigurationException
 *             problems with javax.xml APIs while transforming metamodel
 *             suggestions into partial autocomplete nodes
 */
public Document getSingleChangesetDocument(String pathToConfiguration, Document reportDocument)
        throws JDOMException, IOException, SaxonApiException, ParserConfigurationException {
    Element resultRoot = new Element("changeset", AutoconfXMLConstants.NAMESPACE_SVINAPPLIER);
    resultRoot.addNamespaceDeclaration(NAMESPACE_AUTOCONF_METADATA); // To
    // prevent
    // several
    // "xmlns:*****"
    // attributes
    // to
    // appear
    // everywhere
    Document result = new Document(resultRoot);
    Element reportElement = reportDocument.getRootElement();
    for (Element currentDiscrepancyElement : reportElement.getChildren()) {
        boolean isCreate = false;
        Element interestingPathsElement = currentDiscrepancyElement.getChild("interestingPaths",
                NAMESPACE_SVRL_INTERPETER_REPORT);
        String searchPathText = interestingPathsElement.getAttributeValue("search-path");
        String basePathText = interestingPathsElement.getAttributeValue("base-path");
        String keySubpathText = interestingPathsElement.getAttributeValue("key-subpath");
        // First, we look for a path to search the element where discrepancy
        // took place (if it exists)
        String[] docAndPath;
        String searchPathInternal;
        if (searchPathText == null) {
            docAndPath = divideDocAndPath(basePathText);
            searchPathInternal = docAndPath[1] + "[" + keySubpathText + "]";
        } else {
            docAndPath = divideDocAndPath(searchPathText);
            searchPathInternal = docAndPath[1];
        }
        if (!documentCache.containsKey(docAndPath[0])) {
            documentCache.put(docAndPath[0],
                    loadJDOMDocumentFromFile(new File(pathToConfiguration + "/" + docAndPath[0])));
        }
        Document currentDoc = documentCache.get(docAndPath[0]);
        List<Element> discordingElementAtDocList = performJAXENXPath(searchPathInternal, currentDoc,
                Filters.element(), xpathNamespaces);
        if (discordingElementAtDocList.size() == 0) {
            isCreate = true;
        }
        if (isCreate) {
            Element nodeToCreate = currentDiscrepancyElement
                    .getChild("suggestedPartialNode", NAMESPACE_SVRL_INTERPETER_REPORT).getChildren().get(0)
                    .clone();
            //Sometimes, svinrep namespace is declared here (it is not clear why). We must remove it.
            nodeToCreate.removeNamespaceDeclaration(NAMESPACE_SVRL_INTERPETER_REPORT);
            boolean thereAreMetamodelSuggestions = currentDiscrepancyElement
                    .getChild("metamodelSuggestions", NAMESPACE_SVRL_INTERPETER_REPORT).getChildren()
                    .size() > 0;
            if (thereAreMetamodelSuggestions) {
                Element metamodelSuggestionUntransformed = currentDiscrepancyElement
                        .getChild("metamodelSuggestions", NAMESPACE_SVRL_INTERPETER_REPORT).getChildren().get(0)
                        .clone();
                Document suggestionMiniDocument = new Document(metamodelSuggestionUntransformed);
                Document suggestionMiniDocumentTransformed = performXSLT(suggestionMiniDocument,
                        xsltTransformMetamodelDocument);
                Element metamodelSuggestion = suggestionMiniDocumentTransformed.getRootElement();
                Attribute metadataAttribute = new Attribute("autogen-from", "metamodel",
                        NAMESPACE_AUTOCONF_METADATA);
                mixTreesRecursive(metamodelSuggestion, nodeToCreate, metadataAttribute,
                        NAMESPACE_AUTOCONF_METADATA.getURI());
            } else {
                Attribute mayNeedManualCompletion = new Attribute("may-need-completion", "true",
                        NAMESPACE_AUTOCONF_METADATA);
                nodeToCreate.setAttribute(mayNeedManualCompletion);
            }
            Element createNodeElement = new Element("add-node", AutoconfXMLConstants.NAMESPACE_SVINAPPLIER);
            final String REGEXP_TO_GET_PARENT_PATH = "(.+)(/[^\\[\\]/]+(\\[.+\\])?)$";
            Pattern patternToGetParentPath = Pattern.compile(REGEXP_TO_GET_PARENT_PATH);
            Matcher matcherToGetParentPath = patternToGetParentPath.matcher(searchPathInternal);
            matcherToGetParentPath.matches();
            String pathToParent = matcherToGetParentPath.group(1);
            Attribute pathToParentAttr = new Attribute("underParentAtPath", pathToParent);
            Attribute documentToChangeAttr = new Attribute("atResource", docAndPath[0]);
            createNodeElement.setAttribute(documentToChangeAttr);
            createNodeElement.setAttribute(pathToParentAttr);
            createNodeElement.addContent(nodeToCreate);
            resultRoot.addContent(createNodeElement);

        } else {
            for (int i = 0; i < discordingElementAtDocList.size(); i++) {
                Element nodeToModify = currentDiscrepancyElement
                        .getChild("suggestedPartialNode", NAMESPACE_SVRL_INTERPETER_REPORT).getChildren().get(0)
                        .clone();
                //Sometimes, svinrep namespace is declared here (it is not clear why). We must remove it.
                nodeToModify.removeNamespaceDeclaration(NAMESPACE_SVRL_INTERPETER_REPORT);
                Element discordingElementAtDoc = discordingElementAtDocList.get(i);
                mixTreesRecursive(discordingElementAtDoc, nodeToModify, null,
                        NAMESPACE_AUTOCONF_METADATA.getURI());
                Element replaceNodeElement = new Element("replace-node",
                        AutoconfXMLConstants.NAMESPACE_SVINAPPLIER);
                Attribute pathToElementAttr = new Attribute("atPath",
                        generateAttributeBasedPath(discordingElementAtDoc));
                Attribute documentToChangeAttr = new Attribute("atResource", docAndPath[0]);
                replaceNodeElement.setAttribute(documentToChangeAttr);
                replaceNodeElement.setAttribute(pathToElementAttr);
                replaceNodeElement.addContent(nodeToModify);
                resultRoot.addContent(replaceNodeElement);
            }
        }
    }
    return result;
}

From source file:com.cats.version.VersionCfgParseAndSave.java

License:Apache License

/**
 * Parses the XML file at {@code fullPath} and builds one {@link VersionInfo} per
 * {@code software} child of the root element.
 *
 * The {@code latest-version-detail} and {@code ignore-files} children are both treated as
 * optional: when one is absent the corresponding list is left empty.
 *
 * @param fullPath path of the XML file to read
 * @return the parsed entries, or {@code null} when the file cannot be read or parsed (for
 *         example a missing {@code name} attribute or a non-numeric
 *         {@code latest-version-code}); the failure is printed to standard error
 */
public List<VersionInfo> getVersionInfo(String fullPath) {
    SAXBuilder builder = new SAXBuilder();
    List<VersionInfo> infos = new ArrayList<VersionInfo>();
    try {
        Document doc = builder.build(new File(fullPath));
        Element root = doc.getRootElement();
        List<Element> softEles = root.getChildren("software");
        for (Element softEle : softEles) {
            String appName = softEle.getAttribute("name").getValue();
            String versionCode = softEle.getChildText("latest-version-code");
            String versionName = softEle.getChildText("latest-version");
            String versionPath = softEle.getChildText("latest-version-abspath");
            String startupName = softEle.getChildText("latest-version-startup");

            List<VersionInfoDetail> details = new ArrayList<VersionInfoDetail>();
            Element detailEles = softEle.getChild("latest-version-detail");
            // Same optional handling as "ignore-files" below: one entry without details
            // must not abort the parsing of the whole file.
            if (detailEles != null) {
                for (Element detailItem : detailEles.getChildren("item")) {
                    String title = detailItem.getAttributeValue("name");
                    List<String> detailList = new ArrayList<String>();
                    for (Element detailEle : detailItem.getChildren("detail")) {
                        detailList.add(detailEle.getText());
                    }
                    details.add(new VersionInfoDetail(title, detailList));
                }
            }

            List<String> ignoreFiles = new ArrayList<String>();
            Element ignoreEles = softEle.getChild("ignore-files");
            if (ignoreEles != null) {
                for (Element ignoreItem : ignoreEles.getChildren("item")) {
                    ignoreFiles.add(ignoreItem.getText());
                }
            }

            VersionInfo versionInfo = new VersionInfo();
            versionInfo.setAppName(appName);
            versionInfo.setVersion(versionName);
            versionInfo.setStartupName(startupName);
            versionInfo.setVersionCode(Integer.parseInt(versionCode));
            versionInfo.setPath(versionPath);
            versionInfo.setDetails(details);
            versionInfo.setIgnoreFiles(ignoreFiles);
            infos.add(versionInfo);
        }
    } catch (Exception e) {
        // Callers receive null on any failure; kept for backward compatibility.
        e.printStackTrace();
        return null;
    }
    return infos;
}

From source file:com.cedarsoft.serialization.test.performance.XmlParserPerformance.java

License:Open Source License

/**
 * Hands a JDOM parsing task to {@code runBenchmark} (with the argument {@code 4}).
 *
 * The task parses {@code CONTENT_SAMPLE} {@code SMALL} times and, on each pass, builds an
 * {@code Extension} and a {@code FileType} from the parsed document. Any exception raised
 * while parsing is rethrown wrapped in a {@link RuntimeException}.
 */
public void benchJdom() {
    Runnable jdomParsingTask = new Runnable() {
        @Override
        public void run() {
            try {
                int pass = 0;
                while (pass < SMALL) {
                    SAXBuilder saxBuilder = new SAXBuilder();
                    Document parsed = saxBuilder.build(new StringReader(CONTENT_SAMPLE));

                    Element rootNode = parsed.getRootElement();
                    Element extensionNode = rootNode.getChild("extension");

                    // Read the extension data in the same order the constructor takes it.
                    String delimiter = extensionNode.getAttributeValue("delimiter");
                    String extensionText = extensionNode.getText();
                    boolean defaultExtension = extensionNode.getAttribute("default").getBooleanValue();
                    Extension extension = new Extension(delimiter, extensionText, defaultExtension);

                    String id = rootNode.getChildText("id");
                    boolean dependent = rootNode.getAttribute("dependent").getBooleanValue();
                    FileType fileType = new FileType(id, extension, dependent);

                    assertNotNull(fileType);
                    pass++;
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    };
    runBenchmark(jdomParsingTask, 4);
}

From source file:com.cisco.oss.foundation.logging.FoundationLogger.java

License:Apache License

/**
 * The sniffing Loggers are some special Loggers, whose level will be set to TRACE forcedly.
 *
 * Their names are read from the {@code id} attribute of every {@code sniffingLogger} child of
 * the root element of the classpath resource {@code /sniffingLogger.xml}. When the resource
 * is absent, only a debug message is written.
 *
 * @param logger the logger used to report a missing or unreadable configuration file
 * @throws IllegalArgumentException if the configuration file cannot be read or parsed
 */
private static void updateSniffingLoggersLevel(Logger logger) {

    InputStream settingIS = FoundationLogger.class.getResourceAsStream("/sniffingLogger.xml");
    if (settingIS == null) {
        logger.debug("file sniffingLogger.xml not found in classpath");
        return;
    }

    // try-with-resources: the stream is closed even when the document cannot be built.
    try (InputStream in = settingIS) {
        SAXBuilder builder = new SAXBuilder();
        Document document = builder.build(in);
        Element rootElement = document.getRootElement();
        List<Element> sniffingloggers = rootElement.getChildren("sniffingLogger");
        for (Element sniffinglogger : sniffingloggers) {
            String loggerName = sniffinglogger.getAttributeValue("id");
            Logger.getLogger(loggerName).setLevel(Level.TRACE);
        }
    } catch (Exception e) {
        logger.error("cannot load the sniffing logger configuration file. error is: " + e, e);
        throw new IllegalArgumentException("Problem parsing sniffingLogger.xml", e);
    }

}