List of usage examples for org.apache.commons.lang3 StringUtils removeEnd
public static String removeEnd(final String str, final String remove)
Removes a substring only if it is at the end of a source string, otherwise returns the source string.
A null source string will return null. A null search string will return the source string.
From source file:ubic.gemma.core.association.phenotype.PhenotypeAssociationManagerServiceImpl.java
@SuppressWarnings("ResultOfMethodCallIgnored") // Will know if mkdirs failed from failures of later methods @Override/*from www. j a v a 2 s.c o m*/ @Transactional(readOnly = true) public void writeAllEvidenceToFile() throws IOException { String disclaimer = "# Generated by Gemma\n" + "# " + DateUtil.getTodayDate(true) + "\n" + "# Fields are delimited by tabs\n" + "# If you use this file for your research, please cite PhenoCarta (previously known as Neurocarta): Portales-Casamar, E., et al., Neurocarta: aggregating and sharing disease-gene relations for the neurosciences. BMC Genomics. 2013 Feb 26;14(1):129.\n"; int i = 0; // path of the folder where the dump will be created and the data put String mainFolderPath = PhenotypeAssociationConstants.PHENOCARTA_HOME_FOLDER_PATH + PhenotypeAssociationConstants.PHENOCARTA_EXPORT + "_" + DateUtil.getTodayDate(true) + File.separator; // folder where AnnotationByDatasets will be kept String datasetsFolderPath = mainFolderPath + PhenotypeAssociationConstants.DATASET_FOLDER_NAME + File.separator; // folder where ErmineJ Files are kept String ermineJFolderPath = mainFolderPath + PhenotypeAssociationConstants.ERMINEJ_FOLDER_NAME + File.separator; // folder where ErmineJ Files, without OMIM-sourced data, are kept String ermineJWithOmimFolderPath = mainFolderPath + "AnnotationsWithOMIM" + File.separator; // creates the folders if they dont exist File phenocartaHomeFolder = new File(PhenotypeAssociationConstants.PHENOCARTA_HOME_FOLDER_PATH); phenocartaHomeFolder.mkdir(); File mainFolder = new File(mainFolderPath); mainFolder.mkdir(); File datasetsFolder = new File(datasetsFolderPath); datasetsFolder.mkdir(); File ermineJFolder = new File(ermineJFolderPath); ermineJFolder.mkdir(); File ermineJWithOmimFolder = new File(ermineJWithOmimFolderPath); ermineJWithOmimFolder.mkdir(); // this writer will be used to write 1 file per resource BufferedWriter fileWriterDataSource; // this writer is the dump of all evidence try (BufferedWriter 
fileWriterAllEvidence = new BufferedWriter( new FileWriter(mainFolderPath + PhenotypeAssociationConstants.FILE_ALL_PHENOCARTA_ANNOTATIONS)); BufferedWriter fileWriterAllEvidenceWithOMIM = new BufferedWriter( new FileWriter(mainFolderPath + "AnnotationsWithOMIM" + File.separator + PhenotypeAssociationConstants.FILE_ALL_PHENOCARTA_ANNOTATIONS))) { // header of file String header = disclaimer + "Data Source\tGene NCBI\tGene Symbol\tTaxon\tPhenotype Names\tRelationship\tPhenotype URIs\tPubmeds\tWeb Link\tIs Negative\tNote\n"; fileWriterAllEvidence.write(header); // lets get all external databases linked to evidence, we will create a file for each Collection<ExternalDatabaseValueObject> externalDatabaseValueObjects = this .findExternalDatabasesWithEvidence(); for (ExternalDatabaseValueObject externalDatabaseValueObject : externalDatabaseValueObjects) { File thisFile = new File( datasetsFolderPath + externalDatabaseValueObject.getName().replaceAll(" ", "") + ".tsv"); boolean currDBFoundinExtDBs = false; Iterator<ExternalDatabaseStatisticsValueObject> iter = this.loadNeurocartaStatistics().iterator(); ExternalDatabaseStatisticsValueObject dbFromColln = null; while (!currDBFoundinExtDBs && iter.hasNext()) { dbFromColln = iter.next(); if (dbFromColln.getName().equals(externalDatabaseValueObject.getName())) currDBFoundinExtDBs = true; } if (dbFromColln != null && dbFromColln.getLastUpdateDate() != null && dbFromColln.getLastUpdateDate().getTime() > thisFile.lastModified()) { fileWriterDataSource = new BufferedWriter(new FileWriter(datasetsFolderPath + externalDatabaseValueObject.getName().replaceAll(" ", "") + ".tsv")); // header of file fileWriterDataSource.write(header); // not using value object to make it faster Collection<PhenotypeAssociation> phenotypeAssociations; // this one is a special case, not actually linked to an external database if (externalDatabaseValueObject.getName() .equalsIgnoreCase(PhenotypeAssociationConstants.MANUAL_CURATION)) { phenotypeAssociations 
= this.phenoAssocService.findEvidencesWithoutExternalDatabaseName(); } else { phenotypeAssociations = this.phenoAssocService.findEvidencesWithExternalDatabaseName( externalDatabaseValueObject.getName(), null, 0); } for (PhenotypeAssociation phenotypeAssociation : phenotypeAssociations) { if (i++ % 5000 == 0) { PhenotypeAssociationManagerServiceImpl.log .debug("Phenocarta dump of evidence at evidence number: " + i); } StringBuilder pubmeds = new StringBuilder(); for (PhenotypeAssociationPublication phenotypeAssociationPublication : phenotypeAssociation .getPhenotypeAssociationPublications()) { String pubId = phenotypeAssociationPublication.getCitation().getPubAccession() .getAccession() + ";"; // primary should be order first if (phenotypeAssociationPublication.getType() .equals(PhenotypeAssPubValueObject.PRIMARY)) { pubmeds.insert(0, pubId); } else { pubmeds.append(pubId); } } String relationship; relationship = phenotypeAssociation.getRelationship(); StringBuilder phenotypes = new StringBuilder(); for (Characteristic cha : phenotypeAssociation.getPhenotypes()) { phenotypes.append(cha.getValue()).append(";"); } StringBuilder phenotypesUri = new StringBuilder(); for (Characteristic cha : phenotypeAssociation.getPhenotypes()) { if (StringUtils.isNotBlank(cha.getValueUri())) { phenotypesUri.append(cha.getValueUri()).append(";"); } } // this should never happen if ((phenotypes.length() == 0) || (phenotypesUri.length() == 0)) { PhenotypeAssociationManagerServiceImpl.log.error( "Found an evidence without phenotypes : " + phenotypeAssociation.getId()); } String webLink = ""; if (phenotypeAssociation.getEvidenceSource() != null && phenotypeAssociation.getEvidenceSource().getExternalDatabase() != null) { webLink = phenotypeAssociation.getEvidenceSource().getExternalDatabase().getWebUri() + phenotypeAssociation.getEvidenceSource().getAccession(); } String isNegative; if (phenotypeAssociation.getIsNegativeEvidence()) { isNegative = "Yes"; } else { isNegative = "No"; } String 
description = phenotypeAssociation.getDescription(); // represents 1 evidence String evidenceLine = externalDatabaseValueObject.getName() + "\t" + phenotypeAssociation.getGene().getNcbiGeneId() + "\t" + phenotypeAssociation.getGene().getOfficialSymbol() + "\t" + phenotypeAssociation.getGene().getTaxon().getCommonName() + "\t" + StringUtils.removeEnd(phenotypes.toString(), ";") + "\t" + relationship + "\t" // relationship // information + StringUtils.removeEnd(phenotypesUri.toString(), ";") + "\t" + StringUtils.removeEnd(pubmeds.toString(), ";") + "\t" + webLink + "\t" + isNegative + "\t" + description + "\n"; fileWriterDataSource.write(evidenceLine); if (!externalDatabaseValueObject.getName().contains("OMIM")) fileWriterAllEvidence.write(evidenceLine); fileWriterAllEvidenceWithOMIM.write(evidenceLine); } fileWriterDataSource.close();// finish writing one given data src file } // old: finish loop of writing all ext data src files } // new: finish loop of writing all ext data src files, including checking modified times fileWriterAllEvidence.close(); fileWriterAllEvidenceWithOMIM.close(); // LatestEvidenceExport ---> points to the latest dump File symbolicLink = new File(PhenotypeAssociationConstants.PHENOCARTA_HOME_FOLDER_PATH + PhenotypeAssociationConstants.LATEST_EVIDENCE_EXPORT); if (symbolicLink.exists()) { Files.delete(symbolicLink.toPath()); } Files.createSymbolicLink(symbolicLink.toPath(), mainFolder.toPath()); PhenotypeAssociationManagerServiceImpl.log .debug("After symlink code; symlink now exists: " + symbolicLink.exists()); PhenotypeAssociationManagerServiceImpl.log.debug("Right before ErmineJ; latest dir exists: " + mainFolder.exists() + " and is: " + mainFolder.toPath().toString()); this.writeErmineJFile(ermineJFolderPath, disclaimer, this.taxonService.findByCommonName("mouse"), false); this.writeErmineJFile(ermineJFolderPath, disclaimer, this.taxonService.findByCommonName("mouse"), true); this.writeErmineJFile(ermineJFolderPath, disclaimer, 
this.taxonService.findByCommonName("human"), false); this.writeErmineJFile(ermineJFolderPath, disclaimer, this.taxonService.findByCommonName("human"), true); this.writeErmineJFile(ermineJWithOmimFolderPath, disclaimer, this.taxonService.findByCommonName("mouse"), false); this.writeErmineJFile(ermineJWithOmimFolderPath, disclaimer, this.taxonService.findByCommonName("mouse"), true); this.writeErmineJFile(ermineJWithOmimFolderPath, disclaimer, this.taxonService.findByCommonName("human"), false); this.writeErmineJFile(ermineJWithOmimFolderPath, disclaimer, this.taxonService.findByCommonName("human"), true); } }
From source file:ubic.gemma.core.loader.association.phenotype.PhenotypeProcessingUtil.java
/** * step 1 using an OMIM or MESH to link to a DO id *//*w w w . j a v a2 s . c o m*/ private boolean findOmimMeshInDiseaseOntology(String meshOrOmimId, Gene gene, String pubmed, String evidenceCode, String description, String externalDatabase, String databaseLink, Collection<OntologyTerm> onParents) throws Exception { String mappingType; StringBuilder valuesUri = new StringBuilder(); StringBuilder originalPhenotype = new StringBuilder(meshOrOmimId); // this just provides a text version of the term, but only if it is a recognized mesh or omim id. String meshOrOmimIdValue = this.findDescriptionUsingTerm(meshOrOmimId); // use the ontology to find description if (meshOrOmimIdValue != null) { originalPhenotype.append(" (").append(meshOrOmimIdValue.toLowerCase()).append(")"); } // using parents if (onParents != null) { mappingType = PhenotypeMappingType.INFERRED_XREF.toString(); Map<String, Collection<OntologyTerm>> dieaseOn = this.meshToDiseaseTerms(onParents); originalPhenotype.append(" PARENT: ("); for (String key : dieaseOn.keySet()) { originalPhenotype.append(key).append(","); } originalPhenotype = new StringBuilder(StringUtils.removeEnd(originalPhenotype.toString(), ",") + ")"); for (Collection<OntologyTerm> colOn : dieaseOn.values()) { for (OntologyTerm o : colOn) { valuesUri.append(o.getUri()).append(";"); } } } else { mappingType = PhenotypeMappingType.XREF.toString(); Collection<OntologyTerm> ontologyTerms = this.findOntologyTermsUriWithDiseaseId(meshOrOmimId); for (OntologyTerm ontologyTerm : ontologyTerms) { valuesUri.append(ontologyTerm.getUri()).append(";"); } } if (valuesUri.length() > 0) { outFinalResults.write(gene.getOfficialSymbol() + "\t" + gene.getNcbiGeneId() + "\t" + pubmed + "\t" + evidenceCode + "\t" + description + "\t" + externalDatabase + "\t" + databaseLink + "\t" + mappingType + "\t" + originalPhenotype + "\t" + valuesUri + "\n"); return true; } return false; }
From source file:ubic.gemma.core.loader.association.phenotype.PhenotypeProcessingUtil.java
private boolean findUsingManualMappingFile(String meshOrOmimId, String annotatorKeyword, Gene gene, String pubmed, String evidenceCode, String description, String externalDatabase, String databaseLink, Collection<OntologyTerm> onParents) throws Exception { String mappingType;//from www. j ava 2 s .co m StringBuilder originalPhenotype; Collection<String> phenotypesUri = new HashSet<>(); if (onParents != null) { mappingType = PhenotypeMappingType.INFERRED_CURATED.toString(); originalPhenotype = new StringBuilder( meshOrOmimId + this.findExtraInfoMeshDescription(meshOrOmimId) + " PARENT: ("); for (OntologyTerm o : onParents) { String meshId = this.changeToId(o.getUri()); Collection<String> uri = this.findManualMappingTermValueUri(meshId); if (uri != null && !uri.isEmpty()) { phenotypesUri.addAll(uri); originalPhenotype.append(meshId).append(","); } } originalPhenotype = new StringBuilder(StringUtils.removeEnd(originalPhenotype.toString(), ",") + ")"); } else { mappingType = PhenotypeMappingType.CURATED.toString(); if (meshOrOmimId != null) { originalPhenotype = new StringBuilder(meshOrOmimId); } else { originalPhenotype = new StringBuilder(annotatorKeyword); } phenotypesUri = this.findManualMappingTermValueUri(originalPhenotype.toString()); originalPhenotype.append(this.findExtraInfoMeshDescription(originalPhenotype.toString())); } if (phenotypesUri != null && !phenotypesUri.isEmpty()) { outFinalResults.write(gene.getOfficialSymbol() + "\t" + gene.getNcbiGeneId() + "\t" + pubmed + "\t" + evidenceCode + "\t" + description + "\t" + externalDatabase + "\t" + databaseLink + "\t" + mappingType + "\t" + originalPhenotype + "\t" + StringUtils.join(phenotypesUri, ";") + "\n"); return true; } return false; }
From source file:ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator.java
/** * Determine the set of external accession values that will be generated during parsing. This can be used to * pre-empty time-consuming fetch and download of data we already have. * * @param geoAccession geo accession/*from w w w .j av a 2s . c o m*/ * @return database entries */ public Collection<DatabaseEntry> getProjectedAccessions(String geoAccession) { ExternalDatabase ed = ExternalDatabase.Factory.newInstance(); ed.setName("GEO"); Collection<DatabaseEntry> accessions = new HashSet<>(); // DatabaseEntry StringBuilder seriesAccession = new StringBuilder(); if (geoAccession.startsWith("GSE")) { seriesAccession = new StringBuilder(geoAccession); } else if (geoAccession.startsWith("GPL")) { GeoDomainObjectGenerator.log.warn( "Determining if the data already exist for a GPL (" + geoAccession + ") is not implemented."); return null; } else if (geoAccession.startsWith("GDS")) { Collection<String> seriesAccessions = DatasetCombiner.findGSEforGDS(geoAccession); if (seriesAccessions == null || seriesAccessions.size() == 0) { throw new InvalidAccessionException("There is no series (GSE) for the accession " + geoAccession); } for (String string : seriesAccessions) { seriesAccession.append(string).append(","); } seriesAccession = new StringBuilder(StringUtils.removeEnd(seriesAccession.toString(), ",")); } else { if (StringUtils.isBlank(geoAccession)) { throw new InvalidAccessionException("GEO accession must not be blank. Enter a GSE, GDS or GPL"); } throw new InvalidAccessionException("'" + geoAccession + "' is not understood by Gemma; must be a GSE, GDS or GPL. Did you choose the right source database?"); } DatabaseEntry de = DatabaseEntry.Factory.newInstance(ed); de.setAccession(seriesAccession.toString()); accessions.add(de); return accessions; }
From source file:ubic.gemma.model.expression.experiment.ExperimentalFactorValueObject.java
public ExperimentalFactorValueObject(ExperimentalFactor factor) { super(factor.getId()); this.setName(factor.getName()); this.setDescription(factor.getDescription()); if (factor.getCategory() != null) this.setCategory(factor.getCategory().getCategory()); this.setCategoryUri(this.getCategoryUri(factor.getCategory())); /*/*from w ww . j av a2 s.c o m*/ * Note: this code copied from the ExperimentalDesignController. */ Collection<FactorValueValueObject> vals = new HashSet<>(); if (factor.getType() != null) { this.type = factor.getType().equals(FactorType.CATEGORICAL) ? "categorical" : "continuous"; } else { // Backwards compatibility: for old entries created prior to introduction of 'type' field in // ExperimentalFactor entity. // We have to take a guess. if (factor.getFactorValues().isEmpty()) { this.type = "categorical"; } else { // Just use first factor value to make our guess. if (factor.getFactorValues().iterator().next().getMeasurement() != null) { this.type = "continuous"; } else { this.type = "categorical"; } } } if (factor.getFactorValues() == null || factor.getFactorValues().isEmpty()) { return; } Collection<FactorValue> fvs = factor.getFactorValues(); StringBuilder factorValuesAsString = new StringBuilder(StringUtils.EMPTY); for (FactorValue fv : fvs) { String fvName = fv.toString(); if (StringUtils.isNotBlank(fvName)) { factorValuesAsString.append(fvName).append(", "); } } /* clean up the start and end of the string */ factorValuesAsString = new StringBuilder( StringUtils.remove(factorValuesAsString.toString(), factor.getName() + ":")); factorValuesAsString = new StringBuilder(StringUtils.removeEnd(factorValuesAsString.toString(), ", ")); this.setFactorValues(factorValuesAsString.toString()); this.numValues = factor.getFactorValues().size(); Characteristic c = factor.getCategory(); /* * NOTE this replaces code that previously made no sense. 
PP */ for (FactorValue value : factor.getFactorValues()) { vals.add(new FactorValueValueObject(value, c)); } this.setValues(vals); }
From source file:ubic.gemma.persistence.service.association.phenotype.PhenotypeAssociationDaoImpl.java
@SuppressWarnings("ConstantConditions") // Better readability private String addExternalDatabaseQuery(Collection<Long> externalDatabaseIds) { String externalDatabaseSqlQuery = ""; StringBuilder listIds = new StringBuilder(); Boolean excludeManualCuration = false; Boolean excludeExternalDatabase = false; if (externalDatabaseIds != null && !externalDatabaseIds.isEmpty()) { for (Long id : externalDatabaseIds) { if (id.equals(1L)) { excludeManualCuration = true; } else { listIds.append(id).append(","); excludeExternalDatabase = true; }/*w ww . ja v a 2 s . c o m*/ } listIds = new StringBuilder(StringUtils.removeEnd(listIds.toString(), ",")); // SLIGHTLY UNSAFE USE PARAMETER if (excludeManualCuration && excludeExternalDatabase) { //language=MySQL externalDatabaseSqlQuery = "and" + " phen.EVIDENCE_SOURCE_FK in (SELECT id FROM DATABASE_ENTRY dbe where dbe.EXTERNAL_DATABASE_FK not in (" + listIds + ")) "; } else if (excludeExternalDatabase) { //language=MySQL externalDatabaseSqlQuery = "and" + " (phen.EVIDENCE_SOURCE_FK is null or phen.EVIDENCE_SOURCE_FK " + "not in (SELECT id FROM DATABASE_ENTRY dbe where dbe.EXTERNAL_DATABASE_FK in (" + listIds + "))) "; } else if (excludeManualCuration) { externalDatabaseSqlQuery = "and" + " phen.EVIDENCE_SOURCE_FK is not null"; } } return externalDatabaseSqlQuery; }