List of usage examples for the com.mongodb.client.MongoCollection#find method
FindIterable<TDocument> find(ClientSession clientSession);
From source file:module.ImportPlatform.java
License:Open Source License
public ImportPlatform() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionPlatforms = db.getCollection("platforms"); MongoCollection<Document> collectionSamples = db.getCollection("samples"); MongoCollection<Document> collectionSeries = db.getCollection("series"); // ===== Platforms ===== List<String> listGpl = collectionSamples.distinct("exp_group.id_platform", String.class) .into(new ArrayList<String>()); for (String idPlatform : listGpl) { Document doc = collectionPlatforms.find(Filters.in("_id", idPlatform)).first(); if (doc.getString("type") == null) { System.out.println(idPlatform + ": " + doc); }//from w w w . j a va 2 s. c o m } mongoClient.close(); }
From source file:module.ImportPlatformFromFile.java
License:Open Source License
public ImportPlatformFromFile() {
    // Imports the GPL97 platform annotation file into PostgreSQL:
    // creates per-platform probe/gene-probe tables, inserts one row per probe,
    // links probes to Entrez genes, then registers the platform (from Mongo
    // metadata if it is not yet in PostgreSQL) and grants table rights.
    System.out.println("\n================ BEGIN Module " + this.getClass().getName() + "================");

    // === INPUT === (hard-coded to GPL97 and its annotation file)
    String idPlatform = "GPL97";
    String inputfile = this.getInputDirectory() + this.getDirSeparator() + "GPL97-17394.txt";
    String gpl = idPlatform.toLowerCase().trim(); // used as a table-name suffix

    // ===== Session PostgreSQL =====
    SessionFactory sessionFactory = HibernateUtil
            .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml");
    Session session = sessionFactory.openSession();

    // ===== DAO =====
    OmGeneDao geneDao = new OmGeneDao(session);

    // ===== Session Mongo =====
    MongoClient mongoClient = MongoUtil.buildMongoClient();
    MongoDatabase db = mongoClient.getDatabase("epimed_experiments");

    try {
        // === Begin transaction === (everything below commits or rolls back as a unit)
        session.beginTransaction();

        // ===== Load file =====
        System.out.println("ID Platform " + gpl);
        System.out.println("LOADING \t " + inputfile);
        System.out.println("Please wait... ");
        List<String> listRows = fileService.loadTextFile(inputfile);
        // Alternative source kept for reference:
        // List<String> listRows = webService.loadGeoData(idPlatform);
        System.out.println("File sucessfully LOADED");

        // ===== Recognize header =====
        List<String> header = fileService.readHeader(listRows, "\t");
        if (header == null || header.isEmpty()) {
            throw new ImportDataException("The header is empty");
        } else {
            System.out.println("Header " + header);
        }

        // Column positions of the three fields this import needs.
        Integer indId = fileService.findIndex(header, "ID");
        Integer indGbacc = fileService.findIndex(header, "GB_ACC");
        Integer indEntrez = fileService.findIndex(header, "ENTREZ");
        if (indId == null || indGbacc == null || indEntrez == null) {
            throw new ImportDataException("Header not recognized: " + "ID index=" + indId + ", GB_ACC index="
                    + indGbacc + ", ENTREZ index=" + indEntrez);
        } else {
            System.out.println("The following header items are recognized:");
            System.out.println("\t ID index=" + indId + ": " + header.get(indId));
            System.out.println("\t GB_ACC index=" + indGbacc + ": " + header.get(indGbacc));
            System.out.println("\t ENTREZ index=" + indEntrez + ": " + header.get(indEntrez));
        }

        // ===== Recognize data =====
        List<List<String>> data = fileService.readData(listRows, "\t");
        if (data == null || data.isEmpty()) {
            throw new ImportDataException("The data are empty");
        } else {
            System.out.println(
                    "The data are sucessfully loaded: rows " + data.size() + ", columns " + data.get(0).size());
        }

        // ===== Create specific tables (only if they do not exist yet) =====
        String sqlCheckTableProbe = "select * from information_schema.tables WHERE table_schema = 'hs' and table_name='om_probe_"
                + gpl + "'";
        List<Object> result = session.createNativeQuery(sqlCheckTableProbe).getResultList();
        String tableProbe = "hs.om_probe_" + gpl;
        String tableGP = "hs.om_gp_" + gpl;
        if (result == null || result.isEmpty()) {
            // Table probe: one row per probe, optional GenBank accession.
            String sqlCreateTableProbe = "create table " + tableProbe + "(id_probe VARCHAR(50) not null,"
                    + " genbank_acc VARCHAR(50) null," + " constraint pk_om_probe_" + gpl
                    + " primary key (id_probe))";
            session.createNativeQuery(sqlCreateTableProbe).executeUpdate();
            // Table gp: probe-to-gene mapping.
            String sqlCreateTableGP = "create table " + tableGP + "(id_probe VARCHAR(50) not null,"
                    + " id_gene INT4 not null," + " constraint pk_om_gp_" + gpl
                    + " primary key (id_probe, id_gene))";
            session.createNativeQuery(sqlCreateTableGP).executeUpdate();
            // Foreign keys to the probe table and the global gene table.
            String sqlAlterTableProbe = "alter table " + tableGP + " add constraint fk_gp_probe_" + gpl
                    + " foreign key (id_probe)" + " references " + tableProbe
                    + " (id_probe) on delete restrict on update restrict";
            session.createNativeQuery(sqlAlterTableProbe).executeUpdate();
            String sqlAlterTableGene = "alter table " + tableGP + " add constraint fk_gp_gene_" + gpl
                    + " foreign key (id_gene)"
                    + " references hs.om_gene (id_gene) on delete restrict on update restrict";
            session.createNativeQuery(sqlAlterTableGene).executeUpdate();
        }

        // ===== Import data =====
        // NOTE(review): all INSERT statements below build SQL by string
        // concatenation of file-derived values (idProbe, genbankAcc). A value
        // containing a single quote breaks the statement; this is also an
        // injection risk. Should be rewritten with parameterized queries
        // (setParameter) — TODO confirm input is trusted before leaving as-is.
        for (int i = 0; i < data.size(); i++) {
            List<String> dataline = data.get(i);
            String idProbe = dataline.get(indId).trim();
            String genbankAcc = dataline.get(indGbacc).trim();

            // Insert the probe; genbank_acc is null when the column is empty.
            String sqlInsertProbe = "insert into " + tableProbe + " values('" + idProbe + "', null)";
            if (genbankAcc != null && !genbankAcc.isEmpty()) {
                sqlInsertProbe = "insert into " + tableProbe + " values('" + idProbe + "', '" + genbankAcc + "')";
            }
            session.createNativeQuery(sqlInsertProbe).executeUpdate();

            // Register the GenBank accession if it is not known yet.
            OmGenbankUnigene gu = session.get(OmGenbankUnigene.class, genbankAcc);
            if (gu == null && genbankAcc != null && !genbankAcc.isEmpty()) {
                gu = new OmGenbankUnigene();
                gu.setGenbankAcc(genbankAcc);
                session.save(gu);
            }

            // The ENTREZ cell may hold several ids separated by "///" or whitespace.
            String listEntrez = null;
            String[] parts = null;
            if (indEntrez < dataline.size()) {
                listEntrez = dataline.get(indEntrez).trim();
                parts = listEntrez.split("[///\\p{Space}]");
                for (String entrezString : parts) {
                    Integer entrez = null;
                    try {
                        entrez = Integer.parseInt(entrezString);
                    } catch (NumberFormatException e) {
                        // Non-numeric tokens (separators, empty strings) are skipped on purpose.
                    }
                    if (entrez != null) {
                        // Create the gene on the fly if it is unknown.
                        OmGene gene = geneDao.find(entrez);
                        if (gene == null) {
                            gene = geneDao.createGene(entrez, null);
                        }
                        String sqlInsertGP = "insert into " + tableGP + " values('" + idProbe + "', " + entrez
                                + ")";
                        session.createNativeQuery(sqlInsertGP).executeUpdate();
                    }
                }
            }

            // Progress trace every 1000 rows; flush the session every 20 rows.
            if (i % 1000 == 0) {
                System.out.println(i + "\t" + idProbe + "\t" + genbankAcc + "\t" + listEntrez + "\t"
                        + Arrays.toString(parts));
            }
            if (i % 20 == 0) {
                session.flush();
            }
        }

        // ===== Subscribe platform =====
        OmPlatform platform = session.get(OmPlatform.class, idPlatform);
        if (platform != null) {
            platform.setEnabled(true);
            session.update(platform);
        } else {
            // Platform metadata comes from Mongo when it is not yet in PostgreSQL.
            MongoCollection<Document> collection = db.getCollection("platforms");
            Document docPlatform = collection.find(Filters.eq("_id", idPlatform)).first();
            // NOTE(review): docPlatform is dereferenced without a null check; if the
            // platform is missing from Mongo this NPEs and rolls back the import —
            // TODO confirm the platform document is guaranteed to exist.
            String title = docPlatform.getString("title");
            String manufacturer = docPlatform.getString("manufacturer");
            platform = new OmPlatform();
            platform.setIdPlatform(idPlatform);
            platform.setTitle(title);
            platform.setManufacturer(manufacturer);
            platform.setEnabled(true);
            session.save(platform);
        }

        // ===== Rights =====
        String sqlRights;
        String[] users = { "epimed_prod", "epimed_web", "epimed_script" };
        for (String user : users) {
            sqlRights = "GRANT SELECT ON ALL TABLES IN SCHEMA hs TO " + user;
            session.createNativeQuery(sqlRights).executeUpdate();
        }
        sqlRights = "GRANT ALL ON ALL TABLES IN SCHEMA hs TO epimed_admin";
        session.createNativeQuery(sqlRights).executeUpdate();

        // === Commit transaction ===
        session.getTransaction().commit();
        // session.getTransaction().rollback();
    } catch (Exception e) {
        // Any failure rolls the whole import back.
        session.getTransaction().rollback();
        System.out.println("ROLLBACK in module " + this.getClass().getName());
        e.printStackTrace();
    } finally {
        if (session.isOpen()) {
            session.close();
        }
        sessionFactory.close();
        mongoClient.close();
    }

    // === Display ===
    System.out.println("================ END Module " + this.getClass().getName() + "================");
}
From source file:module.script.AddSeriesToSamples.java
License:Open Source License
public AddSeriesToSamples() { // ===== Service ===== FormatService formatService = new FormatService(); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); Set<String> setProjects = new HashSet<String>(); MongoCollection<Document> collection = db.getCollection("sample"); Bson filters = Filters.and(Filters.in("series", "PRJNA270632")); List<Document> listDocuments = collection.find(filters).into(new ArrayList<Document>()); for (int i = 0; i < listDocuments.size(); i++) { Document doc = listDocuments.get(i); Document expgroup = doc.get("exp_group", Document.class); if (expgroup.get("exp_Mcount") != null) { List<String> projects = doc.get("series", ArrayList.class); setProjects.clear();/*ww w.j a v a 2 s . co m*/ setProjects.addAll(projects); setProjects.add("TISSUE_SPECIFIC_GENES_HS"); doc.put("series", setProjects); System.out.println(doc.getString("_id") + " " + projects + " -> " + setProjects); collection.updateOne(Filters.eq("_id", doc.getString("_id")), new Document("$set", doc)); } } mongoClient.close(); }
From source file:module.script.CorrectImportedData.java
License:Open Source License
public CorrectImportedData() { // ===== Service ===== FormatService formatService = new FormatService(); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collection = db.getCollection("sample"); Bson filters = Filters.and(Filters.eq("main_gse_number", gseNumber)); List<Document> listDocuments = collection.find(filters).into(new ArrayList<Document>()); for (int i = 0; i < listDocuments.size(); i++) { Document doc = listDocuments.get(i); Document expgroup = (Document) doc.get("exp_group"); Document parameters = (Document) doc.get("parameters"); expgroup.append("id_tissue_stage", 2); expgroup.append("tissue_stage", "fetal"); // Update Mongo document doc.put("exp_group", expgroup); // doc.put("parameters", parameters); doc.put("analyzed", true); System.out.println(expgroup); collection.updateOne(Filters.eq("_id", doc.getString("_id")), new Document("$set", doc)); }/*from w ww .j a v a 2 s.c om*/ mongoClient.close(); }
From source file:module.script.emtab365.ImportSamplesEMTAB365.java
License:Open Source License
public ImportSamplesEMTAB365() {
    // Imports the E-MTAB-365 samples from its SDRF Excel sheet into Mongo:
    // one sample document per data row, with an exp_group sub-document and a
    // parameters sub-document built from the sheet's header/value pairs.

    // ===== Connection =====
    MongoClient mongoClient = MongoUtil.buildMongoClient();
    MongoDatabase db = mongoClient.getDatabase("epimed_experiments");

    // ===== Collections ======
    MongoCollection<Document> collectionPlatforms = db.getCollection("platforms");
    MongoCollection<Document> collectionSeries = db.getCollection("series");
    MongoCollection<Document> collectionSamples = db.getCollection("samples");

    // ===== Excel data loader =====
    String inputfile = this.getInputDirectory() + this.getDirSeparator() + "E-MTAB-365.sdrf.xlsx";
    System.out.println("LOADING \t " + inputfile);
    excelService.load(inputfile);

    // ===== Init values ======
    String idSeries = "E-MTAB-365";
    List<String> listSeries = new ArrayList<String>();
    listSeries.add(idSeries);
    // NOTE(review): docSeries is dereferenced below (submission_date,
    // last_update) without a null check; if the series document is missing
    // this NPEs — confirm the series is imported before samples.
    Document docSeries = collectionSeries.find(Filters.eq("_id", idSeries)).first();
    String organism = "Homo sapiens";

    // ==== Header processing ====
    // Extracts the bracketed part of headers like "Characteristics[age]" and
    // normalizes ':', '_' and '.' to spaces; maps column index -> header name.
    Map<Integer, String> mapHeader = new HashMap<Integer, String>();
    for (int i = 0; i < excelService.getHeader().size(); i++) {
        String headerItem = (String) excelService.getHeader().get(i);
        if (headerItem != null && headerItem.contains("[")) {
            String[] parts = headerItem.split("[\\[\\]]");
            headerItem = parts[1];
            headerItem = headerItem.replaceAll("[:_\\.]", " ");
        }
        mapHeader.put(i, headerItem.trim());
    }
    System.out.println(mapHeader);

    for (int i = 0; i < excelService.getData().size(); i++) {
        List<Object> dataline = excelService.getData().get(i);
        String idSample = (String) dataline.get(0);
        // "pool XX" rows are pooled controls and are skipped.
        if (!idSample.equals("pool XX")) {

            // === Platform ===
            // NOTE(review): column 54 is assumed to hold the array design —
            // verify against the SDRF layout if the file changes.
            String idPlatform = ((String) dataline.get(54)).trim();
            if (idPlatform.contains("A-AFFY-44")) {
                // A-AFFY-44 is mapped to its GEO equivalent GPL570.
                idPlatform = "GPL570";
            } else {
                // Unknown platform: upsert a minimal platform document.
                Document docPlatform = mongoService.createPlatform(idPlatform, null, "9606", "Homo sapiens",
                        null, null, null, null);
                UpdateResult res = collectionPlatforms.updateOne(
                        Filters.eq("_id", docPlatform.getString("_id")), new Document("$set", docPlatform));
                if (res.getMatchedCount() == 0) {
                    collectionPlatforms.insertOne(docPlatform);
                }
            }

            Document docSample = mongoService.createSample(idSample, idSeries, listSeries, organism,
                    (Date) docSeries.get("submission_date"), (Date) docSeries.get("last_update"), false);

            // === exp_group ===
            Document expgroup = mongoService.createExpGroup(docSample, idPlatform, null, null, organism);
            docSample.append("exp_group", expgroup);

            // === parameters ===
            // Keep only non-empty values; "NA" and "ND" are treated as missing.
            Map<String, Object> mapParameters = new HashMap<String, Object>();
            for (int j = 0; j < dataline.size(); j++) {
                String key = mapHeader.get(j);
                Object value = dataline.get(j);
                if (value instanceof String) {
                    String valueString = ((String) value).trim();
                    if (valueString != null && !valueString.isEmpty() && !valueString.equals("NA")
                            && !valueString.equals("ND")) {
                        value = valueString;
                    } else {
                        value = null;
                    }
                }
                if (key != null && value != null) {
                    mapParameters.put(key, value);
                }
            }
            Document parameters = mongoService.createParameters(docSample, mapParameters);
            docSample.append("parameters", parameters);

            // === Delete if already exist ===
            collectionSamples.deleteOne(Filters.eq("_id", docSample.getString("_id")));

            // ===== Insert data =====
            collectionSamples.insertOne(docSample);
            System.out.println(docSample);
        }
    }
    mongoClient.close();
}
From source file:module.script.emtab365.UpdateSamplesEMTAB365.java
License:Open Source License
public UpdateSamplesEMTAB365() { // ===== Session PostgreSQL ===== SessionFactory sessionFactory = HibernateUtil .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml"); Session session = sessionFactory.openSession(); // ===== INIT ===== ClMorphology ductal = session.get(ClMorphology.class, "8500/3"); // 8500/3 Infiltrating duct carcinoma, NOS (C50._) ClMorphology lobular = session.get(ClMorphology.class, "8520/3"); // 8520/3 Lobular carcinoma, NOS (C50._) ClMorphology morphology = session.get(ClMorphology.class, "8010/3"); // Carcinoma ClTopology breast = session.get(ClTopology.class, "C50.9"); // Breast ClTopology blood = session.get(ClTopology.class, "C42.0"); // Blood ClTopology lymphNode = session.get(ClTopology.class, "C77.9"); // Lymph node // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collection = db.getCollection("samples"); List<Document> listDocuments = collection.find(Filters.in("series", gseNumber)) .into(new ArrayList<Document>()); for (int i = 0; i < listDocuments.size(); i++) { Document doc = listDocuments.get(i); Document expgroup = (Document) doc.get("exp_group"); Document parameters = (Document) doc.get("parameters"); String histoType = parameters.getString("Histology"); String histoSubtype = parameters.getString("CIT classification"); expgroup.put("histology_subtype", histoSubtype); if (histoType != null && histoType.toLowerCase().equals("lobular")) { morphology = lobular;//w w w . ja v a 2 s. 
c om } if (histoType != null && histoType.toLowerCase().equals("ductal")) { morphology = ductal; } expgroup.put("id_morphology", morphology.getIdMorphology()); expgroup.put("morphology", morphology.getName()); expgroup.put("sample_source", parameters.getString("Source Name")); String organismPart = parameters.getString("OrgansimPart"); ClTopology topology = null; if (organismPart != null) { if (organismPart.toLowerCase().contains("breast")) { topology = breast; } if (organismPart.toLowerCase().contains("blood")) { topology = blood; } if (organismPart.toLowerCase().contains("lymph")) { topology = lymphNode; } } else { topology = breast; } expgroup.put("id_topology", topology.getIdTopology()); expgroup.put("topology", topology.getName()); expgroup.put("id_topology_group", topology.getClTopologyGroup().getIdGroup()); expgroup.put("topology_group", topology.getClTopologyGroup().getName()); // ==== Survival ===== Object dfs_months = parameters.get("Delay Metastasis Free Survival months"); if (dfs_months != null) { expgroup.put("dfs_months", dfs_months); } Object os_months = parameters.get("Delay Overall Survival months"); if (os_months != null) { expgroup.put("os_months", os_months); } Double os = (Double) expgroup.get("os_months"); Double dfs = (Double) expgroup.get("dfs_months"); if (os != null && dfs != null && dfs.equals(os)) { expgroup.put("relapsed", false); } if (os != null && dfs != null && dfs < os) { expgroup.put("relapsed", true); } if (os != null && dfs != null && dfs > os) { expgroup.put("relapsed", null); } Object relapseDate = parameters.get("Relapse Metastasis Date"); if (relapseDate != null) { expgroup.put("relapsed", true); } // ==== Grade ==== expgroup.put("tnm_grade", parameters.get("Grade Scarff Bloom Richardson")); // ==== Files ===== expgroup.put("ftp", parameters.getString("ArrayExpress FTP file")); expgroup.put("file_name", parameters.getString("Array Data File")); expgroup.remove("individual"); if (parameters.getString("Individual") != null) { 
expgroup.put("individual", parameters.getString("Individual")); } // ==== Biomarkers ==== /* String p53 = parameters.getString("Phenotype - TP53 Gene mutation Status"); expgroup.put("p53", value) String pr = parameters.getString("PGR Protein expression"); String er = parameters.getString("ESR1 Protein expression"); String her2 = parameters.getString("ERBB2 Protein expression"); */ doc.put("exp_group", expgroup); System.out.println(i + " " + doc.get("_id") + " " + doc.get("analyzed") + " " + expgroup); if (commit) { UpdateResult updateResult = collection.updateOne(Filters.eq("_id", doc.get("_id")), new Document("$set", doc)); } } if (session.isOpen()) { session.close(); } sessionFactory.close(); mongoClient.close(); }
From source file:module.script.epilung.SearchSamples.java
License:Open Source License
public SearchSamples() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSamples = db.getCollection("samples"); MongoCollection<Document> collectionPlatforms = db.getCollection("platforms"); Bson filters = Filters.and(//from w ww. j ava2s. c om Filters.in("exp_group.id_platform", new String[] { "GPL13534", "GPL8490", "GPL21145" }), Filters.eq("exp_group.id_tissue_status", 1), Filters.ne("exp_group.id_topology", null)); /* List<Document> list = collectionSamples .find(filters) .into(new ArrayList<Document>()); */ List<Document> list = collectionSamples.aggregate(Arrays.asList(Aggregates.match(filters), Aggregates.group("$exp_group.topology", Accumulators.sum("total", 1)), Aggregates.sort(Sorts.orderBy(Sorts.descending("total"))))).into(new ArrayList<Document>()); for (int i = 0; i < list.size(); i++) { System.out.println((i + 1) + " " + list.get(i)); } collectionPlatforms.find(Filters.regex("title", ".*ethyl.*")).forEach(printBlock); mongoClient.close(); }
From source file:module.script.epilung.SearchSamplesLungAdenocarcinoma.java
License:Open Source License
public SearchSamplesLungAdenocarcinoma() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSamples = db.getCollection("samples"); MongoCollection<Document> collectionPlatforms = db.getCollection("platforms"); System.out.println("\n================== SUMMARY =================="); Bson[] pFilters = { Filters.eq("_id", "GPL570"), Filters.eq("type", "rna-seq") }; for (Bson pFilter : pFilters) { // === Platforms === List<String> platforms = new ArrayList<String>(); List<String> platformstext = new ArrayList<String>(); List<Document> list = collectionPlatforms.find(pFilter).into(new ArrayList<Document>()); for (Document doc : list) { platforms.add(doc.getString("_id")); platformstext.add(doc.getString("_id") + " " + doc.getString("type")); }/*w w w .j a v a 2 s .co m*/ String lungAdenoFilterName = "Lung adenocarcinoma samples with survival"; Bson lungAdenoFilter = Filters.and(Filters.in("exp_group.id_platform", platforms), Filters.eq("exp_group.id_tissue_status", 3), Filters.eq("exp_group.id_topology_group", "C34"), Filters.regex("exp_group.morphology", ".*denocarcinoma.*"), Filters.or(Filters.ne("exp_group.os_months", null), Filters.ne("exp_group.dfs_months", null))); String ntlFilterName = "Normal lung samples"; Bson ntlFilter = Filters.and(Filters.in("exp_group.id_platform", platforms), Filters.eq("exp_group.id_tissue_status", 1), Filters.eq("exp_group.id_topology_group", "C34")); String[] filterNames = { lungAdenoFilterName, ntlFilterName }; Bson[] sFilters = { lungAdenoFilter, ntlFilter }; for (int i = 0; i < sFilters.length; i++) { Bson filter = sFilters[i]; String filterName = filterNames[i]; List<Document> docs = collectionSamples.find(filter).into(new ArrayList<Document>()); Set<String> setGse = new HashSet<String>(); for (Document doc : docs) { setGse.add(doc.getString("main_gse_number")); // System.out.println(doc); } 
System.out.println("-------------------------------------------"); System.out.println("Query: " + filterName); System.out.println("Platforms: " + platformstext); System.out.println("Samples: " + docs.size()); System.out.println("Series: " + setGse); } } /* List<Document> list = collectionSamples .aggregate( Arrays.asList( Aggregates.match(filters), Aggregates.group("$exp_group.topology", Accumulators.sum("total", 1)), Aggregates.sort(Sorts.orderBy(Sorts.descending("total"))) )) .into(new ArrayList<Document>()); */ // collectionPlatforms.find(Filters.regex("title", ".*ethyl.*")).forEach(printBlock); mongoClient.close(); }
From source file:module.script.epimed_ontology.UpdateFetalAdultOvary.java
License:Open Source License
public UpdateFetalAdultOvary() { // ===== Session PostgreSQL ===== SessionFactory sessionFactory = HibernateUtil .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml"); Session session = sessionFactory.openSession(); ClTopologyDao topologyDao = new ClTopologyDao(session); ClTopology adultOvary = topologyDao.find("C56.9"); ClTopology fetalOvary = topologyDao.find("E56.9"); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSample = db.getCollection("sample"); Bson filters = Filters.and(Filters.eq("exp_group.id_topology", "C56.9"), // ovary Filters.eq("exp_group.id_tissue_stage", 1) // adult );/*from w w w .j av a 2 s. c o m*/ List<Document> samples = collectionSample.find(filters).into(new ArrayList<Document>()); for (Document sample : samples) { Document expgroup = sample.get("exp_group", Document.class); expgroup.append("id_topology", adultOvary.getIdTopology()); expgroup.append("topology", adultOvary.getName()); sample.append("exp_group", expgroup); collectionSample.updateOne(Filters.eq("_id", sample.getString("_id")), new Document("$set", sample)); } System.out.println(samples.size()); // === Commit transaction === // session.getTransaction().commit(); session.getTransaction().rollback(); if (session.isOpen()) { session.close(); } sessionFactory.close(); mongoClient.close(); }
From source file:module.script.ImportArrayExpress1733.java
License:Open Source License
public ImportArrayExpress1733() {
    // Imports ArrayExpress series (listAccessions) into Mongo: downloads each
    // IDF to build the series document, then the SDRF for per-sample clinical
    // data. Series already imported via their GEO (GSE) accession are skipped.

    // ===== Connection =====
    MongoClient mongoClient = MongoUtil.buildMongoClient();
    MongoDatabase db = mongoClient.getDatabase("epimed_experiments");
    MongoCollection<Document> collectionSeries = db.getCollection("series");
    MongoCollection<Document> collectionSamples = db.getCollection("samples");

    // ===== Pattern ===== (matches bracketed header qualifiers, e.g. "[age]")
    String patternText = "\\[[\\p{Print}\\p{Space}]+\\]";
    ;
    Pattern pattern = Pattern.compile(patternText);

    // ===== Series =====
    for (String accession : listAccessions) {

        List<String> accessionAsList = new ArrayList<String>();
        accessionAsList.add(accession);

        // Download and parse the IDF (series-level metadata).
        String urlString = "https://www.ebi.ac.uk/arrayexpress/files/" + accession + "/" + accession
                + ".idf.txt";
        System.out.println(urlString);
        String text = webService.loadUrl(urlString);

        String[] parts = text.split(lineSeparator);
        List<String> dataSeries = new ArrayList<String>(Arrays.asList(parts));
        AESeries series = new AESeries(dataSeries);
        System.out.println(series);

        // ===== Check if already imported as a GSE =====
        boolean isGseFound = false;
        String gseNumber = null;
        for (String secondaryAccession : series.getListAccessions()) {
            if (secondaryAccession.startsWith("GSE")) {
                gseNumber = secondaryAccession;
                Document gse = db.getCollection("series").find(Filters.eq("_id", secondaryAccession)).first();
                isGseFound = gse != null;
            }
        }

        // NOTE(review): nbImportedSamples is never incremented anywhere below,
        // so the final report always prints 0 — likely a leftover.
        int nbImportedSamples = 0;

        if (!isGseFound) {

            // ===== Create Mongo series =====
            Document docSeries = mongoService.createSeries(accession, series.getTitle(), null,
                    series.getSubmissionDate(), series.getSubmissionDate());
            if (series.getListAccessions() != null && !series.getListAccessions().isEmpty()) {
                docSeries.put("secondary_accessions", series.getListAccessions());
            }

            // NOTE(review): the series upsert is disabled by "if (false)" — the
            // series document is built and printed but never written. Confirm
            // whether this is an intentional dry-run switch.
            if (false) {
                UpdateResult updateResult = collectionSeries.updateOne(Filters.eq("_id", accession),
                        new Document("$set", docSeries));
                if (updateResult.getMatchedCount() == 0) {
                    collectionSeries.insertOne(docSeries);
                }
            }
            System.out.println(docSeries);

            // ===== Import clinical data (SDRF) =====
            String url = "https://www.ebi.ac.uk/arrayexpress/files/" + accession + "/" + series.getSdrf();
            System.out.println(url);
            String clindata = webService.loadUrl(url);
            String[] clinparts = clindata.split(lineSeparator);
            List<String> data = new ArrayList<String>(Arrays.asList(clinparts));

            // ===== Recognize samples ===== (row 0 is the header)
            List<String> header = this.createHeader(data.get(0), pattern);
            System.out.println(header);

            for (int i = 1; i < data.size(); i++) {
                Integer nbSamples = data.size() - 1;
                Map<String, Object> mapParameters = this.createMapParameters(data.get(i), header);
                String idSample = this.createIdSample(mapParameters);

                if (idSample == null) {
                    // Unrecognized sample id aborts the whole run.
                    System.err.println("ERROR: idSample is not recongnized for " + accession);
                    System.out.println("Line " + i);
                    System.out.println(mapParameters);
                    mongoClient.close();
                    System.exit(0);
                } else {
                    if (formatIdSample) {
                        // NOTE(review): the prefix is hard-coded to "E-MTAB-2836"
                        // although this class iterates over listAccessions and is
                        // named ImportArrayExpress1733 — probably a copy-paste
                        // leftover; should presumably use the accession variable.
                        idSample = "E-MTAB-2836" + "-" + idSample;
                        idSample = idSample.trim().replaceAll(" ", "-");
                    }
                }
                idSample = idSample.split(" ")[0].trim();

                // === Organism ===
                String organism = (String) mapParameters.get("organism");
                if (organism == null || organism.isEmpty()) {
                    organism = defaultOrganism;
                }

                // === Platform ===
                String platform = (String) mapParameters.get("LIBRARY_STRATEGY");
                if (platform != null && !platform.isEmpty()) {
                    platform = platform.toLowerCase().trim();
                } else {
                    platform = defaultPlatform;
                }

                Document docSampleExist = collectionSamples.find(Filters.eq("_id", idSample)).first();
                boolean docAlreadyExist = docSampleExist != null;
                System.out.println("docAlreadyExist " + docAlreadyExist);

                // === Existing sample: merge this accession into its series list ===
                // (new samples are not inserted here; only existing ones are updated)
                if (docAlreadyExist) {
                    List<String> listSeries = (List<String>) docSampleExist.get("series");
                    Set<String> setSeries = new HashSet<String>();
                    listSeries.add(accession);
                    setSeries.addAll(listSeries); // de-duplicate via a set
                    listSeries.clear();
                    listSeries.addAll(setSeries);
                    docSampleExist.append("series", listSeries);
                    System.out.println(docSampleExist);
                    if (commit) {
                        // Replace the document: delete then re-insert.
                        collectionSamples.deleteOne(eq("_id", docSampleExist.get("_id")));
                        collectionSamples.insertOne(docSampleExist);
                    }
                }
            }

        } else {
            System.out.println("GEO accession " + gseNumber + " corresponding to " + accession
                    + " exists already. Skip import.");
        }
        System.out.println("Number of imported samples: " + nbImportedSamples);
    }
    mongoClient.close();
}