List of usage examples for the com.mongodb.client.MongoDatabase.getCollection method
// Signature of the method exercised by the examples below: it takes a collection name
// and its declared return type is MongoCollection<Document>.
MongoCollection<Document> getCollection(String collectionName);
From source file:module.script.CorrectImportedData.java
License:Open Source License
public CorrectImportedData() { // ===== Service ===== FormatService formatService = new FormatService(); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collection = db.getCollection("sample"); Bson filters = Filters.and(Filters.eq("main_gse_number", gseNumber)); List<Document> listDocuments = collection.find(filters).into(new ArrayList<Document>()); for (int i = 0; i < listDocuments.size(); i++) { Document doc = listDocuments.get(i); Document expgroup = (Document) doc.get("exp_group"); Document parameters = (Document) doc.get("parameters"); expgroup.append("id_tissue_stage", 2); expgroup.append("tissue_stage", "fetal"); // Update Mongo document doc.put("exp_group", expgroup); // doc.put("parameters", parameters); doc.put("analyzed", true); System.out.println(expgroup); collection.updateOne(Filters.eq("_id", doc.getString("_id")), new Document("$set", doc)); }//from w w w.j av a 2 s. co m mongoClient.close(); }
From source file:module.script.emtab365.CreateSeriesEMTAB365.java
License:Open Source License
@SuppressWarnings({ "unchecked" }) public CreateSeriesEMTAB365() { // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); // MongoDatabase db = mongoClient.getDatabase("geo"); MongoCollection<Document> collectionSeries = db.getCollection("series"); Date submissionDate = null;/*w ww.j a v a 2s. c o m*/ Date lastUpdate = null; try { submissionDate = dateFormat.parse("2011-09-15"); lastUpdate = dateFormat.parse("2014-05-03"); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } List<String> platforms = new ArrayList<String>(); platforms.add("GPL570"); Document docSeries = new Document(); docSeries.append("_id", "E-MTAB-365") .append("title", "Transcription profiling by array of breast cancer samples to define breast cancer subsets") .append("platforms", platforms).append("submission_date", submissionDate) .append("last_update", lastUpdate).append("import_date", new Date()); System.out.println(docSeries); collectionSeries.insertOne(docSeries); mongoClient.close(); }
From source file:module.script.emtab365.ImportSamplesEMTAB365.java
License:Open Source License
public ImportSamplesEMTAB365() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); // ===== Collections ====== MongoCollection<Document> collectionPlatforms = db.getCollection("platforms"); MongoCollection<Document> collectionSeries = db.getCollection("series"); MongoCollection<Document> collectionSamples = db.getCollection("samples"); // ===== Excel data loader ===== String inputfile = this.getInputDirectory() + this.getDirSeparator() + "E-MTAB-365.sdrf.xlsx"; System.out.println("LOADING \t " + inputfile); excelService.load(inputfile);/* ww w . j ava 2s .c o m*/ // ===== Init values ====== String idSeries = "E-MTAB-365"; List<String> listSeries = new ArrayList<String>(); listSeries.add(idSeries); Document docSeries = collectionSeries.find(Filters.eq("_id", idSeries)).first(); String organism = "Homo sapiens"; // ==== Header processing ==== Map<Integer, String> mapHeader = new HashMap<Integer, String>(); for (int i = 0; i < excelService.getHeader().size(); i++) { String headerItem = (String) excelService.getHeader().get(i); if (headerItem != null && headerItem.contains("[")) { String[] parts = headerItem.split("[\\[\\]]"); headerItem = parts[1]; headerItem = headerItem.replaceAll("[:_\\.]", " "); } mapHeader.put(i, headerItem.trim()); } System.out.println(mapHeader); for (int i = 0; i < excelService.getData().size(); i++) { // for (int i=0; i<1; i++) { List<Object> dataline = excelService.getData().get(i); String idSample = (String) dataline.get(0); if (!idSample.equals("pool XX")) { String idPlatform = ((String) dataline.get(54)).trim(); if (idPlatform.contains("A-AFFY-44")) { idPlatform = "GPL570"; } else { Document docPlatform = mongoService.createPlatform(idPlatform, null, "9606", "Homo sapiens", null, null, null, null); UpdateResult res = collectionPlatforms.updateOne( Filters.eq("_id", docPlatform.getString("_id")), new Document("$set", docPlatform)); if 
(res.getMatchedCount() == 0) { collectionPlatforms.insertOne(docPlatform); } } Document docSample = mongoService.createSample(idSample, idSeries, listSeries, organism, (Date) docSeries.get("submission_date"), (Date) docSeries.get("last_update"), false); // === exp_group === Document expgroup = mongoService.createExpGroup(docSample, idPlatform, null, null, organism); docSample.append("exp_group", expgroup); // === parameters === Map<String, Object> mapParameters = new HashMap<String, Object>(); for (int j = 0; j < dataline.size(); j++) { String key = mapHeader.get(j); Object value = dataline.get(j); if (value instanceof String) { String valueString = ((String) value).trim(); if (valueString != null && !valueString.isEmpty() && !valueString.equals("NA") && !valueString.equals("ND")) { value = valueString; } else { value = null; } } if (key != null && value != null) { mapParameters.put(key, value); // System.out.println(key + "='" + value+"'"); } } Document parameters = mongoService.createParameters(docSample, mapParameters); docSample.append("parameters", parameters); // === Delete if already exist === collectionSamples.deleteOne(Filters.eq("_id", docSample.getString("_id"))); // ===== Insert data ===== collectionSamples.insertOne(docSample); System.out.println(docSample); } } mongoClient.close(); }
From source file:module.script.emtab365.UpdateSamplesEMTAB365.java
License:Open Source License
public UpdateSamplesEMTAB365() { // ===== Session PostgreSQL ===== SessionFactory sessionFactory = HibernateUtil .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml"); Session session = sessionFactory.openSession(); // ===== INIT ===== ClMorphology ductal = session.get(ClMorphology.class, "8500/3"); // 8500/3 Infiltrating duct carcinoma, NOS (C50._) ClMorphology lobular = session.get(ClMorphology.class, "8520/3"); // 8520/3 Lobular carcinoma, NOS (C50._) ClMorphology morphology = session.get(ClMorphology.class, "8010/3"); // Carcinoma ClTopology breast = session.get(ClTopology.class, "C50.9"); // Breast ClTopology blood = session.get(ClTopology.class, "C42.0"); // Blood ClTopology lymphNode = session.get(ClTopology.class, "C77.9"); // Lymph node // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collection = db.getCollection("samples"); List<Document> listDocuments = collection.find(Filters.in("series", gseNumber)) .into(new ArrayList<Document>()); for (int i = 0; i < listDocuments.size(); i++) { Document doc = listDocuments.get(i); Document expgroup = (Document) doc.get("exp_group"); Document parameters = (Document) doc.get("parameters"); String histoType = parameters.getString("Histology"); String histoSubtype = parameters.getString("CIT classification"); expgroup.put("histology_subtype", histoSubtype); if (histoType != null && histoType.toLowerCase().equals("lobular")) { morphology = lobular;/*from www . j a v a 2s . 
co m*/ } if (histoType != null && histoType.toLowerCase().equals("ductal")) { morphology = ductal; } expgroup.put("id_morphology", morphology.getIdMorphology()); expgroup.put("morphology", morphology.getName()); expgroup.put("sample_source", parameters.getString("Source Name")); String organismPart = parameters.getString("OrgansimPart"); ClTopology topology = null; if (organismPart != null) { if (organismPart.toLowerCase().contains("breast")) { topology = breast; } if (organismPart.toLowerCase().contains("blood")) { topology = blood; } if (organismPart.toLowerCase().contains("lymph")) { topology = lymphNode; } } else { topology = breast; } expgroup.put("id_topology", topology.getIdTopology()); expgroup.put("topology", topology.getName()); expgroup.put("id_topology_group", topology.getClTopologyGroup().getIdGroup()); expgroup.put("topology_group", topology.getClTopologyGroup().getName()); // ==== Survival ===== Object dfs_months = parameters.get("Delay Metastasis Free Survival months"); if (dfs_months != null) { expgroup.put("dfs_months", dfs_months); } Object os_months = parameters.get("Delay Overall Survival months"); if (os_months != null) { expgroup.put("os_months", os_months); } Double os = (Double) expgroup.get("os_months"); Double dfs = (Double) expgroup.get("dfs_months"); if (os != null && dfs != null && dfs.equals(os)) { expgroup.put("relapsed", false); } if (os != null && dfs != null && dfs < os) { expgroup.put("relapsed", true); } if (os != null && dfs != null && dfs > os) { expgroup.put("relapsed", null); } Object relapseDate = parameters.get("Relapse Metastasis Date"); if (relapseDate != null) { expgroup.put("relapsed", true); } // ==== Grade ==== expgroup.put("tnm_grade", parameters.get("Grade Scarff Bloom Richardson")); // ==== Files ===== expgroup.put("ftp", parameters.getString("ArrayExpress FTP file")); expgroup.put("file_name", parameters.getString("Array Data File")); expgroup.remove("individual"); if (parameters.getString("Individual") != null) 
{ expgroup.put("individual", parameters.getString("Individual")); } // ==== Biomarkers ==== /* String p53 = parameters.getString("Phenotype - TP53 Gene mutation Status"); expgroup.put("p53", value) String pr = parameters.getString("PGR Protein expression"); String er = parameters.getString("ESR1 Protein expression"); String her2 = parameters.getString("ERBB2 Protein expression"); */ doc.put("exp_group", expgroup); System.out.println(i + " " + doc.get("_id") + " " + doc.get("analyzed") + " " + expgroup); if (commit) { UpdateResult updateResult = collection.updateOne(Filters.eq("_id", doc.get("_id")), new Document("$set", doc)); } } if (session.isOpen()) { session.close(); } sessionFactory.close(); mongoClient.close(); }
From source file:module.script.epilung.SearchSamples.java
License:Open Source License
public SearchSamples() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSamples = db.getCollection("samples"); MongoCollection<Document> collectionPlatforms = db.getCollection("platforms"); Bson filters = Filters.and(// w ww.j av a2 s.c om Filters.in("exp_group.id_platform", new String[] { "GPL13534", "GPL8490", "GPL21145" }), Filters.eq("exp_group.id_tissue_status", 1), Filters.ne("exp_group.id_topology", null)); /* List<Document> list = collectionSamples .find(filters) .into(new ArrayList<Document>()); */ List<Document> list = collectionSamples.aggregate(Arrays.asList(Aggregates.match(filters), Aggregates.group("$exp_group.topology", Accumulators.sum("total", 1)), Aggregates.sort(Sorts.orderBy(Sorts.descending("total"))))).into(new ArrayList<Document>()); for (int i = 0; i < list.size(); i++) { System.out.println((i + 1) + " " + list.get(i)); } collectionPlatforms.find(Filters.regex("title", ".*ethyl.*")).forEach(printBlock); mongoClient.close(); }
From source file:module.script.epilung.SearchSamplesLungAdenocarcinoma.java
License:Open Source License
public SearchSamplesLungAdenocarcinoma() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSamples = db.getCollection("samples"); MongoCollection<Document> collectionPlatforms = db.getCollection("platforms"); System.out.println("\n================== SUMMARY =================="); Bson[] pFilters = { Filters.eq("_id", "GPL570"), Filters.eq("type", "rna-seq") }; for (Bson pFilter : pFilters) { // === Platforms === List<String> platforms = new ArrayList<String>(); List<String> platformstext = new ArrayList<String>(); List<Document> list = collectionPlatforms.find(pFilter).into(new ArrayList<Document>()); for (Document doc : list) { platforms.add(doc.getString("_id")); platformstext.add(doc.getString("_id") + " " + doc.getString("type")); }//from w w w . java2s .c om String lungAdenoFilterName = "Lung adenocarcinoma samples with survival"; Bson lungAdenoFilter = Filters.and(Filters.in("exp_group.id_platform", platforms), Filters.eq("exp_group.id_tissue_status", 3), Filters.eq("exp_group.id_topology_group", "C34"), Filters.regex("exp_group.morphology", ".*denocarcinoma.*"), Filters.or(Filters.ne("exp_group.os_months", null), Filters.ne("exp_group.dfs_months", null))); String ntlFilterName = "Normal lung samples"; Bson ntlFilter = Filters.and(Filters.in("exp_group.id_platform", platforms), Filters.eq("exp_group.id_tissue_status", 1), Filters.eq("exp_group.id_topology_group", "C34")); String[] filterNames = { lungAdenoFilterName, ntlFilterName }; Bson[] sFilters = { lungAdenoFilter, ntlFilter }; for (int i = 0; i < sFilters.length; i++) { Bson filter = sFilters[i]; String filterName = filterNames[i]; List<Document> docs = collectionSamples.find(filter).into(new ArrayList<Document>()); Set<String> setGse = new HashSet<String>(); for (Document doc : docs) { setGse.add(doc.getString("main_gse_number")); // System.out.println(doc); } 
System.out.println("-------------------------------------------"); System.out.println("Query: " + filterName); System.out.println("Platforms: " + platformstext); System.out.println("Samples: " + docs.size()); System.out.println("Series: " + setGse); } } /* List<Document> list = collectionSamples .aggregate( Arrays.asList( Aggregates.match(filters), Aggregates.group("$exp_group.topology", Accumulators.sum("total", 1)), Aggregates.sort(Sorts.orderBy(Sorts.descending("total"))) )) .into(new ArrayList<Document>()); */ // collectionPlatforms.find(Filters.regex("title", ".*ethyl.*")).forEach(printBlock); mongoClient.close(); }
From source file:module.script.epimed_ontology.AddEpimedGroupToSamples.java
License:Open Source License
public AddEpimedGroupToSamples() { // ===== Session PostgreSQL ===== SessionFactory sessionFactory = HibernateUtil .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml"); Session session = sessionFactory.openSession(); ClTopologyDao topologyDao = new ClTopologyDao(session); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSample = db.getCollection("sample"); List<Document> samples = collectionSample.find().into(new ArrayList<Document>()); for (int i = 0; i < samples.size(); i++) { Document sample = samples.get(i); Document expgroup = sample.get("exp_group", Document.class); String idTopology = expgroup.getString("id_topology"); if (idTopology != null && !idTopology.isEmpty()) { ClTopology topology = topologyDao.find(idTopology); ClEpimedGroup grp1 = topology.getClEpimedGroup(); ClEpimedGroup grp2 = grp1.getParent(); ClEpimedGroup grp3 = grp2.getParent(); expgroup.append("tissue_group_level1", grp1.getName()); expgroup.append("tissue_group_level2", grp2.getName()); expgroup.append("tissue_group_level3", grp3.getName()); System.out.println((i + 1) + "/" + samples.size() + " " + expgroup); sample.append("exp_group", expgroup); collectionSample.updateOne(Filters.eq("_id", sample.getString("_id")), new Document("$set", sample)); }/*from w w w .j a v a 2 s. c om*/ } // === Commit transaction === // session.getTransaction().commit(); session.getTransaction().rollback(); if (session.isOpen()) { session.close(); } sessionFactory.close(); mongoClient.close(); }
From source file:module.script.epimed_ontology.UpdateFetalAdultOvary.java
License:Open Source License
public UpdateFetalAdultOvary() { // ===== Session PostgreSQL ===== SessionFactory sessionFactory = HibernateUtil .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml"); Session session = sessionFactory.openSession(); ClTopologyDao topologyDao = new ClTopologyDao(session); ClTopology adultOvary = topologyDao.find("C56.9"); ClTopology fetalOvary = topologyDao.find("E56.9"); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSample = db.getCollection("sample"); Bson filters = Filters.and(Filters.eq("exp_group.id_topology", "C56.9"), // ovary Filters.eq("exp_group.id_tissue_stage", 1) // adult );/*w ww .ja va 2 s.co m*/ List<Document> samples = collectionSample.find(filters).into(new ArrayList<Document>()); for (Document sample : samples) { Document expgroup = sample.get("exp_group", Document.class); expgroup.append("id_topology", adultOvary.getIdTopology()); expgroup.append("topology", adultOvary.getName()); sample.append("exp_group", expgroup); collectionSample.updateOne(Filters.eq("_id", sample.getString("_id")), new Document("$set", sample)); } System.out.println(samples.size()); // === Commit transaction === // session.getTransaction().commit(); session.getTransaction().rollback(); if (session.isOpen()) { session.close(); } sessionFactory.close(); mongoClient.close(); }
From source file:module.script.ImportArrayExpress1733.java
License:Open Source License
public ImportArrayExpress1733() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSeries = db.getCollection("series"); MongoCollection<Document> collectionSamples = db.getCollection("samples"); // ===== Pattern ===== String patternText = "\\[[\\p{Print}\\p{Space}]+\\]"; ;/*from ww w . ja v a 2 s .c o m*/ Pattern pattern = Pattern.compile(patternText); // ===== Series ===== for (String accession : listAccessions) { List<String> accessionAsList = new ArrayList<String>(); accessionAsList.add(accession); String urlString = "https://www.ebi.ac.uk/arrayexpress/files/" + accession + "/" + accession + ".idf.txt"; System.out.println(urlString); String text = webService.loadUrl(urlString); String[] parts = text.split(lineSeparator); List<String> dataSeries = new ArrayList<String>(Arrays.asList(parts)); AESeries series = new AESeries(dataSeries); System.out.println(series); // ===== Check if already imported as a GSE ===== boolean isGseFound = false; String gseNumber = null; for (String secondaryAccession : series.getListAccessions()) { if (secondaryAccession.startsWith("GSE")) { gseNumber = secondaryAccession; Document gse = db.getCollection("series").find(Filters.eq("_id", secondaryAccession)).first(); isGseFound = gse != null; } } int nbImportedSamples = 0; if (!isGseFound) { // ===== Create Mongo series ===== Document docSeries = mongoService.createSeries(accession, series.getTitle(), null, series.getSubmissionDate(), series.getSubmissionDate()); if (series.getListAccessions() != null && !series.getListAccessions().isEmpty()) { docSeries.put("secondary_accessions", series.getListAccessions()); } if (false) { UpdateResult updateResult = collectionSeries.updateOne(Filters.eq("_id", accession), new Document("$set", docSeries)); if (updateResult.getMatchedCount() == 0) { collectionSeries.insertOne(docSeries); } } 
System.out.println(docSeries); // ===== Import clinical data ===== String url = "https://www.ebi.ac.uk/arrayexpress/files/" + accession + "/" + series.getSdrf(); System.out.println(url); String clindata = webService.loadUrl(url); String[] clinparts = clindata.split(lineSeparator); List<String> data = new ArrayList<String>(Arrays.asList(clinparts)); // ===== Recognize samples ===== List<String> header = this.createHeader(data.get(0), pattern); System.out.println(header); for (int i = 1; i < data.size(); i++) { Integer nbSamples = data.size() - 1; Map<String, Object> mapParameters = this.createMapParameters(data.get(i), header); String idSample = this.createIdSample(mapParameters); if (idSample == null) { System.err.println("ERROR: idSample is not recongnized for " + accession); System.out.println("Line " + i); System.out.println(mapParameters); mongoClient.close(); System.exit(0); } else { if (formatIdSample) { idSample = "E-MTAB-2836" + "-" + idSample; idSample = idSample.trim().replaceAll(" ", "-"); } } idSample = idSample.split(" ")[0].trim(); // === Organism === String organism = (String) mapParameters.get("organism"); if (organism == null || organism.isEmpty()) { organism = defaultOrganism; } // === Platform === String platform = (String) mapParameters.get("LIBRARY_STRATEGY"); if (platform != null && !platform.isEmpty()) { platform = platform.toLowerCase().trim(); } else { platform = defaultPlatform; } Document docSampleExist = collectionSamples.find(Filters.eq("_id", idSample)).first(); boolean docAlreadyExist = docSampleExist != null; System.out.println("docAlreadyExist " + docAlreadyExist); // === Delete old if already exist === if (docAlreadyExist) { List<String> listSeries = (List<String>) docSampleExist.get("series"); Set<String> setSeries = new HashSet<String>(); listSeries.add(accession); setSeries.addAll(listSeries); listSeries.clear(); listSeries.addAll(setSeries); docSampleExist.append("series", listSeries); System.out.println(docSampleExist); if 
(commit) { collectionSamples.deleteOne(eq("_id", docSampleExist.get("_id"))); collectionSamples.insertOne(docSampleExist); } } } } else { System.out.println("GEO accession " + gseNumber + " corresponding to " + accession + " exists already. Skip import."); } System.out.println("Number of imported samples: " + nbImportedSamples); } mongoClient.close(); }
From source file:module.script.ImportSupplementaryGSE20711.java
License:Open Source License
public ImportSupplementaryGSE20711() { // ===== Session PostgreSQL ===== SessionFactory sessionFactory = HibernateUtil .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml"); Session session = sessionFactory.openSession(); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collection = db.getCollection("samples"); // ===== Excel data loader ===== String inputfile = this.getInputDirectory() + this.getDirSeparator() + "GSE20711_emmm0003-0726-SD2.xlsx"; System.out.println("LOADING \t " + inputfile); ExcelService excelService = new ExcelService(); excelService.load(inputfile);/* w w w. ja va2 s . c o m*/ String gseNumber = "GSE20711"; for (int i = 0; i < excelService.getData().size(); i++) { List<Object> dataLine = excelService.getData().get(i); String bcString = (String) dataLine.get(0); bcString = bcString.replaceAll("BC", ""); Integer bcNumber = Integer.parseInt(bcString); Document docSample = collection .find(Filters .and(Filters.in("series", gseNumber), Filters.eq("exp_group.sample_title", "Breast tumor from patient P_" + bcNumber + " (expression data)"))) .first(); System.out.println("-------------------------------------------"); System.out.println(dataLine); System.out.println(docSample); } if (session.isOpen()) { session.close(); } sessionFactory.close(); mongoClient.close(); }