List of usage examples for com.mongodb.client MongoDatabase getCollection
/**
 * Gets the collection with the given name, typed as a collection of {@code Document}.
 *
 * @param collectionName the name of the collection to return
 * @return the {@code MongoCollection<Document>} for {@code collectionName}
 */
MongoCollection<Document> getCollection(String collectionName);
From source file:module.AnalyseGeo.java
License:Open Source License
public AnalyseGeo() { // ===== Session PostgreSQL ===== SessionFactory sessionFactory = HibernateUtil .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml"); Session session = sessionFactory.openSession(); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collection = db.getCollection("sample"); List<Document> listDocuments = collection.find(Filters.in("series", gseNumber)) // .find(Filters.and(Filters.in("series", gseNumber), Filters.eq("analyzed", false))) .into(new ArrayList<Document>()); // ===== Service ===== OntologyService ontologyService = new OntologyService(session); DispatcherFactory dispatcherFactory = new DispatcherFactory(session); // ===== Begin transaction ===== session.beginTransaction();/*from w w w .j a v a2s .c om*/ // ===== Analyse ====== for (int i = 0; i < listDocuments.size(); i++) { // for (int i=0; i<1; i++) { Document doc = listDocuments.get(i); Document expGroup = (Document) doc.get("exp_group"); String gsmNumber = doc.getString("_id"); List<String> listEntries = new ArrayList<String>(); List<String> parameters = new ArrayList<String>(); String title = (String) expGroup.get("sample_title"); String source = (String) expGroup.get("sample_source"); listEntries.add(title); listEntries.add(source); Map<String, Object> mapParameters = (Map<String, Object>) doc.get("parameters"); parameters.addAll(mapParameters.keySet()); parameters.remove("id_sample"); parameters.remove("extract_protocol"); // To remove parameters.remove("lab description"); for (int j = 0; j < parameters.size(); j++) { listEntries.add(parameters.get(j) + ": " + mapParameters.get(parameters.get(j))); } // === Clear already filled fields (only if necessary) === // this.clear(expGroup); Map<String, List<Object>> mapOntologyObjects = ontologyService.recognizeOntologyObjects(listEntries); // Map <ClOntologyCategory, Set<String>> 
mapOntologyCategories = ontologyService.getMapOntologyCategories(); // this.generateSummary(ontologyService, mapOntologyCategories, mapOntologyObjects); System.out.println("------------------------------------------------------------"); System.out.println(i + " " + gsmNumber + " " + listEntries); System.out.println(ontologyService.toString()); // ===== Create mapping objects and making links ===== try { // === Dispatcher === for (int j = 0; j < categories.length; j++) { dispatcherFactory.getObject(expGroup, mapOntologyObjects, categories[j]); System.out.print(categories[j]); if (expGroup.getString(categories[j]) != null) { System.out.print(" " + expGroup.getString(categories[j]) + "\n"); } else { System.out.print("\n"); } } System.out.println(expGroup); // Update Mongo document doc.put("exp_group", expGroup); doc.put("analyzed", true); if (commit) { UpdateResult updateResult = collection.updateOne(Filters.eq("_id", gsmNumber), new Document("$set", doc)); } } catch (DispatcherException e) { // TODO Auto-generated catch block e.printStackTrace(); } } if (commit) { MongoCollection<Document> collectionSeries = db.getCollection("series"); Document series = collectionSeries.find(Filters.eq("_id", gseNumber)).first(); series.put("status", "analyzed"); collectionSeries.updateOne(Filters.eq("_id", gseNumber), new Document("$set", series)); } // === Commit transaction === session.getTransaction().commit(); // session.getTransaction().rollback(); if (session.isOpen()) { session.close(); } sessionFactory.close(); mongoClient.close(); }
From source file:module.ClearGeoExpGroup.java
License:Open Source License
public ClearGeoExpGroup() { // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collection = db.getCollection("samples"); List<Document> listDocuments = collection.find(Filters.in("series", gseNumber)) .into(new ArrayList<Document>()); // ===== Analyse ====== for (int i = 0; i < listDocuments.size(); i++) { Document doc = listDocuments.get(i); String id = doc.getString("_id"); Document expGroup = (Document) doc.get("exp_group"); this.clear(expGroup); expGroup.remove("er"); expGroup.remove("pr"); expGroup.remove("her2"); expGroup.remove("triple_negative"); // Update Mongo document doc.put("exp_group", expGroup); doc.put("analyzed", false); if (commit) { UpdateResult updateResult = collection.updateOne(Filters.eq("_id", id), new Document("$set", doc)); }//from w w w . ja v a 2 s . c o m } mongoClient.close(); }
From source file:module.CreateStudy.java
License:Open Source License
@SuppressWarnings("unchecked") public CreateStudy() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); // === Excel data loader === String inputfile = this.getInputDirectory() + this.getDirSeparator() + "prolung2_expgrp4.xlsx"; System.out.println("LOADING \t " + inputfile); ExcelService excelService = new ExcelService(); excelService.load(inputfile);/*from w ww . j ava 2 s . c o m*/ List<Object> listCel = excelService.extractColumn(0); Integer indCel = excelService.getHeaderMap().get("gse8894_sample_cel"); // === New Series === MongoCollection<Document> collectionSeries = db.getCollection("series"); Document docSeries = new Document(); docSeries.append("_id", "PROLUNG").append("title", "Lung cancerous and non-cancerous samples") .append("platforms", null).append("submission_date", today).append("last_update", today) .append("import_date", today); UpdateResult updateResult = collectionSeries.updateOne(Filters.eq("_id", docSeries.get("_id")), new Document("$set", docSeries)); if (updateResult.getMatchedCount() == 0) { collectionSeries.insertOne(docSeries); } // === Add samples to new series === MongoCollection<Document> collectionSamples = db.getCollection("samples"); for (int i = 0; i < listCel.size(); i++) { String gsm = this.getGsm(listCel.get(i)); Document docSample = collectionSamples.find(Filters.eq("_id", gsm)).first(); if (docSample == null) { System.err.println("ERROR! Sample " + gsm + "doesn't exist. Try another column."); gsm = this.getGsm(excelService.getData().get(i).get(indCel)); docSample = collectionSamples.find(Filters.eq("_id", gsm)).first(); if (docSample == null) { System.err.println("ERROR! Sample " + gsm + " doesn't exist. 
Exit."); System.exit(0); } else { System.err.println("Found " + gsm); } } Document expGroup = (Document) docSample.get("exp_group"); setGpl.add(expGroup.get("id_platform").toString()); List<String> listSeries = (List<String>) docSample.get("series"); listSeries.add(docSeries.getString("_id")); docSample.put("series", listSeries); System.out.println(docSample); // updateResult = collectionSamples.updateOne(Filters.eq("_id", docSample.get("_id")), new Document("$set", docSample)); } // === Update platforms of the series === System.out.println(setGpl); docSeries.put("platforms", setGpl); updateResult = collectionSeries.updateOne(Filters.eq("_id", docSeries.get("_id")), new Document("$set", docSeries)); if (updateResult.getMatchedCount() == 0) { collectionSeries.insertOne(docSeries); } }
From source file:module.ExportGeo.java
License:Open Source License
public ExportGeo() { String gseNumber = "GSE2109"; // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collection = db.getCollection("samples"); // ===== Find exp_group in the database ===== List<Document> docExpGroup = collection.find(Filters.in("series", gseNumber)) .projection(Projections.fields(Projections.include("exp_group"), Projections.excludeId())) .into(new ArrayList<Document>()); List<Document> docParam = collection.find(Filters.in("series", gseNumber)) .projection(Projections.fields(Projections.include("parameters"), Projections.excludeId())) .into(new ArrayList<Document>()); mongoClient.close();/*from www . j a va2s . c o m*/ // ===== Load Exp Group into a matrix ===== List<String> headerExpGroup = new ArrayList<String>(); List<Object> dataExpGroup = new ArrayList<Object>(); for (int i = 0; i < docExpGroup.size(); i++) { Map<String, String> expGroup = (Map<String, String>) docExpGroup.get(i).get("exp_group"); if (i == 0) { headerExpGroup.addAll(expGroup.keySet()); } String[] dataLine = new String[headerExpGroup.size()]; for (int j = 0; j < headerExpGroup.size(); j++) { dataLine[j] = expGroup.get(headerExpGroup.get(j)); } dataExpGroup.add(dataLine); } // ===== Load Params into a matrix ===== Set<String> headerParamSet = new HashSet<String>(); List<String> headerParam = new ArrayList<String>(); List<Object> dataParam = new ArrayList<Object>(); for (int i = 0; i < docParam.size(); i++) { Map<String, String> param = (Map<String, String>) docParam.get(i).get("parameters"); headerParamSet.addAll(param.keySet()); } headerParam.addAll(headerParamSet); Collections.sort(headerParam); for (int i = 0; i < docParam.size(); i++) { Map<String, String> param = (Map<String, String>) docParam.get(i).get("parameters"); String[] dataLine = new String[headerParam.size()]; for (int j = 0; j < headerParam.size(); j++) { dataLine[j] = 
param.get(headerParam.get(j)); } // System.out.println(Arrays.toString(dataLine)); dataParam.add(dataLine); } // === Output === String fileName = this.getOutputDirectory() + this.getDirSeparator() + "Export_Mongo_" + gseNumber + "_" + dateFormat.format(new Date()) + ".xlsx"; System.out.println(fileName); XSSFWorkbook workbook = fileService.createWorkbook(); fileService.addSheet(workbook, "exp_group" + dateFormat.format(new Date()), headerExpGroup, dataExpGroup); fileService.addSheet(workbook, "parameters_" + dateFormat.format(new Date()), headerParam, dataParam); fileService.writeWorkbook(workbook, fileName); }
From source file:module.ImportArrayExpress.java
License:Open Source License
public ImportArrayExpress() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSeries = db.getCollection("series"); MongoCollection<Document> collectionSamples = db.getCollection("sample"); // ===== Pattern ===== String patternText = "\\[[\\p{Print}\\p{Space}]+\\]"; ;//from w w w .ja va 2 s. c o m Pattern pattern = Pattern.compile(patternText); // ===== Series ===== for (String accession : listAccessions) { List<String> accessionAsList = new ArrayList<String>(); accessionAsList.add(accession); String urlString = "https://www.ebi.ac.uk/arrayexpress/files/" + accession + "/" + accession + ".idf.txt"; System.out.println(urlString); String text = webService.loadUrl(urlString); String[] parts = text.split(lineSeparator); List<String> dataSeries = new ArrayList<String>(Arrays.asList(parts)); AESeries series = new AESeries(dataSeries); System.out.println(series); // ===== Check if already imported as a GSE ===== boolean isGseFound = false; String gseNumber = null; for (String secondaryAccession : series.getListAccessions()) { if (secondaryAccession.startsWith("GSE")) { gseNumber = secondaryAccession; Document gse = db.getCollection("series").find(Filters.eq("_id", secondaryAccession)).first(); isGseFound = gse != null; } } int nbImportedSamples = 0; if (!isGseFound) { // ===== Create Mongo series ===== Document docSeries = mongoService.createSeries(accession, series.getTitle(), null, series.getSubmissionDate(), series.getSubmissionDate()); if (series.getListAccessions() != null && !series.getListAccessions().isEmpty()) { docSeries.put("secondary_accessions", series.getListAccessions()); } if (commit) { UpdateResult updateResult = collectionSeries.updateOne(Filters.eq("_id", accession), new Document("$set", docSeries)); if (updateResult.getMatchedCount() == 0) { collectionSeries.insertOne(docSeries); } } System.out.println(docSeries); // 
===== Import clinical data ===== String url = "https://www.ebi.ac.uk/arrayexpress/files/" + accession + "/" + series.getSdrf(); System.out.println(url); String clindata = webService.loadUrl(url); String[] clinparts = clindata.split(lineSeparator); List<String> data = new ArrayList<String>(Arrays.asList(clinparts)); // ===== Recognize samples ===== List<String> header = this.createHeader(data.get(0), pattern); System.out.println(header); for (int i = 1; i < data.size(); i++) { Integer nbSamples = data.size() - 1; Map<String, Object> mapParameters = this.createMapParameters(data.get(i), header); String idSample = this.createIdSample(mapParameters); if (idSample == null) { System.err.println("ERROR: idSample is not recongnized for " + accession); System.out.println("Line " + i); System.out.println(mapParameters); mongoClient.close(); System.exit(0); } else { if (formatIdSample) { idSample = accession + "-" + idSample; idSample = idSample.trim().replaceAll(" ", "-"); } } idSample = idSample.split(" ")[0].trim(); // === Organism === String organism = (String) mapParameters.get("organism"); if (organism == null || organism.isEmpty()) { organism = defaultOrganism; } // === Platform === String platform = (String) mapParameters.get("LIBRARY_STRATEGY"); if (platform != null && !platform.isEmpty()) { platform = platform.toLowerCase().trim(); } else { platform = defaultPlatform; } Document docSampleExist = collectionSamples.find(Filters.eq("_id", idSample)).first(); boolean docAlreadyExist = docSampleExist != null; boolean analysed = false; if (docAlreadyExist) { analysed = (Boolean) docSampleExist.get("analyzed"); } // ===== Sample Document ===== Document docSample = mongoService.createSample(idSample, (String) docSeries.get("_id"), accessionAsList, organism, (Date) docSeries.get("submission_date"), (Date) docSeries.get("last_update"), analysed); Document expGroup = null; Document parameters = null; // 
System.out.println("------------------------------------------------------------------"); if (docAlreadyExist) { // === ID sample alredy exists === System.out.println(i + "/" + nbSamples + "\t " + docSeries.get("_id") + "\t " + idSample + ": already exists in the database, analyzed=" + analysed); expGroup = docSampleExist.get("exp_group", Document.class); parameters = mongoService.updateParameters(docSampleExist, mapParameters); } else { // === New sample === System.out.println(i + "/" + nbSamples + "\t " + docSeries.get("_id") + "\t " + idSample); expGroup = mongoService.createExpGroup(docSample, platform, null, null, organism); parameters = mongoService.createParameters(docSample, mapParameters); nbImportedSamples++; } // === Update sample_title, sample_source, layout === expGroup.put("sample_title", parameters.getString("organism part")); expGroup.put("sample_source", parameters.getString("Source Name")); expGroup.put("layout", parameters.getString("LIBRARY_LAYOUT")); docSample.append("exp_group", expGroup); docSample.append("parameters", parameters); if (commit) { // === Update old if already exist === if (docAlreadyExist) { // collectionSamples.deleteOne(eq("_id", idSample)); collectionSamples.updateOne(Filters.eq("_id", idSample), new Document("$set", docSample)); } else { // ===== Insert data ===== collectionSamples.insertOne(docSample); } // ===== Update series for platforms ===== List<String> listPlatforms = collectionSamples .distinct("exp_group.id_platform", Filters.in("series", accession), String.class) .into(new ArrayList<String>()); docSeries.append("platforms", listPlatforms); collectionSeries.updateOne(Filters.eq("_id", accession), new Document("$set", docSeries)); } } } else { System.out.println("GEO accession " + gseNumber + " corresponding to " + accession + " exists already. Skip import."); } System.out.println("Number of imported samples: " + nbImportedSamples); } mongoClient.close(); }
From source file:module.ImportArrayExpressInit.java
License:Open Source License
public ImportArrayExpressInit() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionSeries = db.getCollection("series"); MongoCollection<Document> collectionSamples = db.getCollection("sample"); // ===== Pattern ===== String patternText = "\\[[\\p{Print}\\p{Space}]+\\]"; ;/* w w w.j ava 2 s. c om*/ Pattern pattern = Pattern.compile(patternText); // ===== Series ===== for (String accession : listAccessions) { String urlString = "https://www.ebi.ac.uk/arrayexpress/files/" + accession + "/" + accession + ".idf.txt"; System.out.println(urlString); String text = webService.loadUrl(urlString); String[] parts = text.split(lineSeparator); List<String> dataSeries = new ArrayList<String>(Arrays.asList(parts)); AESeries series = new AESeries(dataSeries); System.out.println(series); // ===== Check if already imported as a GSE ===== boolean isGseFound = false; String gseNumber = null; for (String secondaryAccession : series.getListAccessions()) { if (secondaryAccession.startsWith("GSE")) { gseNumber = secondaryAccession; Document gse = db.getCollection("series").find(Filters.eq("_id", secondaryAccession)).first(); isGseFound = gse != null; // System.out.println("GEO accession " + gseNumber + " found: " + isGseFound); } } if (!isGseFound) { // ===== Create Mongo series ===== List<String> listSeriesAcc = new ArrayList<String>(); listSeriesAcc.add(accession); Document docSeries = mongoService.createSeries(accession, series.getTitle(), null, series.getSubmissionDate(), series.getSubmissionDate()); if (series.getListAccessions() != null && !series.getListAccessions().isEmpty()) { listSeriesAcc.addAll(series.getListAccessions()); } docSeries.put("accessions", listSeriesAcc); UpdateResult updateResult = collectionSeries.updateOne(Filters.eq("_id", accession), new Document("$set", docSeries)); if (updateResult.getMatchedCount() == 0) { 
collectionSeries.insertOne(docSeries); } System.out.println(docSeries); // ===== Import clinical data ===== String url = "https://www.ebi.ac.uk/arrayexpress/files/" + accession + "/" + series.getSdrf(); System.out.println(url); String clindata = webService.loadUrl(url); String[] clinparts = clindata.split(lineSeparator); List<String> data = new ArrayList<String>(Arrays.asList(clinparts)); // ===== Samples ===== List<String> header = this.createHeader(data.get(0), pattern); System.out.println(header); for (int i = 1; i < data.size(); i++) { Integer nbSamples = data.size() - 1; Map<String, Object> mapParameters = this.createParameters(data.get(i), header); String idSample = this.createIdSample(mapParameters); if (idSample == null) { System.err.println("idSample is not recongnized for " + mapParameters); mongoClient.close(); System.exit(0); } String organism = (String) mapParameters.get("organism"); if (organism == null || organism.isEmpty()) { organism = "Homo sapiens"; } String platform = (String) mapParameters.get("LIBRARY_STRATEGY"); if (platform != null && !platform.isEmpty()) { platform = platform.toLowerCase().trim(); } else { platform = "rna-seq"; } String layout = (String) mapParameters.get("LIBRARY_LAYOUT"); if (layout != null && !layout.isEmpty()) { layout = layout.toLowerCase().trim(); } Document docSampleExist = collectionSamples.find(Filters.eq("_id", idSample)).first(); boolean docAlreadyExist = docSampleExist != null; boolean analysed = false; if (docAlreadyExist) { analysed = (Boolean) docSampleExist.get("analyzed"); System.out.println(i + "/" + nbSamples + "\t " + docSeries.get("_id") + "\t " + idSample + ": already exists in the database, analyzed=" + analysed); } else { System.out.println(i + "/" + nbSamples + "\t " + docSeries.get("_id") + "\t " + idSample); } // ===== Sample Document ===== Document docSample = mongoService.createSample(idSample, (String) docSeries.get("_id"), listSeriesAcc, organism, (Date) docSeries.get("submission_date"), 
(Date) docSeries.get("last_update"), analysed); // ===== Mandatory parameters ===== // Preserve "exp_group" if the document exists already Document expGroup = null; if (docAlreadyExist) { expGroup = (Document) docSampleExist.get("exp_group"); } else { expGroup = mongoService.createExpGroup(docSample, platform, (String) mapParameters.get("organism part"), (String) mapParameters.get("Source Name"), organism); if (layout != null) { expGroup.append("layout", layout); // run_name int j = 0; boolean isFound = false; String runName = null; while (!isFound && j < listRunNameParameters.length) { runName = (String) mapParameters.get(listRunNameParameters[j]); isFound = runName != null; j++; } if (runName != null) { expGroup.append("run_name", runName); } } } docSample.append("exp_group", expGroup); // ===== Supplementary parameters ===== Document parameters = mongoService.createParameters(docSample, mapParameters); docSample.append("parameters", parameters); // === Delete if already exist === collectionSamples.deleteOne(eq("_id", idSample)); // ===== Insert data ===== collectionSamples.insertOne(docSample); // ===== Update series for platforms ===== List<String> listPlatforms = collectionSamples .distinct("exp_group.id_platform", Filters.in("series", accession), String.class) .into(new ArrayList<String>()); docSeries.append("platforms", listPlatforms); collectionSeries.updateOne(Filters.eq("_id", accession), new Document("$set", docSeries)); } } else { System.out.println("GEO accession " + gseNumber + " corresponding to " + accession + " exists already. Skip import."); } } mongoClient.close(); }
From source file:module.ImportGeo.java
License:Open Source License
/**
 * Imports the GEO series listed in {@code listGseNumber} into Mongo.
 *
 * <p>For each series the GSE record is downloaded and upserted into the
 * {@code series} collection, each of its platforms (GPL) is upserted into the
 * {@code platform} collection, and each of its samples (GSM) is rebuilt and stored in
 * the {@code sample} collection, replacing any sample stored under the same id. The
 * {@code exp_group} and the {@code analyzed} flag of an already stored sample are
 * preserved. The Mongo client is closed in all cases.
 */
public ImportGeo() {

    // ===== Connection =====
    MongoClient mongoClient = MongoUtil.buildMongoClient();
    try {
        MongoDatabase db = mongoClient.getDatabase("epimed_experiments");
        MongoCollection<Document> collectionSeries = db.getCollection("series");
        MongoCollection<Document> collectionPlatforms = db.getCollection("platform");
        MongoCollection<Document> collectionSamples = db.getCollection("sample");

        // ===== Insert data =====
        for (int k = 0; k < listGseNumber.length; k++) {

            String gseNumber = listGseNumber[k];

            System.out.println("------------------------------------------");
            System.out.println(k + " Import " + gseNumber);

            // ===== Load GSE =====
            NcbiGeoGse gse = new NcbiGeoGse(webService.loadGeo(gseNumber));
            System.out.println(gse);

            // ===== Series =====
            Document docSeries = mongoService.createSeries(gse.getGseNumber(), gse.getTitle(), gse.getListGpl(),
                    gse.getSubmissionDate(), gse.getLastUpdate());

            // Update the series if it exists, insert it otherwise
            UpdateResult updateResult = collectionSeries.updateOne(Filters.eq("_id", gse.getGseNumber()),
                    new Document("$set", docSeries));
            if (updateResult.getMatchedCount() == 0) {
                collectionSeries.insertOne(docSeries);
            }

            // ===== Platforms =====
            for (int i = 0; i < gse.getListGpl().size(); i++) {

                NcbiGeoGpl gpl = new NcbiGeoGpl(webService.loadGeo(gse.getListGpl().get(i)));

                System.out.println("\t Import platform " + gpl.getGplNumber());

                Document docPlatforms = mongoService.createPlatform(gpl.getGplNumber(), gpl.getTitle(),
                        gpl.getTaxid(), gpl.getOrganism(), gpl.getManufacturer(), gpl.getSubmissionDate(),
                        gpl.getLastUpdate(), gpl.getTechnology());

                UpdateResult res = collectionPlatforms.updateOne(Filters.eq("_id", gpl.getGplNumber()),
                        new Document("$set", docPlatforms));
                if (res.getMatchedCount() == 0) {
                    collectionPlatforms.insertOne(docPlatforms);
                }
            }

            // ===== Samples ======
            for (int i = 0; i < gse.getListGsm().size(); i++) {

                NcbiGeoGsm gsm = new NcbiGeoGsm(webService.loadGeo(gse.getListGsm().get(i)));

                Document docSampleExist = collectionSamples.find(Filters.eq("_id", gsm.getGsmNumber())).first();
                boolean docAlreadyExist = docSampleExist != null;

                // A missing "analyzed" flag counts as not analysed instead of failing on unboxing
                boolean analysed = docAlreadyExist && Boolean.TRUE.equals(docSampleExist.get("analyzed"));
                if (docAlreadyExist) {
                    System.out.println(i + "/" + gse.getListGsm().size() + "\t " + gse.getGseNumber() + "\t "
                            + gsm.getGsmNumber() + ": already exists in the database, analyzed=" + analysed);
                } else {
                    System.out.println(i + "/" + gse.getListGsm().size() + "\t " + gse.getGseNumber() + "\t "
                            + gsm.getGsmNumber());
                }

                // ===== Sample Document =====
                Document docSample = mongoService.createSample(gsm.getGsmNumber(), gse.getGseNumber(),
                        gsm.getListGse(), gsm.getOrganism(), gsm.getSubmissionDate(), gsm.getLastUpdate(),
                        analysed);

                // ===== Mandatory parameters =====
                // Preserve "exp_group" if the document exists already
                Document expGroup = null;
                if (docAlreadyExist) {
                    expGroup = (Document) docSampleExist.get("exp_group");
                } else {
                    expGroup = mongoService.createExpGroup(docSample, gsm.getGplNumber(), gsm.getTitle(),
                            gsm.getSourceName(), gsm.getOrganism());
                }
                docSample.append("exp_group", expGroup);

                // ===== Supplementary parameters =====
                Document parameters = generateParameters(gsm);
                docSample.append("parameters", parameters);

                // === Delete if already exist ===
                collectionSamples.deleteOne(eq("_id", gsm.getGsmNumber()));

                // ===== Insert data =====
                collectionSamples.insertOne(docSample);
            }
        }
    } finally {
        mongoClient.close();
    }
}
From source file:module.ImportPlatform.java
License:Open Source License
public ImportPlatform() { // ===== Connection ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); MongoCollection<Document> collectionPlatforms = db.getCollection("platforms"); MongoCollection<Document> collectionSamples = db.getCollection("samples"); MongoCollection<Document> collectionSeries = db.getCollection("series"); // ===== Platforms ===== List<String> listGpl = collectionSamples.distinct("exp_group.id_platform", String.class) .into(new ArrayList<String>()); for (String idPlatform : listGpl) { Document doc = collectionPlatforms.find(Filters.in("_id", idPlatform)).first(); if (doc.getString("type") == null) { System.out.println(idPlatform + ": " + doc); }/*w w w .j av a 2s . c o m*/ } mongoClient.close(); }
From source file:module.ImportPlatformFromFile.java
License:Open Source License
public ImportPlatformFromFile() { // === Display === System.out.println("\n================ BEGIN Module " + this.getClass().getName() + "================"); // === INPUT === String idPlatform = "GPL97"; String inputfile = this.getInputDirectory() + this.getDirSeparator() + "GPL97-17394.txt"; String gpl = idPlatform.toLowerCase().trim(); // ===== Session PostgreSQL ===== SessionFactory sessionFactory = HibernateUtil .buildSessionFactory("config/epimed_semantic.hibernate.cfg.xml"); Session session = sessionFactory.openSession(); // ===== DAO ===== OmGeneDao geneDao = new OmGeneDao(session); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); try {//from www. j a v a2s . c o m // === Begin transaction === session.beginTransaction(); // ===== Load file ===== System.out.println("ID Platform " + gpl); System.out.println("LOADING \t " + inputfile); System.out.println("Please wait... "); List<String> listRows = fileService.loadTextFile(inputfile); // List<String> listRows = webService.loadGeoData(idPlatform); System.out.println("File sucessfully LOADED"); // ===== Recognize header ===== List<String> header = fileService.readHeader(listRows, "\t"); if (header == null || header.isEmpty()) { throw new ImportDataException("The header is empty"); } else { System.out.println("Header " + header); } Integer indId = fileService.findIndex(header, "ID"); Integer indGbacc = fileService.findIndex(header, "GB_ACC"); Integer indEntrez = fileService.findIndex(header, "ENTREZ"); if (indId == null || indGbacc == null || indEntrez == null) { throw new ImportDataException("Header not recognized: " + "ID index=" + indId + ", GB_ACC index=" + indGbacc + ", ENTREZ index=" + indEntrez); } else { System.out.println("The following header items are recognized:"); System.out.println("\t ID index=" + indId + ": " + header.get(indId)); System.out.println("\t GB_ACC index=" + indGbacc + ": " + 
header.get(indGbacc)); System.out.println("\t ENTREZ index=" + indEntrez + ": " + header.get(indEntrez)); } // ===== Recognize data ===== List<List<String>> data = fileService.readData(listRows, "\t"); if (data == null || data.isEmpty()) { throw new ImportDataException("The data are empty"); } else { System.out.println( "The data are sucessfully loaded: rows " + data.size() + ", columns " + data.get(0).size()); } // ===== Create specific tables ===== String sqlCheckTableProbe = "select * from information_schema.tables WHERE table_schema = 'hs' and table_name='om_probe_" + gpl + "'"; List<Object> result = session.createNativeQuery(sqlCheckTableProbe).getResultList(); String tableProbe = "hs.om_probe_" + gpl; String tableGP = "hs.om_gp_" + gpl; if (result == null || result.isEmpty()) { // Table probe String sqlCreateTableProbe = "create table " + tableProbe + "(id_probe VARCHAR(50) not null," + " genbank_acc VARCHAR(50) null," + " constraint pk_om_probe_" + gpl + " primary key (id_probe))"; session.createNativeQuery(sqlCreateTableProbe).executeUpdate(); // Table gp String sqlCreateTableGP = "create table " + tableGP + "(id_probe VARCHAR(50) not null," + " id_gene INT4 not null," + " constraint pk_om_gp_" + gpl + " primary key (id_probe, id_gene))"; session.createNativeQuery(sqlCreateTableGP).executeUpdate(); // Foregn keys String sqlAlterTableProbe = "alter table " + tableGP + " add constraint fk_gp_probe_" + gpl + " foreign key (id_probe)" + " references " + tableProbe + " (id_probe) on delete restrict on update restrict"; session.createNativeQuery(sqlAlterTableProbe).executeUpdate(); String sqlAlterTableGene = "alter table " + tableGP + " add constraint fk_gp_gene_" + gpl + " foreign key (id_gene)" + " references hs.om_gene (id_gene) on delete restrict on update restrict"; session.createNativeQuery(sqlAlterTableGene).executeUpdate(); } // ===== Import data ===== for (int i = 0; i < data.size(); i++) { // for (int i=0; i<10; i++) { List<String> dataline = 
data.get(i); String idProbe = dataline.get(indId).trim(); String genbankAcc = dataline.get(indGbacc).trim(); String sqlInsertProbe = "insert into " + tableProbe + " values('" + idProbe + "', null)"; if (genbankAcc != null && !genbankAcc.isEmpty()) { sqlInsertProbe = "insert into " + tableProbe + " values('" + idProbe + "', '" + genbankAcc + "')"; } session.createNativeQuery(sqlInsertProbe).executeUpdate(); OmGenbankUnigene gu = session.get(OmGenbankUnigene.class, genbankAcc); if (gu == null && genbankAcc != null && !genbankAcc.isEmpty()) { gu = new OmGenbankUnigene(); gu.setGenbankAcc(genbankAcc); session.save(gu); } String listEntrez = null; String[] parts = null; if (indEntrez < dataline.size()) { listEntrez = dataline.get(indEntrez).trim(); parts = listEntrez.split("[///\\p{Space}]"); for (String entrezString : parts) { Integer entrez = null; try { entrez = Integer.parseInt(entrezString); } catch (NumberFormatException e) { // nothing to do } if (entrez != null) { OmGene gene = geneDao.find(entrez); if (gene == null) { gene = geneDao.createGene(entrez, null); } String sqlInsertGP = "insert into " + tableGP + " values('" + idProbe + "', " + entrez + ")"; session.createNativeQuery(sqlInsertGP).executeUpdate(); } } } if (i % 1000 == 0) { System.out.println(i + "\t" + idProbe + "\t" + genbankAcc + "\t" + listEntrez + "\t" + Arrays.toString(parts)); } if (i % 20 == 0) { session.flush(); } } // ===== Subscribe platform ===== OmPlatform platform = session.get(OmPlatform.class, idPlatform); if (platform != null) { platform.setEnabled(true); session.update(platform); } else { MongoCollection<Document> collection = db.getCollection("platforms"); Document docPlatform = collection.find(Filters.eq("_id", idPlatform)).first(); String title = docPlatform.getString("title"); String manufacturer = docPlatform.getString("manufacturer"); platform = new OmPlatform(); platform.setIdPlatform(idPlatform); platform.setTitle(title); platform.setManufacturer(manufacturer); 
platform.setEnabled(true); session.save(platform); } // ===== Rights ===== String sqlRights; String[] users = { "epimed_prod", "epimed_web", "epimed_script" }; for (String user : users) { sqlRights = "GRANT SELECT ON ALL TABLES IN SCHEMA hs TO " + user; session.createNativeQuery(sqlRights).executeUpdate(); } sqlRights = "GRANT ALL ON ALL TABLES IN SCHEMA hs TO epimed_admin"; session.createNativeQuery(sqlRights).executeUpdate(); // === Commit transaction === session.getTransaction().commit(); // session.getTransaction().rollback(); } catch (Exception e) { session.getTransaction().rollback(); System.out.println("ROLLBACK in module " + this.getClass().getName()); e.printStackTrace(); } finally { if (session.isOpen()) { session.close(); } sessionFactory.close(); mongoClient.close(); } // === Display === System.out.println("================ END Module " + this.getClass().getName() + "================"); }
From source file:module.script.AddSeriesToSamples.java
License:Open Source License
public AddSeriesToSamples() { // ===== Service ===== FormatService formatService = new FormatService(); // ===== Session Mongo ===== MongoClient mongoClient = MongoUtil.buildMongoClient(); MongoDatabase db = mongoClient.getDatabase("epimed_experiments"); Set<String> setProjects = new HashSet<String>(); MongoCollection<Document> collection = db.getCollection("sample"); Bson filters = Filters.and(Filters.in("series", "PRJNA270632")); List<Document> listDocuments = collection.find(filters).into(new ArrayList<Document>()); for (int i = 0; i < listDocuments.size(); i++) { Document doc = listDocuments.get(i); Document expgroup = doc.get("exp_group", Document.class); if (expgroup.get("exp_Mcount") != null) { List<String> projects = doc.get("series", ArrayList.class); setProjects.clear();//from w w w . j a v a 2 s .c o m setProjects.addAll(projects); setProjects.add("TISSUE_SPECIFIC_GENES_HS"); doc.put("series", setProjects); System.out.println(doc.getString("_id") + " " + projects + " -> " + setProjects); collection.updateOne(Filters.eq("_id", doc.getString("_id")), new Document("$set", doc)); } } mongoClient.close(); }