// Java tutorial
/* * EBI MetaboLights - http://www.ebi.ac.uk/metabolights * Cheminformatics and Metabolism group * * European Bioinformatics Institute (EMBL-EBI), European Molecular Biology Laboratory, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, United Kingdom * * Last modified: 2015-Mar-19 * Modified by: kenneth * * Copyright 2015 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ /* * EBI MetaboLights - http://www.ebi.ac.uk/metabolights * Cheminformatics and Metabolism group * * European Bioinformatics Institute (EMBL-EBI), European Molecular Biology Laboratory, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, United Kingdom * * Last modified: 6/10/14 11:57 AM * Modified by: conesa * * * , EMBL, European Bioinformatics Institute, 2014. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/
package uk.ac.ebi.metabolights.repository.dao.filesystem.metabolightsuploader;

import org.apache.commons.io.filefilter.RegexFileFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.ebi.metabolights.repository.dao.DAOFactory;
import uk.ac.ebi.metabolights.repository.dao.hibernate.AccessionDAO;
import uk.ac.ebi.metabolights.repository.dao.hibernate.DAOException;
import uk.ac.ebi.metabolights.repository.utils.FileUtil;
import uk.ac.ebi.metabolights.repository.utils.IsaTab2MetaboLightsConverter;
import uk.ac.ebi.metabolights.repository.utils.StringUtils;

import javax.naming.ConfigurationException;
import java.io.*;
import java.nio.charset.Charset;
import java.util.*;

/**
 * IsaTabReplacer
 * <p>
 * Validates the investigation file of an ISA-Tab folder and, when a study identifier has been set
 * on the instance, rewrites the identifier lines with it. Public release and submission dates are
 * rewritten when the corresponding date has been set.
 * <p>
 * The tags to look for (identifier tags, date tags, profiling/ontology tags and the investigation
 * file name pattern) are read once from {@code idreplacer.properties} on the classpath and kept in
 * static fields shared by every instance.
 * <p>
 * Problems detected while processing the investigation file are not thrown; they are collected
 * and exposed through {@link #getExceptions()}.
 *
 * @author conesa
 */
//@Controller
public class IsaTabReplacer {

    static private Properties props = new Properties();

    static private String pubDateStr;              // Tag to look for in i_Investigation.txt (public release date)
    static private String subDateStr;              // Tag to look for in i_Investigation.txt (submission date)
    static private String metaboliteProfTypeStr;   // Tag to look for in i_Investigation.txt, only allow metabolite profiling
    static private String metaboliteProfValueStr;  // Value expected on the profiling tag line
    static private String fileWithIds;             // File name pattern (regex) of the investigation file
    static private String newOntologyType;         // Tag of the line that must reference the new ontology
    static private String newOntologyValue;        // This is the new type ontology reference used in ISAcreator 1.7.5+
    static private String sampleFile;
    static private String validateError = "***** You must make sure your study successfully passes the ISAcreator validation (file -> validate ISAtab) before resubmitting your study! *****";

    static final String PROP_IDS = "isatab.ids";
    static String[] idList;
    static final String PROP_FILE_WITH_IDS = "isatab.investigationFile";

    private static final Logger logger = LoggerFactory.getLogger(IsaTabReplacer.class);

    // Instance variables
    private String publicDate;          // Date from submitter form
    private String submissionDate;      // Date from submitter form
    private Integer studyNumber = 0;    // Updated when we find study ids in the file
    private String studyIdentifier;     // When null (e.g. updating a study), no replacement is written back.
    private String isaTabFolder;
    private List<Exception> exceptions = new ArrayList<>();

    /**
     * Creates a replacer working on the given ISA-Tab folder. Properties are loaded lazily by the
     * methods that need them.
     *
     * @param isaTabFolder path of the folder that contains the ISA-Tab files
     */
    public IsaTabReplacer(String isaTabFolder) {
        this.isaTabFolder = isaTabFolder;
    }

    /**
     * Creates a replacer without a folder and eagerly tries to load the properties. A failure to
     * load them is logged (with its cause) and otherwise ignored; loading is retried later.
     */
    public IsaTabReplacer() {
        try {
            loadProperties();
        } catch (Exception e) {
            logger.error("ERROR: Could not load properties file when instantiation IsaTabIdReplacer()!", e);
        }
    }

    /**
     * @return the public release date in ISA-Tab format, or an empty string if none has been set
     */
    public String getPublicDate() {
        if (publicDate == null) {
            publicDate = "";
        }
        return publicDate;
    }

    /**
     * @param publicReleaseDate date to write in the public release date line
     */
    public void setPublicDate(Date publicReleaseDate) {
        this.publicDate = IsaTab2MetaboLightsConverter.date2IsaTabDate(publicReleaseDate);
    }

    /**
     * @return the submission date in ISA-Tab format, or {@code null} if none has been set
     */
    public String getSubmissionDate() {
        return submissionDate;
    }

    /**
     * @param submissionDate date to write in the submission date line
     */
    public void setSubmissionDate(Date submissionDate) {
        this.submissionDate = IsaTab2MetaboLightsConverter.date2IsaTabDate(submissionDate);
    }

    /**
     * @return the identifier that will replace the ones found in the file, or {@code null}
     */
    public String getStudyIdentifier() {
        return studyIdentifier;
    }

    /**
     * @param studyIdentifier identifier to write in the identifier lines; {@code null} disables
     *                        the rewriting of the investigation file in {@link #execute()}
     */
    public void setStudyIdentifier(String studyIdentifier) {
        this.studyIdentifier = studyIdentifier;
    }

    //IsaTabArchive properties
    public String getIsaTabFolder() {
        return isaTabFolder;
    }

    public void setIsaTabFolder(String isaTabFolder) {
        this.isaTabFolder = isaTabFolder;
    }

    /**
     * Loads {@code idreplacer.properties} from the classpath into the static configuration fields.
     * Does nothing if the properties have already been loaded successfully.
     * <p>
     * The file is read into a local {@link Properties} first and only published once every
     * mandatory value has been checked, so a failed attempt never leaves the class half configured
     * and can be retried.
     *
     * @throws FileNotFoundException  if the properties file is not on the classpath
     * @throws IOException            if the file cannot be read
     * @throws ConfigurationException if the file is empty or lacks the {@code isatab.ids} property
     */
    private static void loadProperties() throws FileNotFoundException, IOException, ConfigurationException {

        //final String PROPS_FILE = "isatabidreplacer.properties";
        final String PROPS_FILE = "idreplacer.properties";

        //If properties are loaded
        if (props != null && !props.isEmpty()) {
            return;
        }

        logger.info("Loading properties using getClassLoader().getResourceAsStream({})", PROPS_FILE);

        //Load the properties from the property file
        Properties loaded = new Properties();
        try (InputStream in = IsaTabReplacer.class.getClassLoader().getResourceAsStream(PROPS_FILE)) {
            // getResourceAsStream returns null (it does not throw) when the resource is missing
            if (in == null) {
                throw new FileNotFoundException("Properties file " + PROPS_FILE + " not found in the classpath.");
            }
            loaded.load(in);
        }

        //If property file is empty
        if (loaded.isEmpty()) {
            throw new ConfigurationException("The " + PROPS_FILE + " file has been found, but it is empty.");
        }

        //Initialise idList
        String ids = loaded.getProperty(PROP_IDS);
        if (ids == null) {
            throw new ConfigurationException("Property " + PROP_IDS + " is missing in " + PROPS_FILE + ".");
        }

        pubDateStr = loaded.getProperty("isatab.publicReleaseDate");
        subDateStr = loaded.getProperty("isatab.studySubDate");
        metaboliteProfTypeStr = loaded.getProperty("isatab.profilingType");
        metaboliteProfValueStr = loaded.getProperty("isatab.profilingValue");
        newOntologyType = loaded.getProperty("isatab.newOntologyType");
        newOntologyValue = loaded.getProperty("isatab.newOntologyValue");

        logger.info("{} property retrieved :{},{},{}", PROP_IDS, ids, pubDateStr, subDateStr);

        //Split it by ; to go through the array
        idList = ids.split(";");

        //Initialize fileWithIds
        fileWithIds = loaded.getProperty(PROP_FILE_WITH_IDS);
        sampleFile = loaded.getProperty("isatab.sampleFile");

        // Publish last: a non-empty props marks the configuration as complete.
        props = loaded;
    }

    /**
     * Checks that the configured ISA-Tab folder exists and is a directory.
     *
     * @throws IsaTabIdReplacerException if the path does not exist or is not a directory
     */
    public void validateIsaTabArchive() throws IsaTabIdReplacerException {

        String[] msgs = new String[2];

        //Create a File object
        File isatab = new File(isaTabFolder);

        //If file does not exists
        if (!isatab.exists()) {
            msgs[1] = "File " + isaTabFolder + " does not exists.";
        }

        //File must be a folder, if not
        if (!isatab.isDirectory()) {
            msgs[0] = isatab.getName() + " is not a directory.\n";
        }

        //If there are messages (errors)
        String msg = org.apache.commons.lang.StringUtils.join(msgs);

        //If there is any message...
        if (!msg.equals("")) {
            //Throw customize exception...
            throw new IsaTabIdReplacerException("Invalid ISA Tab File:\n", msgs);
        }
    }

    /**
     * Runs the whole process: loads the properties, validates the folder and then validates (and,
     * if a study identifier is set, rewrites) the investigation file.
     *
     * @return {@code true} if no error has been collected, see {@link #getExceptions()}
     * @throws Exception if the properties cannot be loaded or the folder is not valid
     */
    public boolean execute() throws Exception {

        logger.info("Starting id replacement process");

        //Load properties
        logger.info("Loading properties");
        loadProperties();

        //Validate
        logger.info("Validating the archive");
        validateIsaTabArchive();

        //Replace id
        logger.info("Replace study id and study dates");
        validateAndReplaceValuesInInvestigationFile();

        return exceptions.isEmpty();
    }

    /** Logs the error and keeps it so callers can retrieve it through {@link #getExceptions()}. */
    private void annotateError(Exception e) {
        logger.error(e.getMessage());
        exceptions.add(e);
    }

    /** Wraps the message in an {@code IsaTabException} and annotates it. */
    private void annotateError(String message) {
        annotateError(new IsaTabException(message));
    }

    /** Locates the investigation file and processes it; any failure is annotated, not thrown. */
    private void validateAndReplaceValuesInInvestigationFile() {

        try {
            // Get the investigation file
            File isaTabFile = getInvestigationFile();

            logger.info("Loading investigation file {}", isaTabFile.getName());

            // Replace the id
            replaceInFile(isaTabFile);

        } catch (Exception e) {
            annotateError(e);
        }
    }

    /**
     * Returns the investigation file of the folder. The properties are loaded <em>before</em> the
     * file pattern is read, so this also works on an instance that has not loaded them yet.
     *
     * @throws IOException            if the file cannot be found or the properties cannot be read
     * @throws ConfigurationException if the properties are not valid
     */
    private File getInvestigationFile() throws ConfigurationException, IOException {
        loadProperties();
        return getISAtabFile(fileWithIds);
    }

    /**
     * @param filePattern regular expression the file name must match
     * @return ISAtab file based on the given filename pattern
     * @throws java.io.IOException                 if no file matches, or the folder cannot be listed
     * @throws javax.naming.ConfigurationException if the properties are not valid
     */
    private File getISAtabFile(String filePattern) throws ConfigurationException, IOException {

        // Load properties
        loadProperties();

        //Search for the requested file
        File isaFolder = new File(isaTabFolder);

        //Define a filename filter
        FileFilter filter = new RegexFileFilter(filePattern);

        //Get the file list filtered
        File[] fileList = isaFolder.listFiles(filter);

        // listFiles returns null when the path is not a directory or an I/O error occurs,
        // so null must be tested before the length.
        if (fileList == null || fileList.length == 0) {
            throw new FileNotFoundException("File (" + filePattern + ") not found");
        }

        //There must be only one, so take the first
        //Be aware that this only works with investigation (i_) and sample (s_) files
        return fileList[0];
    }

    /**
     * Validates the investigation file line by line and builds the replaced content. The file is
     * only written back when a study identifier has been set. No backup is made here; if one is
     * needed the caller has to take it (FileAuditUtil.backUpAuditedFolder).
     * <p>
     * Validation and I/O errors are annotated, never thrown.
     *
     * @param fileWithId investigation file
     */
    private void replaceInFile(File fileWithId) {

        final String onlyOneStudyError = "Sorry, Only one study per submission accepted in MetaboLights"; //Todo, read error text from properties

        logger.info("Reading investigation file -->{}", fileWithId.getAbsolutePath());

        // Reset number of studies.
        studyNumber = 0;
        boolean wrongStudyNumber = false;

        try {
            StringBuilder text = new StringBuilder();

            try (BufferedReader reader = new BufferedReader(new FileReader(fileWithId))) {

                String line;
                while ((line = reader.readLine()) != null) {

                    //Check if this is metabolite profiling
                    if (!checkIfMetaboliteProfiling(line)) {
                        annotateError("Sorry, only metabolite profiling is accepted in MetaboLights"); //Todo, read error text from properties
                    }

                    //If we already have assigned a study, fail the upload (reported once)
                    if (studyNumber > 1 && !wrongStudyNumber) {
                        annotateError(onlyOneStudyError);
                        wrongStudyNumber = true;
                    }

                    if (!newOntologyUsed(line)) {
                        String errTxt = "This study does not conform to the current ontology setup requirements or it was created with an old version of ISAcreator."
                                + " Please download the latest version of our ISAcreator bundle and update your study. "
                                + " Download here: ftp://ftp.ebi.ac.uk/pub/databases/metabolights/submissionTool/ISAcreatorMetaboLights.zip"
                                + validateError;
                        annotateError(errTxt);
                    }

                    //Replace Id in line, also counts the studies reported
                    line = replaceIdInLine(line);

                    //Replace public release date for this study
                    line = replacePubRelDateInLine(line);

                    //Replace study submission date for this study
                    line = replaceSubmitDateInLine(line);

                    //Add the final carriage return and line feed
                    text.append(line).append("\r\n");
                }
            }

            // The in-loop check looks at the count left by the previous lines, so a second study
            // identifier on the very last line would otherwise go unnoticed.
            if (studyNumber > 1 && !wrongStudyNumber) {
                annotateError(onlyOneStudyError);
            }

            //Save the file (reader is already closed at this point)
            if (replacing()) {
                FileUtil.String2File(text.toString(), fileWithId.getPath(), false);
            }

        } catch (IOException e) {
            // Covers FileNotFoundException as well
            annotateError(e);
        }
    }

    /** @return true when a study identifier is set, i.e. the file has to be rewritten. */
    private boolean replacing() {
        return (studyIdentifier != null);
    }

    /** @return a new accession obtained from the accession DAO, using its default prefix. */
    private String getAccessionNumber() throws DAOException {

        AccessionDAO accessionDAO = DAOFactory.getInstance().getAccessionDAO();

        // Using default prefix...we should change this to allow DEV IDs.
        return accessionDAO.getStableId();
    }

    /**
     * If the line starts with one of the configured identifier tags, replaces its value with the
     * study identifier (when one is set) and counts it when the tag is "Study Identifier".
     *
     * @param line line of the investigation file
     * @return the line, replaced if needed
     */
    private String replaceIdInLine(String line) {

        //For each id tag (Study Identifier, Investigation Identifier)
        for (String id : idList) {

            //If the value is present in line, in the first position.
            if (!line.startsWith(id)) {
                continue;
            }

            logger.info("Line with identifiers found: {}", line);

            // If studyIdentifier is null, don't need to replace it
            if (replacing()) {

                //Get the Id Value (i.e.: BII-1-S), only used for logging
                String idInitialValue = StringUtils.replace(line, id + "\t\"", "");
                idInitialValue = StringUtils.truncate(idInitialValue);

                //Compose the line: Study Identifier  "MTBLS1"
                line = id + "\t\"" + studyIdentifier + "\"";

                logger.info("Study identifier {} replaced with {}", idInitialValue, studyIdentifier);
            }

            //Only Study Identifier lines count as studies (investigation identifiers do not).
            if ("Study Identifier".equals(id)) {
                ++studyNumber;
            }

            return line;
        }

        return line;
    }

    /*
     * String replace the public release date in i_investigation.txt file
     */
    private String replacePubRelDateInLine(String line) {

        // If public release date is null do nothing
        if (publicDate == null) {
            return line;
        }

        //If the tag is present in line, in the first position.
        if (line.startsWith(pubDateStr)) {

            logger.info("{} found in line {}", pubDateStr, line);

            //Compose the line: Study Public Release Date  "10/03/2009"
            return pubDateStr + "\t\"" + getPublicDate() + "\"";
        }

        return line;
    }

    /*
     * String replace the MetaboLights submission date in i_investigation.txt file
     */
    private String replaceSubmitDateInLine(String line) {

        // If submission date is null do nothing.
        if (submissionDate == null) {
            return line;
        }

        //If the tag is present in line, in the first position.
        if (line.startsWith(subDateStr)) {

            logger.info("{} found in line {}", subDateStr, line);

            //Compose the line: Study Submission Date  "30/04/2007"
            return subDateStr + "\t\"" + getSubmissionDate() + "\"";
        }

        return line;
    }

    /*
     * Check if the this is a metabolite profiled study, read from i_investigation.txt file.
     * Returns false only for a profiling-type line that lacks the expected value.
     */
    private Boolean checkIfMetaboliteProfiling(String line) {

        if (line.startsWith(metaboliteProfTypeStr + "\t") && !line.contains(metaboliteProfValueStr)) {
            logger.error("'{}\t' found, but no '{}' in line: {}", metaboliteProfTypeStr, metaboliteProfValueStr, line);
            return false;
        }

        return true;    //Not the profiling line, or correct type/value combo
    }

    /*
     * Has this study been created using the new ontology references? ISAcreator 1.7.5+
     * Returns false only for an ontology-type line that lacks the expected value.
     */
    private Boolean newOntologyUsed(String line) {

        if (line.startsWith(newOntologyType + "\t") && !line.contains(newOntologyValue)) {
            logger.error("'{}\t' found, no new ontology referenced '{}' in line: {}", newOntologyType, newOntologyValue, line);
            return false;
        }

        return true;    //Not the ontology line, or correct type/value combo
    }

    /**
     * Replaces values in the investigation file using the replacementHash:
     * <LI> it goes through the file</LI>
     * <LI> searches for any field in replacement.keys</LI>
     * <LI> replaces it with the correspondent value</LI>
     *
     * @param replacementHash Hash where the key is the tag to search for and the value is the value to write.
     * @throws Exception if the properties or the investigation file cannot be read or written
     */
    public void replaceFields(HashMap<String, String> replacementHash) throws Exception {

        // Get the investigation file
        File isaTabFile = getInvestigationFile();

        // Replace the fields
        replaceFieldsInFile(isaTabFile, replacementHash);
    }

    /** Rewrites the file replacing the value of every line whose field is a key of the map. */
    private void replaceFieldsInFile(File fileWithId, Map<String, String> replacementHash) throws Exception {

        logger.info("Replacing fields in file -->{}", fileWithId.getAbsolutePath());

        StringBuilder text = new StringBuilder();

        try (BufferedReader reader = new BufferedReader(new FileReader(fileWithId))) {

            String line;
            while ((line = reader.readLine()) != null) {
                //Add the final carriage return and line feed
                text.append(replaceFieldsInLine(line, replacementHash)).append("\r\n");
            }
        }

        //Save the file (reader is already closed at this point)
        FileUtil.String2File(text.toString(), fileWithId.getPath(), false);
    }

    /**
     * @param line line of an ISA-Tab file
     * @return the text before the first tab, or {@code null} if the line has no tab
     */
    public String getFieldInLine(String line) {

        int tabPos = line.indexOf("\t");

        // If there isn't any tab
        if (tabPos == -1) {
            return null;
        }

        return line.substring(0, tabPos);
    }

    /**
     * Returns the text after the first tab without its surrounding double quotes. A value that is
     * not enclosed in double quotes (including an empty value) is returned untouched instead of
     * being trimmed blindly.
     *
     * @param line line of an ISA-Tab file
     * @return the value, or {@code null} if the line has no tab
     */
    public String getValueInLine(String line) {

        int tabPos = line.indexOf("\t");

        // If there isn't any tab
        if (tabPos == -1) {
            return null;
        }

        String value = line.substring(tabPos + 1);

        // Return the value without double quotes
        if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
            return value.substring(1, value.length() - 1);
        }

        return value;
    }

    /** Returns the line with its value replaced if its field is a key of the map. */
    private String replaceFieldsInLine(String line, Map<String, String> replacementHash) {

        // Get the field of the line
        String field = getFieldInLine(line);

        // If the line has a field and it is present in the hash
        if (field != null && replacementHash.containsKey(field)) {

            String value = replacementHash.get(field);

            logger.info("Field found: {} in line {}. Replacing value with {}", field, line, value);

            line = field + "\t\"" + value + "\"";
        }

        return line;
    }

    /**
     * Get the values corresponding to the fields passed as parameter. Its a basic method.
     *
     * @param fields tags to look for in the investigation file
     * @return Map with the field (key) and the value (value). If a field appears more than once
     *         in the file, the value of its last occurrence is returned.
     * @throws Exception if the properties or the investigation file cannot be read
     */
    public Map<String, String> getFields(String[] fields) throws Exception {

        // Get the investigation file
        File isaTabFile = getInvestigationFile();

        // Get Fields in file
        return getFieldsInFile(isaTabFile, fields);
    }

    /** Reads the file and collects the value of every requested field found. */
    private Map<String, String> getFieldsInFile(File fileWithId, String[] fields) throws Exception {

        logger.info("Getting fields in file -->{}", fileWithId.getAbsolutePath());

        HashMap<String, String> result = new HashMap<String, String>();

        // Convert the array into a Set
        Set<String> fieldsSet = new HashSet<String>(Arrays.asList(fields));

        try (BufferedReader reader = new BufferedReader(new FileReader(fileWithId))) {

            String line;
            while ((line = reader.readLine()) != null) {
                getFieldsInLine(line, fieldsSet, result);
            }
        }

        return result;
    }

    /** Stores the value of the line in the result map if its field is one of the requested. */
    private String getFieldsInLine(String line, Set<String> fields, HashMap<String, String> result) {

        // Get the field of the line
        String field = getFieldInLine(line);

        // If the line has a field and it is one of the requested
        if (field != null && fields.contains(field)) {

            String value = getValueInLine(line);

            logger.info("Field found: {} in line {}. Getting value: {}", field, line, value);

            //Add a new entry (overwrites a previous occurrence)
            result.put(field, value);
        }

        return line;
    }

    /**
     * @return the errors collected so far while processing the investigation file
     */
    public List<Exception> getExceptions() {
        return exceptions;
    }
}