Java tutorial
import blast.specification.BLASTIdentifier; import blast.specification.TUITBLASTIdentifierDB; import blast.specification.TUITBLASTIdentifierRAM; import blast.specification.cutoff.TUITCutoffSet; import db.mysql.MySQL_Connector; import db.ram.RamDb; import exception.TUITPropertyBadFormatException; import format.fasta.nucleotide.NucleotideFasta; import helper.CombinatorFileOperator; import helper.NCBITablesDeployer; import helper.reduce.ReductorFileOperator; import io.file.NucleotideFastaTUITFileOperator; import io.file.TUITFileOperator; import io.file.TUITFileOperatorHelper; import io.properties.jaxb.Database; import io.properties.jaxb.SpecificationParameters; import io.properties.jaxb.TUITProperties; import io.properties.load.TUITPropertiesLoader; import logger.Log; import org.apache.commons.cli.*; import org.xml.sax.SAXException; import taxonomy.Ranks; import toolkit.reduce.NucleotideFastaSequenceReductor; import toolkit.reduce.hmptree.TreeFormatter; import javax.xml.bind.JAXBException; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.logging.Level; /** * Taxonomic Unit Identification Tool (TUIT) is a free open source platform independent * software for accurate taxonomic classification of nucleotide sequences. * Copyright (C) 2013 Alexander Tuzhikov, Alexander Panchin and Valery Shestopalov. * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /** * A main class for the tuit module implementation */ public class tuit { private static String licence = "\n" + "\nTHANK YOU FOR USING THE TAXONOMIC UNIT IDENTIFICATION TOOL\n\n" + "Taxonomic Unit Identification Tool Copyright (C) 2013 Alexander Tuzhikov, Alexander Panchin and Valery Shestopalov\n" + " This program comes with ABSOLUTELY NO WARRANTY; for details see LICENCE.\n" + " This is free software, and you are welcome to redistribute it\n" + " under certain conditions; see LICENCE for details."; /** * The -in flag for the input file */ private final static String IN = "i"; /** * The -out flag for the output file */ private final static String OUT = "o"; /** * The -p flag for the properties file */ private final static String P = "p"; /** * The -b flag for the pre-blasted BLAST output file */ private final static String B = "b"; /** * The -v flag for the verbose output file */ private final static String V = "v"; /** * The -reduce flag */ private final static String COMBINE = "combine"; /** * The -normalize flag in combination with COMBINE */ private final static String NORMALIZE = "n"; /** * The -combine flag */ private final static String REDUCE = "reduce"; /** * The -deploy flag */ private final static String DEPLOY = "deploy"; /** * The -update flag */ private final static String UPDATE = "update"; /** * tuit output file extension */ private final static String TUIT_EXT = ".tuit"; /** * Determines whether tuit should use the ram-mapped taxonomic database, or connect to a slower mysql version */ private final static String USE_DB = "usedb"; /** * A serialized object for the taxonomic database */ private final static String RAM_DB = "ramdb.obj"; @SuppressWarnings("ConstantConditions") public static void main(String[] args) { System.out.println(licence); //Declare variables File inputFile; File outputFile; File tmpDir; File blastnExecutable; File properties; File blastOutputFile = null; // TUITPropertiesLoader tuitPropertiesLoader; TUITProperties tuitProperties; // String[] parameters = null; // Connection connection = null; MySQL_Connector mySQL_connector; // Map<Ranks, TUITCutoffSet> cutoffMap; // BLASTIdentifier blastIdentifier = null; // RamDb ramDb = null; CommandLineParser parser = new GnuParser(); Options options = new Options(); options.addOption(tuit.IN, "input<file>", true, "Input file (currently fasta-formatted only)"); options.addOption(tuit.OUT, "output<file>", true, "Output file (in " + tuit.TUIT_EXT + " format)"); options.addOption(tuit.P, "prop<file>", true, "Properties file (XML formatted)"); options.addOption(tuit.V, "verbose", false, "Enable verbose output"); options.addOption(tuit.B, "blast_output<file>", true, "Perform on a pre-BLASTed output"); options.addOption(tuit.DEPLOY, "deploy", false, "Deploy the taxonomic databases"); options.addOption(tuit.UPDATE, "update", false, "Update the taxonomic databases"); options.addOption(tuit.USE_DB, "usedb", false, "Use RDBMS instead of RAM-based taxonomy"); Option option = new Option(tuit.REDUCE, "reduce", true, "Pack identical (100% similar sequences) records in the given sample file"); option.setArgs(Option.UNLIMITED_VALUES); options.addOption(option); option = new Option(tuit.COMBINE, "combine", true, "Combine a set of given reduction files into an HMP Tree-compatible taxonomy"); option.setArgs(Option.UNLIMITED_VALUES); options.addOption(option); options.addOption(tuit.NORMALIZE, "normalize", false, "If used in combination with -combine ensures that the values are normalized by the root value"); HelpFormatter formatter = new HelpFormatter(); try { //Get TUIT directory final File tuitDir = new File( new File(tuit.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()) .getParent()); final File ramDbFile = new File(tuitDir, tuit.RAM_DB); //Setup logger Log.getInstance().setLogName("tuit.log"); //Read command line final CommandLine commandLine = parser.parse(options, args, true); //Check if the REDUCE option is on if (commandLine.hasOption(tuit.REDUCE)) { final String[] fileList = commandLine.getOptionValues(tuit.REDUCE); for (String s : fileList) { final Path path = Paths.get(s); Log.getInstance().log(Level.INFO, "Processing " + path.toString() + "..."); final NucleotideFastaSequenceReductor nucleotideFastaSequenceReductor = NucleotideFastaSequenceReductor .fromPath(path); ReductorFileOperator.save(nucleotideFastaSequenceReductor, path.resolveSibling(path.getFileName().toString() + ".rdc")); } Log.getInstance().log(Level.FINE, "Task done, exiting..."); return; } //Check if COMBINE is on if (commandLine.hasOption(tuit.COMBINE)) { final boolean normalize = commandLine.hasOption(tuit.NORMALIZE); final String[] fileList = commandLine.getOptionValues(tuit.COMBINE); //TODO: implement a test for format here final List<TreeFormatter.TreeFormatterFormat.HMPTreesOutput> hmpTreesOutputs = new ArrayList<>(); final TreeFormatter treeFormatter = TreeFormatter .newInstance(new TreeFormatter.TuitLineTreeFormatterFormat()); for (String s : fileList) { final Path path = Paths.get(s); Log.getInstance().log(Level.INFO, "Merging " + path.toString() + "..."); treeFormatter.loadFromPath(path); final TreeFormatter.TreeFormatterFormat.HMPTreesOutput output = TreeFormatter.TreeFormatterFormat.HMPTreesOutput .newInstance(treeFormatter.toHMPTree(normalize), s.substring(0, s.indexOf("."))); hmpTreesOutputs.add(output); treeFormatter.erase(); } final Path destination; if (commandLine.hasOption(OUT)) { destination = Paths.get(commandLine.getOptionValue(tuit.OUT)); } else { destination = Paths.get("merge.tcf"); } CombinatorFileOperator.save(hmpTreesOutputs, treeFormatter, destination); Log.getInstance().log(Level.FINE, "Task done, exiting..."); return; } if (!commandLine.hasOption(tuit.P)) { throw new ParseException("No properties file option found, exiting."); } else { properties = new File(commandLine.getOptionValue(tuit.P)); } //Load properties tuitPropertiesLoader = TUITPropertiesLoader.newInstanceFromFile(properties); tuitProperties = tuitPropertiesLoader.getTuitProperties(); //Create tmp directory and blastn executable tmpDir = new File(tuitProperties.getTMPDir().getPath()); blastnExecutable = new File(tuitProperties.getBLASTNPath().getPath()); //Check for deploy if (commandLine.hasOption(tuit.DEPLOY)) { if (commandLine.hasOption(tuit.USE_DB)) { NCBITablesDeployer.fastDeployNCBIDatabasesFromNCBI(connection, tmpDir); } else { NCBITablesDeployer.fastDeployNCBIRamDatabaseFromNCBI(tmpDir, ramDbFile); } Log.getInstance().log(Level.FINE, "Task done, exiting..."); return; } //Check for update if (commandLine.hasOption(tuit.UPDATE)) { if (commandLine.hasOption(tuit.USE_DB)) { NCBITablesDeployer.updateDatabasesFromNCBI(connection, tmpDir); } else { //No need to specify a different way to update the database other than just deploy in case of the RAM database NCBITablesDeployer.fastDeployNCBIRamDatabaseFromNCBI(tmpDir, ramDbFile); } Log.getInstance().log(Level.FINE, "Task done, exiting..."); return; } //Connect to the database if (commandLine.hasOption(tuit.USE_DB)) { mySQL_connector = MySQL_Connector.newDefaultInstance( "jdbc:mysql://" + tuitProperties.getDBConnection().getUrl().trim() + "/", tuitProperties.getDBConnection().getLogin().trim(), tuitProperties.getDBConnection().getPassword().trim()); mySQL_connector.connectToDatabase(); connection = mySQL_connector.getConnection(); } else { //Probe for ram database if (ramDbFile.exists() && ramDbFile.canRead()) { Log.getInstance().log(Level.INFO, "Loading RAM taxonomic map..."); try { ramDb = RamDb.loadSelfFromFile(ramDbFile); } catch (IOException ie) { if (ie instanceof java.io.InvalidClassException) throw new IOException("The RAM-based taxonomic database needs to be updated."); } } else { Log.getInstance().log(Level.SEVERE, "The RAM database either has not been deployed, or is not accessible." + "Please use the --deploy option and check permissions on the TUIT directory. " + "If you were looking to use the RDBMS as a taxonomic reference, plese use the -usedb option."); return; } } if (commandLine.hasOption(tuit.B)) { blastOutputFile = new File(commandLine.getOptionValue(tuit.B)); if (!blastOutputFile.exists() || !blastOutputFile.canRead()) { throw new Exception("BLAST output file either does not exist, or is not readable."); } else if (blastOutputFile.isDirectory()) { throw new Exception("BLAST output file points to a directory."); } } //Check vital parameters if (!commandLine.hasOption(tuit.IN)) { throw new ParseException("No input file option found, exiting."); } else { inputFile = new File(commandLine.getOptionValue(tuit.IN)); Log.getInstance().setLogName(inputFile.getName().split("\\.")[0] + ".tuit.log"); } //Correct the output file option if needed if (!commandLine.hasOption(tuit.OUT)) { outputFile = new File((inputFile.getPath()).split("\\.")[0] + tuit.TUIT_EXT); } else { outputFile = new File(commandLine.getOptionValue(tuit.OUT)); } //Adjust the output level if (commandLine.hasOption(tuit.V)) { Log.getInstance().setLevel(Level.FINE); Log.getInstance().log(Level.INFO, "Using verbose output for the log"); } else { Log.getInstance().setLevel(Level.INFO); } //Try all files if (inputFile != null) { if (!inputFile.exists() || !inputFile.canRead()) { throw new Exception("Input file either does not exist, or is not readable."); } else if (inputFile.isDirectory()) { throw new Exception("Input file points to a directory."); } } if (!properties.exists() || !properties.canRead()) { throw new Exception("Properties file either does not exist, or is not readable."); } else if (properties.isDirectory()) { throw new Exception("Properties file points to a directory."); } //Create blast parameters final StringBuilder stringBuilder = new StringBuilder(); for (Database database : tuitProperties.getBLASTNParameters().getDatabase()) { stringBuilder.append(database.getUse()); stringBuilder.append(" ");//Gonna insert an extra space for the last database } String remote; String entrez_query; if (tuitProperties.getBLASTNParameters().getRemote().getDelegate().equals("yes")) { remote = "-remote"; entrez_query = "-entrez_query"; parameters = new String[] { "-db", stringBuilder.toString(), remote, entrez_query, tuitProperties.getBLASTNParameters().getEntrezQuery().getValue(), "-evalue", tuitProperties.getBLASTNParameters().getExpect().getValue() }; } else { if (!commandLine.hasOption(tuit.B)) { if (tuitProperties.getBLASTNParameters().getEntrezQuery().getValue().toUpperCase() .startsWith("NOT") || tuitProperties.getBLASTNParameters().getEntrezQuery().getValue().toUpperCase() .startsWith("ALL")) { parameters = new String[] { "-db", stringBuilder.toString(), "-evalue", tuitProperties.getBLASTNParameters().getExpect().getValue(), "-negative_gilist", TUITFileOperatorHelper.restrictToEntrez(tmpDir, tuitProperties.getBLASTNParameters().getEntrezQuery().getValue() .toUpperCase().replace("NOT", "OR")) .getAbsolutePath(), "-num_threads", tuitProperties.getBLASTNParameters().getNumThreads().getValue() }; } else if (tuitProperties.getBLASTNParameters().getEntrezQuery().getValue().toUpperCase() .equals("")) { parameters = new String[] { "-db", stringBuilder.toString(), "-evalue", tuitProperties.getBLASTNParameters().getExpect().getValue(), "-num_threads", tuitProperties.getBLASTNParameters().getNumThreads().getValue() }; } else { parameters = new String[] { "-db", stringBuilder.toString(), "-evalue", tuitProperties.getBLASTNParameters().getExpect().getValue(), /*"-gilist", TUITFileOperatorHelper.restrictToEntrez( tmpDir, tuitProperties.getBLASTNParameters().getEntrezQuery().getValue()).getAbsolutePath(),*/ //TODO remove comment!!!!! "-num_threads", tuitProperties.getBLASTNParameters().getNumThreads().getValue() }; } } } //Prepare a cutoff Map if (tuitProperties.getSpecificationParameters() != null && tuitProperties.getSpecificationParameters().size() > 0) { cutoffMap = new HashMap<Ranks, TUITCutoffSet>(tuitProperties.getSpecificationParameters().size()); for (SpecificationParameters specificationParameters : tuitProperties .getSpecificationParameters()) { cutoffMap.put(Ranks.valueOf(specificationParameters.getCutoffSet().getRank()), TUITCutoffSet.newDefaultInstance( Double.parseDouble( specificationParameters.getCutoffSet().getPIdentCutoff().getValue()), Double.parseDouble(specificationParameters.getCutoffSet() .getQueryCoverageCutoff().getValue()), Double.parseDouble( specificationParameters.getCutoffSet().getAlpha().getValue()))); } } else { cutoffMap = new HashMap<Ranks, TUITCutoffSet>(); } final TUITFileOperatorHelper.OutputFormat format; if (tuitProperties.getBLASTNParameters().getOutputFormat().getFormat().equals("rdp")) { format = TUITFileOperatorHelper.OutputFormat.RDP_FIXRANK; } else { format = TUITFileOperatorHelper.OutputFormat.TUIT; } try (TUITFileOperator<NucleotideFasta> nucleotideFastaTUITFileOperator = NucleotideFastaTUITFileOperator .newInstance(format, cutoffMap);) { nucleotideFastaTUITFileOperator.setInputFile(inputFile); nucleotideFastaTUITFileOperator.setOutputFile(outputFile); final String cleanupString = tuitProperties.getBLASTNParameters().getKeepBLASTOuts().getKeep(); final boolean cleanup; if (cleanupString.equals("no")) { Log.getInstance().log(Level.INFO, "Temporary BLAST files will be deleted."); cleanup = true; } else { Log.getInstance().log(Level.INFO, "Temporary BLAST files will be kept."); cleanup = false; } //Create blast identifier ExecutorService executorService = Executors.newSingleThreadExecutor(); if (commandLine.hasOption(tuit.USE_DB)) { if (blastOutputFile == null) { blastIdentifier = TUITBLASTIdentifierDB.newInstanceFromFileOperator(tmpDir, blastnExecutable, parameters, nucleotideFastaTUITFileOperator, connection, cutoffMap, Integer.parseInt( tuitProperties.getBLASTNParameters().getMaxFilesInBatch().getValue()), cleanup); } else { try { blastIdentifier = TUITBLASTIdentifierDB.newInstanceFromBLASTOutput( nucleotideFastaTUITFileOperator, connection, cutoffMap, blastOutputFile, Integer.parseInt( tuitProperties.getBLASTNParameters().getMaxFilesInBatch().getValue()), cleanup); } catch (JAXBException e) { Log.getInstance().log(Level.SEVERE, "Error reading " + blastOutputFile.getName() + ", please check input. The file must be XML formatted."); } catch (Exception e) { e.printStackTrace(); } } } else { if (blastOutputFile == null) { blastIdentifier = TUITBLASTIdentifierRAM.newInstanceFromFileOperator(tmpDir, blastnExecutable, parameters, nucleotideFastaTUITFileOperator, cutoffMap, Integer.parseInt( tuitProperties.getBLASTNParameters().getMaxFilesInBatch().getValue()), cleanup, ramDb); } else { try { blastIdentifier = TUITBLASTIdentifierRAM.newInstanceFromBLASTOutput( nucleotideFastaTUITFileOperator, cutoffMap, blastOutputFile, Integer.parseInt( tuitProperties.getBLASTNParameters().getMaxFilesInBatch().getValue()), cleanup, ramDb); } catch (JAXBException e) { Log.getInstance().log(Level.SEVERE, "Error reading " + blastOutputFile.getName() + ", please check input. The file must be XML formatted."); } catch (Exception e) { e.printStackTrace(); } } } Future<?> runnableFuture = executorService.submit(blastIdentifier); runnableFuture.get(); executorService.shutdown(); } } catch (ParseException pe) { Log.getInstance().log(Level.SEVERE, (pe.getMessage())); formatter.printHelp("tuit", options); } catch (SAXException saxe) { Log.getInstance().log(Level.SEVERE, saxe.getMessage()); } catch (FileNotFoundException fnfe) { Log.getInstance().log(Level.SEVERE, fnfe.getMessage()); } catch (TUITPropertyBadFormatException tpbfe) { Log.getInstance().log(Level.SEVERE, tpbfe.getMessage()); } catch (ClassCastException cce) { Log.getInstance().log(Level.SEVERE, cce.getMessage()); } catch (JAXBException jaxbee) { Log.getInstance().log(Level.SEVERE, "The properties file is not well formatted. Please ensure that the XML is consistent with the io.properties.dtd schema."); } catch (ClassNotFoundException cnfe) { //Probably won't happen unless the library deleted from the .jar Log.getInstance().log(Level.SEVERE, cnfe.getMessage()); //cnfe.printStackTrace(); } catch (SQLException sqle) { Log.getInstance().log(Level.SEVERE, "A database communication error occurred with the following message:\n" + sqle.getMessage()); //sqle.printStackTrace(); if (sqle.getMessage().contains("Access denied for user")) { Log.getInstance().log(Level.SEVERE, "Please use standard database login: " + NCBITablesDeployer.login + " and password: " + NCBITablesDeployer.password); } } catch (Exception e) { Log.getInstance().log(Level.SEVERE, e.getMessage()); e.printStackTrace(); } finally { if (connection != null) { try { connection.close(); } catch (SQLException sqle) { Log.getInstance().log(Level.SEVERE, "Problem closing the database connection: " + sqle); } } Log.getInstance().log(Level.FINE, "Task done, exiting..."); } } }