edu.umn.msi.gx.mztosqlite.MzToSQLite.java Source code

Introduction

Here is the source code for edu.umn.msi.gx.mztosqlite.MzToSQLite.java
Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package edu.umn.msi.gx.mztosqlite;

import java.io.File;
import java.io.IOException;
import static java.lang.System.exit;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.Parser;
import org.tmatesoft.sqljet.core.SqlJetException;

/**
 * Command-line entry point that sorts proteomics input files by their
 * {@link ProteomicsFormat} and feeds them through the spectrum and
 * identification parsers into a {@link MzSQLiteDB}.
 *
 * <p>Typical use: {@link #parseOptions(String[])} followed by
 * {@link #processFiles()}, which is what {@link #main(String[])} does.
 *
 * @author James E Johnson jj@umn.edu
 */
public class MzToSQLite {
    private static final Logger LOGGER = Logger.getLogger(MzToSQLite.class.getName());

    /** Upper bound on the number of values a multi-valued option accepts. */
    private static final int MAX_INPUTS = 100;

    // Long option names.
    private static final String DB_OPT = "sqlite";
    private static final String INPUT_FILE_OPT = "input";
    private static final String INPUT_NAME_OPT = "name";
    private static final String INPUT_ID_OPT = "encoded_id";
    // Distinct long name: reusing "encoded_id" for -f would overwrite the
    // long-name lookup of -e inside Options.
    private static final String FASTA_OPT = "fasta";
    private static final String VERBOSE_OPT = "verbose";
    private static final String HELP_OPT = "help";

    /** Spectrum (scan) files to parse, keyed by path. */
    Map<String, ProteomicsFormat> scanFiles = new HashMap<>();
    /** Identification (MZID) files to parse, keyed by path. */
    Map<String, ProteomicsFormat> identFiles = new HashMap<>();
    /** FASTA sequence database files, keyed by path. */
    Map<String, ProteomicsFormat> seqDbFiles = new HashMap<>();
    /** Path of the SQLite output file; stays null unless -s/--sqlite is given. */
    String dbPath = null;
    String jsonPath = null;
    String tsvPath = null;
    /** Set by -v/--verbose; off unless requested. */
    boolean verbose = false;
    /** Output database; stays null unless -s/--sqlite is given. */
    MzSQLiteDB mzSQLiteDB = null;

    /**
     * Parses the command line, opens the output database when requested, and
     * classifies every readable input file into {@link #scanFiles},
     * {@link #identFiles} or {@link #seqDbFiles}.
     *
     * <p>Input files are taken from the values of {@code -i/--input}, the values
     * of {@code -f/--fasta} (registered as FASTA without format detection), and
     * any remaining positional arguments. The {@code -n} and {@code -e} options
     * are accepted but their values are not consumed by this class.
     *
     * <p>With {@code -h/--help} the usage text is printed and the JVM exits with
     * status 0. Parse errors are logged and leave the instance with whatever
     * state had been set so far.
     *
     * @param args raw command-line arguments
     */
    public final void parseOptions(String[] args) {
        Parser parser = new BasicParser();
        Options options = buildOptions();
        try {
            CommandLine cli = parser.parse(options, args);
            if (cli.hasOption(HELP_OPT)) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("java -jar MzToSQLite.jar [options] [proteomics_data_file ...]", options);
                System.exit(0);
            }
            if (cli.hasOption(VERBOSE_OPT)) {
                verbose = true;
            }
            if (cli.hasOption(DB_OPT)) {
                dbPath = cli.getOptionValue(DB_OPT);
                mzSQLiteDB = new MzSQLiteDB(dbPath);
                try {
                    mzSQLiteDB.createTables();
                } catch (SqlJetException ex) {
                    LOGGER.log(Level.SEVERE, "Unable to create tables in " + dbPath, ex);
                }
            }
            // -i may swallow trailing file names as its own values (it accepts
            // up to MAX_INPUTS), so its values must be processed as inputs too.
            String[] inputValues = cli.getOptionValues(INPUT_FILE_OPT);
            if (inputValues != null) {
                for (String filePath : inputValues) {
                    registerInputFile(filePath);
                }
            }
            String[] fastaValues = cli.getOptionValues(FASTA_OPT);
            if (fastaValues != null) {
                for (String filePath : fastaValues) {
                    registerFastaFile(filePath);
                }
            }
            List<String> argList = cli.getArgList();
            if (argList != null) {
                for (String filePath : argList) {
                    registerInputFile(filePath);
                }
            }
        } catch (ParseException exp) {
            LOGGER.log(Level.SEVERE, null, exp);
        }
    }

    /** Builds the set of command-line options understood by this tool. */
    private static Options buildOptions() {
        Options options = new Options();
        options.addOption("s", DB_OPT, true, "SQLite output file");
        options.addOption("v", VERBOSE_OPT, false, "verbose");
        options.addOption("h", HELP_OPT, false, "help");
        options.addOption("i", INPUT_FILE_OPT, true, "input file");
        options.addOption("n", INPUT_NAME_OPT, true, "name for input file");
        options.addOption("e", INPUT_ID_OPT, true, "encoded id for input file");
        options.addOption("f", FASTA_OPT, true, "FASTA Search Database files");
        options.getOption(INPUT_FILE_OPT).setArgs(MAX_INPUTS);
        options.getOption(INPUT_NAME_OPT).setArgs(MAX_INPUTS);
        options.getOption(INPUT_ID_OPT).setArgs(MAX_INPUTS);
        options.getOption(FASTA_OPT).setArgs(MAX_INPUTS);
        return options;
    }

    /** Detects the format of one input file and files it under the matching map. */
    private void registerInputFile(String filePath) {
        File inputFile = new File(filePath);
        if (!inputFile.canRead()) {
            LOGGER.log(Level.WARNING, "Unable to read {0}", filePath);
            return;
        }
        ProteomicsFormat format;
        try {
            format = ProteomicsFormat.getFormat(inputFile);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Unable to determine format of " + filePath, ex);
            return;
        }
        if (format == null) {
            // switch on a null enum would throw NullPointerException
            LOGGER.log(Level.WARNING, "Unknown or unsupported format: {0}", filePath);
            return;
        }
        switch (format) {
        case MZID:
            identFiles.put(filePath, format);
            break;
        case MZML:
        case MGF:
        case DTA:
        case MS2:
        case PKL:
        case MZXML:
        case XML_FILE:
        case MZDATA:
        case PRIDEXML:
            scanFiles.put(filePath, format);
            break;
        case FASTA:
            seqDbFiles.put(filePath, format);
            break;
        case PEPXML:
        case UNSUPPORTED:
        default:
            LOGGER.log(Level.WARNING, "Unknown or unsupported format: {0}", filePath);
            break;
        }
    }

    /** Registers a file explicitly given via -f as a FASTA sequence database. */
    private void registerFastaFile(String filePath) {
        if (new File(filePath).canRead()) {
            seqDbFiles.put(filePath, ProteomicsFormat.FASTA);
        } else {
            LOGGER.log(Level.WARNING, "Unable to read {0}", filePath);
        }
    }

    /**
     * Parses all registered files and passes the results to a
     * {@link MzParserHandler} bound to {@link #mzSQLiteDB}.
     *
     * <p>Scan files are parsed first so that the spectrum id map they populate
     * is available to the identification parsers. Every FASTA file is read into
     * each identification parser before that parser runs. A failure while
     * parsing one scan file is logged and does not stop the remaining files.
     */
    public void processFiles() {
        // Shared between the spectrum and identification parsers.
        Map<String, Object> spectrumIdPkidMap = new HashMap<>();
        MzParserHandler handler = new MzParserHandler(this.mzSQLiteDB);
        for (Map.Entry<String, ProteomicsFormat> entry : scanFiles.entrySet()) {
            String filepath = entry.getKey();
            ProteomicsFormat format = entry.getValue();
            recordSource(handler, filepath, format);
            try {
                MzSpectrumParser mzSpectrum = new MzSpectrumParser(filepath, format, spectrumIdPkidMap);
                mzSpectrum.parseSpectrum(handler);
            } catch (Exception ex) {
                LOGGER.log(Level.SEVERE, "Failed to parse spectrum file " + filepath, ex);
            }
        }
        for (Map.Entry<String, ProteomicsFormat> entry : identFiles.entrySet()) {
            String filepath = entry.getKey();
            recordSource(handler, filepath, entry.getValue());
            MzIdentParser mzIdentParser = new MzIdentParser(filepath, spectrumIdPkidMap);
            for (String fastapath : seqDbFiles.keySet()) {
                mzIdentParser.readFasta(fastapath);
            }
            mzIdentParser.parseIdent(handler);
        }
    }

    /** Emits a "Source" record describing one input file to the handler. */
    private static void recordSource(MzParserHandler handler, String filepath, ProteomicsFormat format) {
        Map<String, Object> source = new HashMap<>();
        source.put("name", filepath);
        source.put("location", filepath);
        source.put("format", format.toString());
        handler.handle("Source", source);
    }

    /**
     * Runs the tool: parses the options, then processes the files. Any
     * exception that escapes is logged and the JVM exits with status 1.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        try {
            MzToSQLite mzToSQLite = new MzToSQLite();
            mzToSQLite.parseOptions(args);
            mzToSQLite.processFiles();
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            System.exit(1);
        }
    }

}