act.installer.reachablesexplorer.WikiWebServicesExporter.java Source code

Java tutorial

Introduction

Here is the source code for act.installer.reachablesexplorer.WikiWebServicesExporter.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package act.installer.reachablesexplorer;

import com.act.utils.CLIUtil;
import com.act.utils.TSVWriter;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.mongojack.DBCursor;
import org.mongojack.DBQuery;
import org.mongojack.JacksonDBCollection;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * This class outputs a TSV file that can be consumed by various web services that support the reachables wiki.
 *
 * <p>Every document read from the reachables collection becomes one row with the columns listed in
 * {@code HEADER}: the reachable's InChI, its InChI key, its page name (column {@code display_name}) and its
 * structure file name (column {@code image_name}).
 */
public class WikiWebServicesExporter {
    private static final Logger LOGGER = LogManager.getFormatterLogger(WikiWebServicesExporter.class);

    /** Column names of the output TSV; the keys used in {@link #toRow(Reachable)} must match these. */
    private static final List<String> HEADER = Arrays.asList("inchi", "inchi_key", "display_name", "image_name");

    // Short names of the command line options.
    private static final String OPTION_INPUT_DB = "d";
    private static final String OPTION_INPUT_DB_HOST = "H";
    private static final String OPTION_INPUT_DB_PORT = "p";
    private static final String OPTION_INPUT_DB_COLLECTION = "c";
    private static final String OPTION_OUTPUT_FILE = "o";
    private static final String OPTION_EXPORT_SOME = "m";
    private static final String OPTION_INPUT_SEQUENCE_COLLECTION = "s";

    // Values used when the corresponding option is not given on the command line.
    private static final String DEFAULT_HOST = "localhost";
    private static final String DEFAULT_PORT = "27017";
    // TODO change the defaults to something more plain/easy to know why they are like they are (No version numbers etc.)
    private static final String DEFAULT_DB = "wiki_reachables";
    private static final String DEFAULT_COLLECTION = "reachablesv7";
    private static final String DEFAULT_SEQUENCES_COLLECTION = "sequencesv7";
    private static final String DEFAULT_RENDERING_CACHE = "/tmp";

    // We won't touch this db, but need it for the Loader's constructor
    private static final String UNUSED_SOURCE_DB = "dummy_source_db";

    private static final List<Option.Builder> OPTION_BUILDERS = buildOptionBuilders();

    private static final String HELP_MESSAGE = StringUtils.join(new String[] {
            "This class write the contents of a Reachables collection as a TSV that can be consumed by the substructure ",
            "search service" }, "");
    // Configured here but not referenced anywhere else in this class.
    private static final HelpFormatter HELP_FORMATTER = new HelpFormatter();
    static {
        HELP_FORMATTER.setWidth(100);
    }

    /**
     * Builds the list of command line options understood by this tool.
     *
     * <p>A plain {@link ArrayList} is populated instead of using double-brace initialization, so no anonymous
     * {@code ArrayList} subclass is created.
     *
     * @return a new mutable list of option builders, in the order they are declared below
     */
    private static List<Option.Builder> buildOptionBuilders() {
        List<Option.Builder> builders = new ArrayList<>();
        builders.add(Option.builder(OPTION_INPUT_DB).argName("db name")
                .desc(String.format("The name of the reachables database to read (default: %s)", DEFAULT_DB))
                .hasArg().longOpt("db"));
        builders.add(Option.builder(OPTION_INPUT_DB_HOST).argName("db host")
                .desc(String.format("The host to which connect when reading from a DB (default: %s)",
                        DEFAULT_HOST))
                .hasArg().longOpt("host"));
        builders.add(Option.builder(OPTION_INPUT_DB_PORT).argName("db port")
                .desc(String.format("The port to which connect when reading from a DB (default: %s)",
                        DEFAULT_PORT))
                .hasArg().longOpt("port"));
        builders.add(Option.builder(OPTION_INPUT_DB_COLLECTION).argName("collection")
                .desc(String.format("The collection from which to read reachables documents (default: %s)",
                        DEFAULT_COLLECTION))
                .hasArg().longOpt("collection"));
        // The output file is the only required option.
        builders.add(Option.builder(OPTION_OUTPUT_FILE).argName("file name")
                .desc("The name of the output tsv to write")
                .hasArg().required().longOpt("out"));
        builders.add(Option.builder(OPTION_INPUT_SEQUENCE_COLLECTION).argName("sequence-collection")
                .desc("The sequence collection that should be used for export.").hasArg()
                .longOpt("sequence-collection"));
        // Accepts several comma-separated ids in a single option occurrence.
        builders.add(Option.builder(OPTION_EXPORT_SOME).argName("ids")
                .desc("Only export molecules with the specified ids").hasArgs().valueSeparator(',')
                .longOpt("only-export"));
        return builders;
    }

    /**
     * Reads the ids passed via the "only-export" option.
     *
     * @param cl the parsed command line
     * @return the ids as {@code Long}s, or an empty list when the option was not given
     * @throws NumberFormatException if one of the supplied values is not a valid long
     */
    private static List<Long> parseExportIds(CommandLine cl) {
        if (!cl.hasOption(OPTION_EXPORT_SOME)) {
            return Collections.emptyList();
        }
        return Arrays.stream(cl.getOptionValues(OPTION_EXPORT_SOME))
                .map(Long::valueOf)
                .collect(Collectors.toList());
    }

    /**
     * Converts one reachable into a TSV row keyed by the column names in {@code HEADER}.
     *
     * @param r the reachable to export
     * @return a new map holding the four exported fields of {@code r}
     */
    private static Map<String, String> toRow(Reachable r) {
        Map<String, String> row = new HashMap<>();
        row.put("inchi", r.getInchi());
        row.put("inchi_key", r.getInchiKey());
        row.put("display_name", r.getPageName());
        row.put("image_name", r.getStructureFilename());
        return row;
    }

    /**
     * Command line entry point: reads reachables from the configured collection and writes them to the
     * output TSV, one row per reachable.
     *
     * <p>When the "only-export" option is given, only documents whose {@code _id} is among the supplied ids are
     * read; otherwise the whole collection is exported.
     *
     * @param args the command line arguments, see {@code OPTION_BUILDERS}
     * @throws Exception any failure raised while parsing arguments, reading the DB or writing the file is
     *                   propagated to the caller
     */
    public static void main(String[] args) throws Exception {
        CLIUtil cliUtil = new CLIUtil(WikiWebServicesExporter.class, HELP_MESSAGE, OPTION_BUILDERS);
        CommandLine cl = cliUtil.parseCommandLine(args);

        String host = cl.getOptionValue(OPTION_INPUT_DB_HOST, DEFAULT_HOST);
        Integer port = Integer.parseInt(cl.getOptionValue(OPTION_INPUT_DB_PORT, DEFAULT_PORT));
        String dbName = cl.getOptionValue(OPTION_INPUT_DB, DEFAULT_DB);
        String collection = cl.getOptionValue(OPTION_INPUT_DB_COLLECTION, DEFAULT_COLLECTION);
        String sequenceCollection = cl.getOptionValue(OPTION_INPUT_SEQUENCE_COLLECTION,
                DEFAULT_SEQUENCES_COLLECTION);

        LOGGER.info("Attempting to connect to DB %s:%d/%s, collection %s", host, port, dbName, collection);
        Loader loader = new Loader(host, port, UNUSED_SOURCE_DB, dbName, collection, sequenceCollection,
                DEFAULT_RENDERING_CACHE);

        JacksonDBCollection<Reachable, String> reachables = loader.getJacksonReachablesCollection();

        LOGGER.info("Connected to DB, reading reachables");

        List<Long> exportIds = parseExportIds(cl);

        TSVWriter<String, String> tsvWriter = new TSVWriter<>(HEADER);
        tsvWriter.open(new File(cl.getOptionValue(OPTION_OUTPUT_FILE)));
        try {
            DBCursor<Reachable> cursor = exportIds.isEmpty() ? reachables.find()
                    : reachables.find(DBQuery.in("_id", exportIds));
            try {
                int written = 0;
                while (cursor.hasNext()) {
                    tsvWriter.append(toRow(cursor.next()));
                    // Flushed after every row, as before, so rows already exported are pushed to the writer's
                    // output as the export progresses.
                    tsvWriter.flush();
                    written++;
                }
                LOGGER.info("Wrote %d reachables to output TSV", written);
            } finally {
                // Release the cursor even when writing a row fails part-way through the export.
                cursor.close();
            }
        } finally {
            tsvWriter.close();
        }
    }
}