org.opencb.opencga.storage.variant.sqlite.VariantSqliteDBAdaptor.java Source code

Java tutorial

Introduction

Here is the source code for org.opencb.opencga.storage.variant.sqlite.VariantSqliteDBAdaptor.java

Source

/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.variant.sqlite;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.WebResource;
import org.apache.commons.lang.StringUtils;
import org.opencb.commons.bioformats.variant.json.VariantAnalysisInfo;
import org.opencb.commons.bioformats.variant.json.VariantControl;
import org.opencb.commons.bioformats.variant.json.VariantInfo;
import org.opencb.commons.containers.QueryResult;
import org.opencb.commons.containers.map.ObjectMap;
import org.opencb.commons.containers.map.QueryOptions;
import org.opencb.opencga.core.auth.SqliteCredentials;
import org.opencb.opencga.core.common.XObject;
import org.opencb.opencga.storage.indices.SqliteManager;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.*;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.opencb.biodata.models.feature.Region;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.effect.VariantEffect;
import org.opencb.biodata.models.variant.stats.VariantStats;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;

/**
 * @author Alejandro Aleman Ramos <aaleman@cipf.es>
 * @author Cristina Yenyxe Gonzalez Garcia <cgonzalez@cipf.es>
 */
public class VariantSqliteDBAdaptor implements VariantDBAdaptor {

    private SqliteCredentials sqliteCredentials;
    private SqliteManager sqliteManager;

    public VariantSqliteDBAdaptor() {
        System.out.println("Variant Query Maker");
    }

    public VariantSqliteDBAdaptor(SqliteCredentials sqliteCredentials) throws SQLException, ClassNotFoundException {
        System.out.println("Variant Query Maker");
        this.sqliteCredentials = sqliteCredentials;
        this.sqliteManager = new SqliteManager();
        this.sqliteManager.connect(this.sqliteCredentials.getPath(), true);

        System.out.println("DB: " + this.sqliteCredentials.getPath());
    }

    /**
     * @param region    The region where variants must be searched
     * @param studyName The name of the study where variants are filed
     * @param options   Optional arguments
     * @return
     */
    @Override
    public QueryResult getAllVariantsByRegionAndStudy(Region region, String studyName, QueryOptions options) {

        Long start, end, dbStart, dbEnd;
        start = System.currentTimeMillis();

        boolean includeSamples, includeStats, includeEffects;
        int pos, id = Integer.MIN_VALUE;
        String sql, chr, ref, alt;
        String columns = "variant.id_variant, variant.chromosome, variant.position, variant.ref, variant.alt,variant.id ";
        String joins = "";

        Variant variant = null;
        XObject elem;
        QueryResult<Variant> queryResult = new QueryResult<>(
                String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd()));

        List<Variant> results = new LinkedList<>();
        List<String> whereClauses = new ArrayList<>(10);

        if (!options.containsKey("samples") && !options.containsKey("stats") && !options.containsKey("effects")) {
            includeSamples = true;
            includeStats = true;
            includeEffects = true;
        } else {
            includeSamples = options.containsKey("samples") && options.getBoolean("samples");
            includeStats = options.containsKey("stats") && options.getBoolean("stats");
            includeEffects = options.containsKey("effects") && options.getBoolean("effects");
        }

        if (includeSamples) {
            columns += ",variant_info.key, variant_info.value ";
            joins += " left join variant_info on variant.id_variant=variant_info.id_variant ";
        }

        if (includeStats) {
            columns += ",variant_stats.maf, variant_stats.mgf, variant_stats.allele_maf , variant_stats.genotype_maf , variant_stats.miss_allele , variant_stats.miss_gt , variant_stats.mendel_err, variant_stats.is_indel , variant_stats.cases_percent_dominant , variant_stats.controls_percent_dominant , variant_stats.cases_percent_recessive , variant_stats.controls_percent_recessive ";
            joins += " inner join variant_stats on variant_stats.chromosome=variant.chromosome AND variant_stats.position=variant.position AND variant_stats.allele_ref=variant.ref AND variant_stats.allele_alt=variant.alt ";
        }

        if (includeEffects) {
            //            columns += "";
            //            joins += " inner join variant_effect on variant_effect.chromosome=variant.chromosome AND variant_effect.position=variant.position AND variant_effect.reference_allele=variant.ref AND variant_effect.alternative_allele = variant.alt ";

        }

        try {

            StringBuilder regionClauses = new StringBuilder();
            regionClauses.append("( variant.chromosome='").append(region.getChromosome()).append("' AND ");
            regionClauses.append("variant.position>=").append(String.valueOf(region.getStart())).append(" AND ");
            regionClauses.append("variant.position<=").append(String.valueOf(region.getEnd()));
            regionClauses.append(" ) ");
            whereClauses.add(regionClauses.toString());

            sql = "SELECT " + columns + " from variant " + joins;

            if (whereClauses.size() > 0) {
                StringBuilder where = new StringBuilder(" WHERE ");

                for (int i = 0; i < whereClauses.size(); i++) {
                    where.append(whereClauses.get(i));
                    if (i < whereClauses.size() - 1) {
                        where.append(" AND ");
                    }
                }

                sql += where.toString() + " ORDER BY variant.id_variant ";
            }

            sql += ";";

            dbStart = System.currentTimeMillis();
            Iterator<XObject> it = sqliteManager.queryIterator(sql);
            dbEnd = System.currentTimeMillis();

            queryResult.setDbTime(dbEnd - dbStart);

            while (it.hasNext()) {
                elem = it.next();

                if (id != elem.getInt("id_variant")) { // new Elem
                    id = elem.getInt("id_variant");
                    chr = elem.getString("chromosome");
                    pos = elem.getInt("position");
                    ref = elem.getString("ref");
                    alt = elem.getString("alt");

                    variant = new Variant(chr, pos, pos, ref, alt);

                    variant.setId(elem.getString("id"));
                    variant.setFormat(elem.getString("format"));

                    if (includeStats) {
                        variant.setStats(new VariantStats(chr, pos, ref, alt, elem.getDouble("maf"),
                                elem.getDouble("mgf"), elem.getString("allele_maf"), elem.getString("genotype_maf"),
                                elem.getInt("miss_allele"), elem.getInt("miss_gt"), elem.getInt("mendel_err"),
                                elem.getInt("is_indel") == 1, elem.getDouble("cases_percent_dominant"),
                                elem.getDouble("controls_percent_dominant"),
                                elem.getDouble("cases_percent_recessive"),
                                elem.getDouble("controls_percent_recessive")));
                    }
                    if (includeEffects) {
                        VariantEffect ve = new VariantEffect();
                    }

                    if (variant != null) {
                        results.add(variant);
                    }

                }

                if (elem.getString("key") != null && elem.getString("value") != null) {
                    variant.addAttribute(elem.getString("key"), elem.getString("value"));
                }

            }

            sqliteManager.disconnect(false);

        } catch (SQLException e) {
            System.err.println("getAllVariantsByRegion: " + e.getClass().getName() + ": " + e.getMessage());
        }

        //        System.out.println("Results");
        //        for (Variant v : results) {
        //            System.out.println(v);
        //        }

        queryResult.setResult(results);
        queryResult.setNumResults(results.size());
        end = System.currentTimeMillis();
        queryResult.setTime(end - start);
        return queryResult;
    }

    @Override
    public List<QueryResult> getAllVariantsByRegionList(List<Region> region, String studyName,
            QueryOptions options) {
        return null; // TODO aaleman: Implementation needed
    }

    @Override
    public QueryResult<ObjectMap> getVariantsHistogramByRegion(Region region, String studyName,
            boolean histogramLogarithm, int histogramMax) {
        QueryResult<ObjectMap> queryResult = new QueryResult<>(
                String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); // TODO Fill metadata
        List<ObjectMap> data = new ArrayList<>();

        long startTime = System.currentTimeMillis();

        Path metaDir = getMetaDir(sqliteCredentials.getPath());
        String fileName = sqliteCredentials.getPath().getFileName().toString();

        try {
            long startDbTime = System.currentTimeMillis();
            sqliteManager.connect(metaDir.resolve(Paths.get(fileName)), true);
            System.out.println("SQLite path: " + metaDir.resolve(Paths.get(fileName)).toString());
            String queryString = "SELECT * FROM chunk WHERE chromosome='" + region.getChromosome()
                    + "' AND start <= " + region.getEnd() + " AND end >= " + region.getStart();
            List<XObject> queryResults = sqliteManager.query(queryString);
            sqliteManager.disconnect(true);
            queryResult.setDbTime(System.currentTimeMillis() - startDbTime);

            int resultSize = queryResults.size();

            if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram
                int sumChunkSize = resultSize / histogramMax;
                int i = 0, j = 0;
                int featuresCount = 0;
                ObjectMap item = null;

                for (XObject result : queryResults) {
                    featuresCount += result.getInt("features_count");
                    if (i == 0) {
                        item = new ObjectMap("chromosome", result.getString("chromosome"));
                        item.put("chunkId", result.getInt("chunk_id"));
                        item.put("start", result.getInt("start"));
                    } else if (i == sumChunkSize - 1 || j == resultSize - 1) {
                        if (histogramLogarithm) {
                            item.put("featuresCount", (featuresCount > 0) ? Math.log(featuresCount) : 0);
                        } else {
                            item.put("featuresCount", featuresCount);
                        }
                        item.put("end", result.getInt("end"));
                        data.add(item);
                        i = -1;
                        featuresCount = 0;
                    }
                    j++;
                    i++;
                }
            } else {
                for (XObject result : queryResults) {
                    ObjectMap item = new ObjectMap("chromosome", result.getString("chromosome"));
                    item.put("chunkId", result.getInt("chunk_id"));
                    item.put("start", result.getInt("start"));
                    if (histogramLogarithm) {
                        int features_count = result.getInt("features_count");
                        result.put("featuresCount", (features_count > 0) ? Math.log(features_count) : 0);
                    } else {
                        item.put("featuresCount", result.getInt("features_count"));
                    }
                    item.put("end", result.getInt("end"));
                    data.add(item);
                }
            }
        } catch (ClassNotFoundException | SQLException ex) {
            Logger.getLogger(VariantSqliteDBAdaptor.class.getName()).log(Level.SEVERE, null, ex);
            queryResult.setErrorMsg(ex.getMessage());
        }

        queryResult.setResult(data);
        queryResult.setNumResults(data.size());
        queryResult.setTime(System.currentTimeMillis() - startTime);

        return queryResult;
    }

    @Override
    public QueryResult getStatsByVariant(Variant variant, QueryOptions options) {
        return null; // TODO aaleman: Implementation needed
    }

    @Override
    public QueryResult getSimpleStatsByVariant(Variant variant, QueryOptions options) {
        return null; // TODO aaleman: Implementation needed
    }

    @Override
    public QueryResult getEffectsByVariant(Variant variant, QueryOptions options) {
        return null; // TODO aaleman: Implementation needed
    }

    @Override
    public List<VariantInfo> getRecords(Map<String, String> options) {
        Connection con;
        Statement stmt;
        List<VariantInfo> list = new ArrayList<>(100);

        String dbName = options.get("db_name");
        showDb(dbName);
        try {
            Class.forName("org.sqlite.JDBC");
            con = DriverManager.getConnection("jdbc:sqlite:" + dbName);

            List<String> whereClauses = new ArrayList<>(10);

            Map<String, List<String>> sampleGenotypes;
            Map<String, String> controlsMAFs = new LinkedHashMap<>();
            sampleGenotypes = processSamplesGT(options);

            if (options.containsKey("region_list") && !options.get("region_list").equals("")) {

                StringBuilder regionClauses = new StringBuilder("(");
                String[] regions = options.get("region_list").split(",");
                Pattern patternReg = Pattern.compile("(\\w+):(\\d+)-(\\d+)");
                Matcher matcherReg, matcherChr;

                for (int i = 0; i < regions.length; i++) {
                    String region = regions[i];
                    matcherReg = patternReg.matcher(region);
                    if (matcherReg.find()) {
                        String chr = matcherReg.group(1);
                        int start = Integer.valueOf(matcherReg.group(2));
                        int end = Integer.valueOf(matcherReg.group(3));

                        regionClauses.append("( variant_stats.chromosome='").append(chr).append("' AND ");
                        regionClauses.append("variant_stats.position>=").append(start).append(" AND ");
                        regionClauses.append("variant_stats.position<=").append(end).append(" )");

                        if (i < (regions.length - 1)) {
                            regionClauses.append(" OR ");

                        }
                    } else {
                        Pattern patternChr = Pattern.compile("(\\w+)");
                        matcherChr = patternChr.matcher(region);

                        if (matcherChr.find()) {
                            String chr = matcherChr.group();
                            regionClauses.append("( variant_stats.chromosome='").append(chr).append("')");

                            if (i < (regions.length - 1)) {
                                regionClauses.append(" OR ");
                            }
                        } else {
                            System.err.println("ERROR: Region (" + region + ")");
                        }
                    }
                }
                regionClauses.append(" ) ");
                whereClauses.add(regionClauses.toString());
            }

            if (options.containsKey("chr_pos") && !options.get("chr_pos").equals("")) {

                whereClauses.add("variant_stats.chromosome='" + options.get("chr_pos") + "'");
                if (options.containsKey("start_pos") && !options.get("start_pos").equals("")) {
                    whereClauses.add("variant_stats.position>=" + options.get("start_pos"));
                }

                if (options.containsKey("end_pos") && !options.get("end_pos").equals("")) {
                    whereClauses.add("variant_stats.position<=" + options.get("end_pos"));
                }
            }

            if (options.containsKey("mend_error") && !options.get("mend_error").equals("")) {
                String val = options.get("mend_error");
                String opt = options.get("option_mend_error");
                whereClauses.add("variant_stats.mendel_err " + opt + " " + val);

            }

            if (options.containsKey("is_indel") && options.get("is_indel").equalsIgnoreCase("on")) {
                whereClauses.add("variant_stats.is_indel=1");
            }

            if (options.containsKey("maf") && !options.get("maf").equals("")) {
                String val = options.get("maf");
                String opt = options.get("option_maf");
                whereClauses.add("variant_stats.maf " + opt + " " + val);

            }

            if (options.containsKey("mgf") && !options.get("mgf").equals("")) {
                String val = options.get("mgf");
                String opt = options.get("option_mgf");
                whereClauses.add("variant_stats.mgf " + opt + " " + val);

            }

            if (options.containsKey("miss_allele") && !options.get("miss_allele").equals("")) {
                String val = options.get("miss_allele");
                String opt = options.get("option_miss_allele");
                whereClauses.add("variant_stats.miss_allele " + opt + " " + val);
            }
            if (options.containsKey("miss_gt") && !options.get("miss_gt").equals("")) {
                String val = options.get("miss_gt");
                String opt = options.get("option_miss_gt");
                whereClauses.add("variant_stats.miss_gt " + opt + " " + val);

            }
            if (options.containsKey("cases_percent_dominant")
                    && !options.get("cases_percent_dominant").equals("")) {
                String val = options.get("cases_percent_dominant");
                String opt = options.get("option_cases_dom");
                whereClauses.add("variant_stats.cases_percent_dominant " + opt + " " + val);
            }

            if (options.containsKey("controls_percent_dominant")
                    && !options.get("controls_percent_dominant").equals("")) {
                String val = options.get("controls_percent_dominant");
                String opt = options.get("option_controls_dom");
                whereClauses.add("variant_stats.controls_percent_dominant " + opt + " " + val);
            }

            if (options.containsKey("cases_percent_recessive")
                    && !options.get("cases_percent_recessive").equals("")) {
                String val = options.get("cases_percent_recessive");
                String opt = options.get("option_cases_rec");
                whereClauses.add("variant_stats.cases_percent_recessive " + opt + " " + val);
            }

            if (options.containsKey("controls_percent_recessive")
                    && !options.get("controls_percent_recessive").equals("")) {
                String val = options.get("controls_percent_recessive");
                String opt = options.get("option_controls_rec");
                whereClauses.add("variant_stats.controls_percent_recessive " + opt + " " + val);
            }

            if (options.containsKey("biotype") && !options.get("biotype").equals("")) {
                String[] biotypes = options.get("biotype").split(",");

                StringBuilder biotypesClauses = new StringBuilder(" ( ");

                for (int i = 0; i < biotypes.length; i++) {
                    biotypesClauses.append("variant_effect.feature_biotype LIKE '%").append(biotypes[i])
                            .append("%'");

                    if (i < (biotypes.length - 1)) {
                        biotypesClauses.append(" OR ");
                    }
                }

                biotypesClauses.append(" ) ");
                whereClauses.add(biotypesClauses.toString());
            }

            if (options.containsKey("exc_1000g_controls")
                    && options.get("exc_1000g_controls").equalsIgnoreCase("on")) {
                whereClauses.add("(key NOT LIKE '1000G%' OR key is null)");
            } else if (options.containsKey("maf_1000g_controls") && !options.get("maf_1000g_controls").equals("")) {
                controlsMAFs.put("1000G", options.get("maf_1000g_controls"));
            }

            if (options.containsKey("exc_bier_controls")
                    && options.get("exc_bier_controls").equalsIgnoreCase("on")) {
                whereClauses.add("(key NOT LIKE 'BIER%' OR key is null)");
            } else if (options.containsKey("maf_bier_controls") && !options.get("maf_bier_controls").equals("")) {
                controlsMAFs.put("BIER", options.get("maf_bier_controls"));
            }

            if (options.containsKey("exc_evs_controls") && options.get("exc_evs_controls").equalsIgnoreCase("on")) {
                whereClauses.add("(key NOT LIKE 'EVS%' OR key is null)");
            } else if (options.containsKey("maf_evs_controls") && !options.get("maf_evs_controls").equals("")) {
                controlsMAFs.put("BIER", options.get("maf_evs_controls"));
            }

            if (options.containsKey("conseq_type[]") && !options.get("conseq_type[]").equals("")) {
                whereClauses.add(processConseqType(options.get("conseq_type[]")));
            }

            if (options.containsKey("genes") && !options.get("genes").equals("")) {
                whereClauses.add(processGeneList(options.get("genes")));
                //                processGeneList(options.get("genes"));
            }

            if (sampleGenotypes.size() > 0) {
                StringBuilder sg = new StringBuilder();
                int csg = 0;
                sg.append("(");
                for (Map.Entry<String, List<String>> entry : sampleGenotypes.entrySet()) {
                    sg.append("(");
                    sg.append("sample_name='").append(entry.getKey()).append("' AND (");

                    for (int i = 0; i < entry.getValue().size(); i++) {
                        String[] aux = entry.getValue().get(i).split("/");
                        sg.append("(");
                        sg.append("allele_1=").append(aux[0]).append(" AND allele_2=").append(aux[1]);
                        sg.append(")");

                        if (i + 1 < entry.getValue().size()) {
                            sg.append(" OR ");
                        }
                    }

                    sg.append(")");

                    sg.append(" OR sample_name<>'").append(entry.getKey()).append("'");

                    sg.append(")");

                    if (csg + 1 < sampleGenotypes.entrySet().size()) {
                        sg.append(" AND ");
                    }
                    csg++;
                }
                sg.append(")");
                System.out.println(sg);
                whereClauses.add(sg.toString());
            }

            String sql = "SELECT count(*) as count FROM sample ;";

            stmt = con.createStatement();
            ResultSet rs = stmt.executeQuery(sql);
            int numSamples = 0;

            while (rs.next()) {

                numSamples = rs.getInt("count");
            }

            stmt.close();

            System.out.println("controlsMAFs = " + controlsMAFs);

            System.out.println("sampleGenotypes = " + sampleGenotypes);

            String innerJoinVariantSQL = " left join variant_info on variant.id_variant=variant_info.id_variant ";
            //            String innerJoinEffectSQL = " inner join variant_effect on variant_effect.chromosome=variant.chromosome AND variant_effect.position=variant.position AND variant_effect.reference_allele=variant.ref AND variant_effect.alternative_allele = variant.alt ";

            sql = "SELECT distinct variant.genes,variant.consequence_types, variant.id_variant, variant_info.key, variant_info.value, sample_info.sample_name, sample_info.allele_1, sample_info.allele_2, variant_stats.chromosome ,"
                    + "variant_stats.position , variant_stats.allele_ref , variant_stats.allele_alt , variant_stats.id , variant_stats.maf , variant_stats.mgf, "
                    + "variant_stats.allele_maf , variant_stats.genotype_maf , variant_stats.miss_allele , variant_stats.miss_gt , variant_stats.mendel_err ,"
                    + "variant_stats.is_indel , variant_stats.cases_percent_dominant , variant_stats.controls_percent_dominant , variant_stats.cases_percent_recessive , variant_stats.controls_percent_recessive, "
                    + "variant.polyphen_score, variant.polyphen_effect, variant.sift_score, variant.sift_effect "
                    + " FROM variant_stats "
                    + "inner join variant on variant_stats.chromosome=variant.chromosome AND variant_stats.position=variant.position AND variant_stats.allele_ref=variant.ref AND variant_stats.allele_alt=variant.alt "
                    + "inner join sample_info on variant.id_variant=sample_info.id_variant " + innerJoinVariantSQL;

            if (whereClauses.size() > 0) {
                StringBuilder where = new StringBuilder(" where ");

                for (int i = 0; i < whereClauses.size(); i++) {
                    where.append(whereClauses.get(i));
                    if (i < whereClauses.size() - 1) {
                        where.append(" AND ");
                    }
                }

                sql += where.toString()
                        + " ORDER BY variant_stats.chromosome , variant_stats.position , variant_stats.allele_ref , variant_stats.allele_alt ;";
            }

            System.out.println(sql);

            System.out.println("Start SQL");
            long start = System.currentTimeMillis();
            stmt = con.createStatement();

            rs = stmt.executeQuery(sql);

            VariantStats vs;
            VariantInfo vi = null;

            String chr = "";
            int pos = 0;
            String ref = "", alt = "";

            System.out.println("End SQL: " + ((System.currentTimeMillis() - start) / 1000.0) + " s.");
            System.out.println("Processing");

            while (rs.next()) {
                if (!rs.getString("chromosome").equals(chr) || rs.getInt("position") != pos
                        || !rs.getString("allele_ref").equals(ref) || !rs.getString("allele_alt").equals(alt)) {

                    chr = rs.getString("chromosome");
                    pos = rs.getInt("position");
                    ref = rs.getString("allele_ref");
                    alt = rs.getString("allele_alt");

                    if (vi != null && filterGenotypes(vi, numSamples) && filterControls(vi, controlsMAFs)) {
                        list.add(vi);
                    }
                    vi = new VariantInfo(chr, pos, ref, alt);
                    vs = new VariantStats(chr, pos, ref, alt, rs.getDouble("maf"), rs.getDouble("mgf"),
                            rs.getString("allele_maf"), rs.getString("genotype_maf"), rs.getInt("miss_allele"),
                            rs.getInt("miss_gt"), rs.getInt("mendel_err"), rs.getInt("is_indel") == 1,
                            rs.getDouble("cases_percent_dominant"), rs.getDouble("controls_percent_dominant"),
                            rs.getDouble("cases_percent_recessive"), rs.getDouble("controls_percent_recessive"));
                    vs.setId(rs.getString("id"));

                    // vi.addGenotypes(rs.getString("genotypes"));

                    vi.addStats(vs);
                    vi.addGenes(rs.getString("genes"));
                    vi.addConsequenceTypes(rs.getString("consequence_types"));
                    vi.setPolyphen_score(rs.getDouble("polyphen_score"));
                    vi.setSift_score(rs.getDouble("sift_score"));
                    vi.setPolyphen_effect(rs.getInt("polyphen_effect"));
                    vi.setSift_effect(rs.getInt("sift_effect"));

                }

                if (rs.getString("key") != null && rs.getString("value") != null) {
                    vi.addControl(rs.getString("key"), rs.getString("value"));
                }

                String sample = rs.getString("sample_name");
                String gt = rs.getInt("allele_1") + "/" + rs.getInt("allele_2");

                vi.addSammpleGenotype(sample, gt);
                // vi.addGeneAndConsequenceType(rs.getString("gene_name"), rs.getString("consequence_type_obo"));

            }

            if (vi != null && filterGenotypes(vi, numSamples) && filterControls(vi, controlsMAFs)) {
                list.add(vi);
            }
            stmt.close();

            System.out.println("Total: (" + list.size() + ")");
            System.out.println("End processing: " + ((System.currentTimeMillis() - start) / 1000.0) + " s.");

            con.close();

        } catch (ClassNotFoundException | SQLException e) {
            System.err.println("STATS: " + e.getClass().getName() + ": " + e.getMessage());
        }

        return list;
    }

    private void showDb(String dbName) {
        System.out.println("DB: " + dbName);
    }

    @Override
    public List<VariantStats> getRecordsStats(Map<String, String> options) {

        Connection con;
        Statement stmt;
        List<VariantStats> list = new ArrayList<>(100);

        String dbName = options.get("db_name");

        try {
            Class.forName("org.sqlite.JDBC");
            con = DriverManager.getConnection("jdbc:sqlite:" + dbName);

            List<String> whereClauses = new ArrayList<>(10);

            if (options.containsKey("region_list") && !options.get("region_list").equals("")) {

                StringBuilder regionClauses = new StringBuilder("(");
                String[] regions = options.get("region_list").split(",");
                Pattern pattern = Pattern.compile("(\\w+):(\\d+)-(\\d+)");
                Matcher matcher;

                for (int i = 0; i < regions.length; i++) {
                    String region = regions[i];
                    matcher = pattern.matcher(region);
                    if (matcher.find()) {
                        String chr = matcher.group(1);
                        int start = Integer.valueOf(matcher.group(2));
                        int end = Integer.valueOf(matcher.group(3));

                        regionClauses.append("( variant_stats.chromosome='").append(chr).append("' AND ");
                        regionClauses.append("variant_stats.position>=").append(start).append(" AND ");
                        regionClauses.append("variant_stats.position<=").append(end).append(" )");

                        if (i < (regions.length - 1)) {
                            regionClauses.append(" OR ");

                        }

                    }
                }
                regionClauses.append(" ) ");
                whereClauses.add(regionClauses.toString());
            }

            if (options.containsKey("mend_error") && !options.get("mend_error").equals("")) {
                String val = options.get("mend_error");
                String opt = options.get("option_mend_error");
                whereClauses.add("variant_stats.mendel_err " + opt + " " + val);

            }

            if (options.containsKey("is_indel") && options.get("is_indel").equalsIgnoreCase("on")) {
                whereClauses.add("variant_stats.is_indel=1");
            }

            if (options.containsKey("maf") && !options.get("maf").equals("")) {
                String val = options.get("maf");
                String opt = options.get("option_maf");
                whereClauses.add("variant_stats.maf " + opt + " " + val);

            }

            if (options.containsKey("mgf") && !options.get("mgf").equals("")) {
                String val = options.get("mgf");
                String opt = options.get("option_mgf");
                whereClauses.add("variant_stats.mgf " + opt + " " + val);

            }

            if (options.containsKey("miss_allele") && !options.get("miss_allele").equals("")) {
                String val = options.get("miss_allele");
                String opt = options.get("option_miss_allele");
                whereClauses.add("variant_stats.miss_allele " + opt + " " + val);
            }
            if (options.containsKey("miss_gt") && !options.get("miss_gt").equals("")) {
                String val = options.get("miss_gt");
                String opt = options.get("option_miss_gt");
                whereClauses.add("variant_stats.miss_gt " + opt + " " + val);

            }
            if (options.containsKey("cases_percent_dominant")
                    && !options.get("cases_percent_dominant").equals("")) {
                String val = options.get("cases_percent_dominant");
                String opt = options.get("option_cases_dom");
                whereClauses.add("variant_stats.cases_percent_dominant " + opt + " " + val);
            }

            if (options.containsKey("controls_percent_dominant")
                    && !options.get("controls_percent_dominant").equals("")) {
                String val = options.get("controls_percent_dominant");
                String opt = options.get("option_controls_dom");
                whereClauses.add("variant_stats.controls_percent_dominant " + opt + " " + val);
            }

            if (options.containsKey("cases_percent_recessive")
                    && !options.get("cases_percent_recessive").equals("")) {
                String val = options.get("cases_percent_recessive");
                String opt = options.get("option_cases_rec");
                whereClauses.add("variant_stats.cases_percent_recessive " + opt + " " + val);
            }

            if (options.containsKey("controls_percent_recessive")
                    && !options.get("controls_percent_recessive").equals("")) {
                String val = options.get("controls_percent_recessive");
                String opt = options.get("option_controls_rec");
                whereClauses.add("variant_stats.controls_percent_recessive " + opt + " " + val);
            }

            if (options.containsKey("genes") && !options.get("genes").equals("")) {
                whereClauses.add(processGeneList(options.get("genes")));
            }

            String sql = "SELECT distinct variant_stats.chromosome ,"
                    + "variant_stats.position , variant_stats.allele_ref , variant_stats.allele_alt, variant_stats.maf , variant_stats.mgf, "
                    + "variant_stats.allele_maf , variant_stats.genotype_maf , variant_stats.miss_allele , variant_stats.miss_gt , variant_stats.mendel_err ,"
                    + "variant_stats.is_indel , variant_stats.cases_percent_dominant , variant_stats.controls_percent_dominant , variant_stats.cases_percent_recessive , variant_stats.controls_percent_recessive"
                    + " FROM variant_stats ";

            if (whereClauses.size() > 0) {
                StringBuilder where = new StringBuilder(" where ");

                for (int i = 0; i < whereClauses.size(); i++) {
                    where.append(whereClauses.get(i));
                    if (i < whereClauses.size() - 1) {
                        where.append(" AND ");
                    }
                }

                sql += where.toString()
                        + " ORDER BY variant_stats.chromosome , variant_stats.position , variant_stats.allele_ref ;";
            }

            System.out.println(sql);

            System.out.println("Start SQL");
            long start = System.currentTimeMillis();
            stmt = con.createStatement();

            stmt = con.createStatement();
            ResultSet rs = stmt.executeQuery(sql);

            VariantStats vs;
            VariantInfo vi = null;

            String chr = "";
            int pos = 0;
            String ref = "", alt = "";
            System.out.println("End SQL: " + ((System.currentTimeMillis() - start) / 1000.0) + " s.");

            System.out.println("Processing");

            while (rs.next()) {

                chr = rs.getString("chromosome");
                pos = rs.getInt("position");
                ref = rs.getString("allele_ref");
                alt = rs.getString("allele_alt");

                vs = new VariantStats(chr, pos, ref, alt, rs.getDouble("maf"), rs.getDouble("mgf"),
                        rs.getString("allele_maf"), rs.getString("genotype_maf"), rs.getInt("miss_allele"),
                        rs.getInt("miss_gt"), rs.getInt("mendel_err"), rs.getInt("is_indel") == 1,
                        rs.getDouble("cases_percent_dominant"), rs.getDouble("controls_percent_dominant"),
                        rs.getDouble("cases_percent_recessive"), rs.getDouble("controls_percent_recessive"));

                list.add(vs);

            }

            System.out.println("Total: (" + list.size() + ")");
            System.out.println("End processing: " + ((System.currentTimeMillis() - start) / 1000.0) + " s.");

            stmt.close();
            con.close();

        } catch (ClassNotFoundException | SQLException e) {
            System.err.println("STATS: " + e.getClass().getName() + ": " + e.getMessage());
        }

        return list;
    }

    @Override
    public List<VariantEffect> getEffect(Map<String, String> options) {

        Statement stmt;
        Connection con;
        List<VariantEffect> list = new ArrayList<>(100);

        String dbName = options.get("db_name");

        try {
            Class.forName("org.sqlite.JDBC");
            con = DriverManager.getConnection("jdbc:sqlite:" + dbName);

            String chr = options.get("chr");
            int pos = Integer.valueOf(options.get("pos"));
            String ref = options.get("ref");
            String alt = options.get("alt");

            String sql = "SELECT * FROM variant_effect WHERE chromosome='" + chr + "' AND position=" + pos
                    + " AND reference_allele='" + ref + "' AND alternative_allele='" + alt + "';";

            System.out.println(sql);

            stmt = con.createStatement();
            ResultSet rs = stmt.executeQuery(sql);

            VariantEffect ve;

            while (rs.next()) {
                ve = new VariantEffect(rs.getString("chromosome"), rs.getInt("position"),
                        rs.getString("reference_allele"), rs.getString("alternative_allele"),
                        rs.getString("feature_id"), rs.getString("feature_name"), rs.getString("feature_type"),
                        rs.getString("feature_biotype"), rs.getString("feature_chromosome"),
                        rs.getInt("feature_start"), rs.getInt("feature_end"), rs.getString("feature_strand"),
                        rs.getString("snp_id"), rs.getString("ancestral"), rs.getString("alternative"),
                        rs.getString("gene_id"), rs.getString("transcript_id"), rs.getString("gene_name"),
                        rs.getString("consequence_type"), rs.getString("consequence_type_obo"),
                        rs.getString("consequence_type_desc"), rs.getString("consequence_type_type"),
                        rs.getInt("aa_position"), rs.getString("aminoacid_change"), rs.getString("codon_change"));

                ve.setPolyphenEffect(rs.getInt("polyphen_effect"));
                ve.setSiftEffect(rs.getInt("sift_effect"));
                ve.setPolyphenScore(rs.getDouble("polyphen_score"));
                ve.setSiftScore(rs.getDouble("sift_score"));
                list.add(ve);

            }

            stmt.close();
            con.close();

        } catch (ClassNotFoundException | SQLException e) {
            System.err.println("EFFECT: " + e.getClass().getName() + ": " + e.getMessage());
        }

        return list;
    }

    @Override
    public VariantAnalysisInfo getAnalysisInfo(Map<String, String> options) {

        Statement stmt;
        Connection con;
        VariantAnalysisInfo vi = new VariantAnalysisInfo();

        String dbName = options.get("db_name");

        showDb(dbName);

        try {
            Class.forName("org.sqlite.JDBC");
            con = DriverManager.getConnection("jdbc:sqlite:" + dbName);

            String sql = "SELECT * FROM sample ;";

            stmt = con.createStatement();
            ResultSet rs = stmt.executeQuery(sql);

            while (rs.next()) {

                vi.addSample(rs.getString("name"));

            }

            stmt.close();

            sql = "select * from consequence_type_count";
            stmt = con.createStatement();
            rs = stmt.executeQuery(sql);

            while (rs.next()) {

                vi.addConsequenceType(rs.getString("consequence_type_obo"), rs.getInt("count"));

            }

            stmt.close();

            sql = "select * from biotype_count;";
            stmt = con.createStatement();
            rs = stmt.executeQuery(sql);

            while (rs.next()) {

                vi.addBiotype(rs.getString("feature_biotype"), rs.getInt("count"));

            }

            stmt.close();

            sql = "select * from global_stats";
            stmt = con.createStatement();
            rs = stmt.executeQuery(sql);

            while (rs.next()) {

                vi.addGlobalStats(rs.getString("name").toLowerCase(), rs.getDouble("value"));

            }

            stmt.close();

            sql = "select count(*) as count, chromosome from variant group by chromosome";
            stmt = con.createStatement();
            rs = stmt.executeQuery(sql);

            while (rs.next()) {

                vi.addChromosome(rs.getString("chromosome"), rs.getInt("count"));

            }

            stmt.close();
            con.close();

        } catch (ClassNotFoundException | SQLException e) {
            System.err.println("ANALYSIS INFO: " + e.getClass().getName() + ": " + e.getMessage());
        }

        return vi;
    }

    @Override
    public boolean close() {
        return true;
    }

    private String processGeneList(String genes) {
        System.out.println("genes = " + genes);
        List<String> list = new ArrayList<>();

        //        Client client = ClientBuilder.newClient();
        //        WebTarget webTarget = client.target("http://ws.bioinfo.cipf.es/cellbase/rest/latest/hsa/feature/gene/");

        Client client = Client.create();
        WebResource webResource = client
                .resource("http://ws.bioinfo.cipf.es/cellbase/rest/latest/hsa/feature/gene/");

        ObjectMapper mapper = new ObjectMapper();

        //        Response response = webTarget.path(genes).path("info").queryParam("of", "json").request().get();
        String response = webResource.path(genes).path("info").queryParam("of", "json").get(String.class);
        String data = response.toString();

        System.out.println("response = " + response);

        try {
            JsonNode actualObj = mapper.readTree(data);
            Iterator<JsonNode> it = actualObj.iterator();
            Iterator<JsonNode> aux;
            StringBuilder sb;

            while (it.hasNext()) {
                JsonNode node = it.next();
                if (node.isArray()) {

                    aux = node.iterator();
                    while (aux.hasNext()) {
                        JsonNode auxNode = aux.next();
                        sb = new StringBuilder("(");

                        System.out.println(
                                "auxNode.get(\"chromosome\").asText() = " + auxNode.get("chromosome").asText());

                        sb.append("variant_stats.chromosome='").append(auxNode.get("chromosome").asText())
                                .append("' AND ");
                        sb.append("variant_stats.position>=").append(auxNode.get("start")).append(" AND ");
                        sb.append("variant_stats.position<=").append(auxNode.get("end")).append(" )");

                        list.add(sb.toString());
                    }

                }
            }

        } catch (IOException e) {
            e.printStackTrace();
        }

        String res = "(" + StringUtils.join(list, " OR ") + ")";

        return res;

    }

    private boolean filterControls(VariantInfo vi, Map<String, String> controlsMAFs) {
        boolean res = true;

        String key;
        VariantControl vc;
        float controlMAF;

        for (Map.Entry<String, VariantControl> entry : vi.getControls().entrySet()) {

            key = entry.getKey();
            vc = entry.getValue();

            if (controlsMAFs.containsKey(key)) {

                controlMAF = Float.valueOf(controlsMAFs.get(key));
                if (vc.getMaf() > controlMAF) {
                    return false;
                }

            }
        }
        return res;
    }

    private String processConseqType(String conseqType) {
        List<String> clauses = new ArrayList<>(10);

        String[] cts = conseqType.split(",");

        for (String ct : cts) {
            clauses.add("(variant.consequence_types LIKE '%" + ct + "%' )");
        }

        String res = "";
        if (clauses.size() > 0) {
            res = "(" + StringUtils.join(clauses, " OR ") + ")";
        }

        return res;
    }

    private boolean filterGenotypes(VariantInfo variantInfo, int numSamples) {
        //        if (variantInfo.getSampleGenotypes().size() != numSamples) {
        //            return false;
        //        } else {
        //            return true;
        //        }
        return variantInfo.getSampleGenotypes().size() == numSamples;

    }

    private Map<String, List<String>> processSamplesGT(Map<String, String> options) {
        Map<String, List<String>> samplesGenotypes = new LinkedHashMap<>(10);
        List<String> genotypesList;

        String key, val;
        for (Map.Entry<String, String> entry : options.entrySet()) {
            key = entry.getKey();
            val = entry.getValue();

            if (key.startsWith("sampleGT_")) {
                String sampleName = key.replace("sampleGT_", "").replace("[]", "");
                String[] genotypes = val.split(",");

                if (samplesGenotypes.containsKey(sampleName)) {
                    genotypesList = samplesGenotypes.get(sampleName);
                } else {

                    genotypesList = new ArrayList<>();
                    samplesGenotypes.put(sampleName, genotypesList);
                }

                for (int i = 0; i < genotypes.length; i++) {

                    genotypesList.add(genotypes[i]);
                }

            }

        }
        return samplesGenotypes;
    }

    private void processSamplesGT(Map<String, String> options, List<String> whereClauses) {
        String key, val;

        List<String> auxClauses = new ArrayList<>();
        for (Map.Entry<String, String> entry : options.entrySet()) {
            key = entry.getKey();
            val = entry.getValue();

            if (key.startsWith("sampleGT_")) {
                String sampleName = key.replace("sampleGT_", "").replace("[]", "");
                String[] genotypes = val.split(",");
                StringBuilder sb = new StringBuilder("(");

                for (int i = 0; i < genotypes.length; i++) {
                    String[] gt = genotypes[i].split("_");

                    sb.append("(");
                    sb.append("sample_info.sample_name='" + sampleName + "'");
                    sb.append(" AND sample_info.allele_1=" + gt[0]);
                    sb.append(" AND sample_info.allele_2=" + gt[1]);

                    sb.append(")");

                    if (i < genotypes.length - 1) {
                        sb.append(" OR ");
                    }
                }
                sb.append(")");
                auxClauses.add(sb.toString());
            }

        }

        if (auxClauses.size() > 0) {
            String finalSampleWhere = StringUtils.join(auxClauses, " AND ");

            whereClauses.add(finalSampleWhere);

        }

    }

    /* ******************************************
     *          Path and index checking         *
     * ******************************************/

    private Path getMetaDir(Path file) {
        String inputName = file.getFileName().toString();
        return file.getParent().resolve(".meta_" + inputName);
    }

}