org.ensembl.healthcheck.testcase.funcgen.CheckResultSetDBFileLink.java Source code

Java tutorial

Introduction

Here is the source code for org.ensembl.healthcheck.testcase.funcgen.CheckResultSetDBFileLink.java

Source

/*
 * Copyright [1999-2014] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ensembl.healthcheck.testcase.funcgen;

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.Priority;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;

public class CheckResultSetDBFileLink extends SingleDatabaseTestCase {

    String[] windows = { "30", "65", "130", "260", "450", "648", "950", "1296" };

    protected String[] windowSizes() {
        return windows;
    }

    public CheckResultSetDBFileLink() {
        addToGroup("post_regulatorybuild");
        addToGroup("funcgen");//do we need this group and the funcgen-release group?
        addToGroup("funcgen-release");
        setTeamResponsible(Team.FUNCGEN);

        setDescription("Checks if the binary signal (col) files exist for relevant ResultSets\n"
                + "Also checks dbfile_data_root subdirs to see if there are still DISPLAYABLE or if they support a regualtory build\n");

        setPriority(Priority.AMBER);
        setEffect("Signal tracks will not display in the browser.\n"
                + "NOTE: RegulatorySets does something similar, but from the DataSet perspective\n "
                + "\tHence, consider those HC results first, before fixing these!");
        setFix("Re-create files or check file names manually.");

    }

    private String getSupportedRegulatoryFeatureSet(Connection con, String subdirName) {

        String regFsetSQL = "SELECT fs.name from result_set rs, supporting_set ss, data_set ds, "
                + "supporting_set ss1, data_set ds1, feature_set fs WHERE "
                + "rs.result_set_id=ss.supporting_set_id and ss.type='result' and ss.data_set_id=ds.data_set_id "
                + "AND ds.feature_set_id=ss1.supporting_set_id and ss1.type='feature' and "
                + "ss1.data_set_id=ds1.data_set_id and ds1.feature_set_id=fs.feature_set_id and "
                + "fs.type='regulatory' and fs.name not rlike '.*_v[0-9]+$' and rs.name='" + subdirName + "'";
        String regFset = null;

        try {
            Statement stmt = con.createStatement();
            ResultSet supportedRegFset = stmt.executeQuery(regFsetSQL);

            if ((supportedRegFset != null) && supportedRegFset.next()) {
                regFset = supportedRegFset.getString(1);
                //doesn't matter if we get duplicate entries here based on
                //redundant rset names. catch reundant set before here
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }

        return regFset;
    }

    public boolean run(DatabaseRegistryEntry dbre) {

        boolean result = true;
        Connection con = dbre.getConnection();

        try {
            Statement stmt = con.createStatement();
            int MAX_REPORT = 50; //Only out 50 problems by default
            HashMap<String, String> rSetDBLinks = new HashMap<String, String>();
            HashMap<String, String> rSetFClasses = new HashMap<String, String>();
            HashMap<String, String> rSetStates = new HashMap<String, String>();
            HashMap<String, String> rSetRFSets = new HashMap<String, String>();
            ArrayList<String> removeableRsets = new ArrayList<String>();

            String rsetInfoSQL = "SELECT rs.name, dbf.path, s1.name, rs.feature_class from result_set rs left join dbfile_registry dbf "
                    + "ON rs.result_set_id=dbf.table_id and dbf.table_name='result_set' left join "
                    + "(select s.table_id, sn.name from status s, status_name sn where "
                    + "s.status_name_id=sn.status_name_id and s.table_name='result_set' and sn.name='DISPLAYABLE') s1 "
                    + "ON rs.result_set_id=s1.table_id";

            ResultSet rsetInfo = stmt.executeQuery(rsetInfoSQL);
            String rsetStatus, rsetPath, rsetName, regFset, rsetFClass;
            //String infoString = "";

            while ((rsetInfo != null) && rsetInfo.next()) {
                rsetName = rsetInfo.getString(1);
                rsetPath = rsetInfo.getString(2);
                rsetStatus = rsetInfo.getString(3);
                rsetFClass = rsetInfo.getString(4);
                regFset = this.getSupportedRegulatoryFeatureSet(con, rsetName);

                //TEST IF WE HAVE SEEN A REDUNDANTLY NAMED RESULT_SET
                if (rSetDBLinks.containsKey(rsetName)) {
                    //bail out here or continue?
                    //or could mark for deletion as we could have >2 
                    ReportManager.problem(this, con, "Found redundant result_set naming:\t" + rsetName
                            + "\nEither rectify in DB or updated HC to account for result_set unique key");
                    return false; //bail out as results maybe unsafe
                }

                if ((rsetPath != null) || (rsetStatus != null) || (regFset != null)) {
                    rSetFClasses.put(rsetName, rsetFClass);
                    rSetDBLinks.put(rsetName, rsetPath);
                    rSetStates.put(rsetName, rsetStatus);
                    rSetRFSets.put(rsetName, regFset);

                } else {
                    removeableRsets.add(rsetName);
                }
            }

            if (removeableRsets.size() > 0) {
                //Should this be info instead?
                ReportManager.problem(this, con, "Found " + removeableRsets.size()
                        + " 'removeable' result_sets i.e. not DISPLAYABLE, not in build and has no dbfile_registry.path:\n\t"
                        + StringUtils.join(removeableRsets, "\n\t") + "\n");
                result = false;
            }

            int numRsets = rSetDBLinks.size();

            //Get Base Folder
            ResultSet rsetDBDataRoot = stmt
                    .executeQuery("SELECT meta_value from meta where meta_key='dbfile.data_root'");
            String problemString; //For easier interpretation/reporting, build 1 problem string per result_set/subDir, 

            if ((rsetDBDataRoot != null) && rsetDBDataRoot.next()) {
                String root_dir = rsetDBDataRoot.getString(1);
                //rsetDBDataRoot.close(); // don't need this anymore as reusing stmt will close this

                ReportManager.problem(this, con,
                        "Found dbfile.data_root meta key. Need to remove this once all the other failures have been resolved");
                result = false;
                //This can be removed once we resolve the dbfile.data_root issue. /nfs/ensnfs-dev/staging/nNeed adding to config
                //and add species and assembly

                //Get distinct result_set feature_class values
                String fclassSQL = "SELECT distinct(feature_class) from result_set";
                ResultSet rsetFclasses = stmt.executeQuery(fclassSQL);

                //rsetFclasses and subdirRsetIDs ResultSets interleved here from same stmt! 
                //This may cause issues, but seemingly not the issue we are experiencing
                //A ResultSet object is automatically closed when the Statement object that generated it is closed, re-executed, 
                //or used to retrieve the next result from a sequence of multiple results. << counts or ResultSets, this is very rare and can probably be ignored
                //This is only applicable to using execute, then a separate get method on Statment e.g. getResultSet

                while ((rsetFclasses != null) && rsetFclasses.next()) {
                    String featureClass = rsetFclasses.getString(1);

                    //TEST EXISTING DIRECTORIES ARE RESULT SETS
                    String resultSetPath = root_dir + "/" + featureClass + "_feature";
                    File resultSetPathF = new File(resultSetPath);

                    if (resultSetPathF.exists() && resultSetPathF.isDirectory()) {
                        String[] subDirs = resultSetPathF.list();
                        String rsetSQL;
                        ArrayList<String> subdirProblems = new ArrayList<String>();
                        Statement stmt1 = con.createStatement();
                        boolean seenREADME = false;

                        for (String subDir : subDirs) {
                            problemString = "";

                            //Check is not a soft link
                            //as these are to support archives, and really need testing in another HC
                            boolean isLink = true;
                            String fullPath = resultSetPath + "/" + subDir;
                            try {
                                isLink = isSymLink(fullPath);
                            } catch (IOException i) {
                                i.printStackTrace();
                            }

                            if (subDir.equals("README")) {
                                seenREADME = true;
                                continue;
                            } else if (isLink) {
                                continue;
                            }

                            rsetSQL = "SELECT result_set_id from result_set where name='" + subDir + "'";
                            ResultSet subdirRsetIDs = stmt1.executeQuery(rsetSQL);

                            if ((root_dir != null) && subdirRsetIDs.next()) {
                                //String rsetID         = subdirRsetIDs.getString(1);
                                //logger.fine("Found result_feature subdir:\t" + subDirs[i] + " with rset id\t" + rsetID);

                                if (subdirRsetIDs.next()) {
                                    problemString += "\tCannot find unique result_set. Check manually or update HC\n";
                                }

                                //CATCH SUBDIRS WHICH FOR RESULT_SETS WITHOUT DBFILE_REGISTRY/DISPLAYABLE ENTRY OR IN BUILD
                                if (removeableRsets.contains(subDir)) {
                                    problemString += "\tAppears to be 'removeable' i.e. not DISPLAYABLE, not in build and has no dbfile_registry.path.\n";
                                }
                            } else {
                                problemString += "\tCannot find result_set.\n";
                            }

                            if (!problemString.equals("")) {
                                subdirProblems.add(subDir + " " + featureClass + "_feature subdir has problems:\n"
                                        + problemString);
                            }
                        }

                        if (seenREADME == false) {
                            ReportManager.problem(this, con, "No README file present in:\t" + resultSetPath);
                            result = false;
                        }

                        int numProbs = subdirProblems.size();

                        if (numProbs != 0) {
                            ReportManager.problem(this, con, "Found " + numProbs + " " + featureClass
                                    + "_feature subdirs with problems (use -output info for all).");
                            result = false;

                            for (int i = 0; i < numProbs; i++) {

                                if (i >= MAX_REPORT) {
                                    //Both these seem to report even with when restricting to -output problem?
                                    ReportManager.info(this, con, subdirProblems.get(i).toString());
                                } else {
                                    ReportManager.problem(this, con, subdirProblems.get(i).toString());
                                }
                            }
                        } else {
                            ReportManager.info(this, con,
                                    "Found 0 " + featureClass + "_feature subdirs with problems.");
                        }

                    } else {
                        ReportManager.problem(this, con,
                                "Cannot test if result_set dirs are valid as path does not exist or is not a directory:\t"
                                        + resultSetPath);
                        result = false;
                        //Don't return here as rsetPaths in DB may now be pointing to as different path
                    }
                }

                if (numRsets == 0) {
                    ReportManager.problem(this, con,
                            "dbfile_root is defined in the meta table but found no result_sets can be found");
                    result = false; //Could return here?

                } else { // NOW CHECK EXISTING RESULT SETS
                    File root_dir_f = new File(root_dir);

                    if (root_dir_f.exists()) {
                        ArrayList<String> rsetProblems = new ArrayList<String>();
                        Iterator<String> dbLinkIt = rSetDBLinks.keySet().iterator();
                        //Here we are iterating over all the rSetDBLinks twice
                        //once for each FeatureClass
                        //but we get the rsetFClass below
                        Object tmpObject;

                        while (dbLinkIt.hasNext()) {
                            rsetName = dbLinkIt.next().toString();

                            //Need to bring in the class here too

                            problemString = "";
                            //toString on null was failing silently here!
                            rsetPath = ((tmpObject = rSetDBLinks.get(rsetName)) == null) ? "NO DBFILE_REGISTRY PATH"
                                    : tmpObject.toString();
                            rsetStatus = ((tmpObject = rSetStates.get(rsetName)) == null) ? "NOT DISPLAYABLE"
                                    : tmpObject.toString();
                            regFset = ((tmpObject = rSetRFSets.get(rsetName)) == null) ? "NOT IN BUILD"
                                    : tmpObject.toString();
                            rsetFClass = rSetFClasses.get(rsetName); //Will always be defined

                            //Report all these together for easier interpretation   
                            if ((rsetPath.equals("NO DBFILE_REGISTRY PATH") || rsetStatus.equals("NOT DISPLAYABLE")
                                    || regFset.equals("NOT IN BUILD")) && rsetFClass.equals("result")) {

                                problemString += "\tdbfile_registry.path:\t" + rsetPath + "\n\t" + "IS "
                                        + rsetStatus + "\n\t" + "Supports:\t" + regFset + "\n";
                            } else if ((rsetPath.equals("NO DBFILE_REGISTRY PATH")
                                    || rsetStatus.equals("NOT DISPLAYABLE"))
                                    && rsetFClass.equals("dna_methylation")) {
                                problemString += "\tdbfile_registry.path:\t" + rsetPath + "\n\t" + "IS "
                                        + rsetStatus + "\n";
                            }

                            if (!rsetPath.equals("NO DBFILE_REGISTRY PATH")) {// NOW TEST COL FILES
                                String rSetFinalPath = root_dir + rsetPath;
                                File rsetFileFolder = new File(rSetFinalPath);

                                if (rsetFileFolder.exists()) {

                                    if (rsetFClass.equals("result")) {

                                        //String[] windows = {"30","65","130","260","450","648","950","1296"}; 
                                        //for(int i=0;i<windows.length;i++){

                                        String[] windowSizes = windowSizes();

                                        for (String wSize : windowSizes) {
                                            String rsetWindowFileName = rSetFinalPath + "/result_features."
                                                    + rsetName + "." + wSize + ".col";
                                            File rsetWindowFile = new File(rsetWindowFileName);

                                            if (rsetWindowFile.exists()) {
                                                if (rsetWindowFile.length() == 0) {
                                                    problemString += "\tEmpty file:\t" + rsetWindowFileName + "\n";
                                                }
                                            } else {
                                                problemString += "\tFile does not exist:\t" + rsetWindowFileName
                                                        + "\n";
                                            }
                                        }
                                    }

                                } else {
                                    problemString += "\tdbfile_registry.path does not exist:\t" + rSetFinalPath
                                            + "\n";
                                }
                            }

                            if (!problemString.equals("")) {
                                rsetProblems.add(rsetName + " ResultSet has problems:\n" + problemString);
                            }
                        }

                        int numProbs = rsetProblems.size();

                        if (numProbs != 0) {
                            ReportManager.problem(this, con, "Found " + numProbs + " ResultSets with problems.\n");
                            result = false;

                            for (int i = 0; i < numProbs; i++) {

                                if (i >= MAX_REPORT) {
                                    //Both these seem to report even with when restricting to -output problem?
                                    ReportManager.info(this, con, rsetProblems.get(i).toString());
                                } else {
                                    ReportManager.problem(this, con, rsetProblems.get(i).toString());
                                }
                            }
                        } else {
                            ReportManager.info(this, con, "Found 0 ResultSets with problems.");
                        }
                    } else {
                        ReportManager.problem(this, con, "Found " + numRsets + " result_sets but "
                                + "dbfile.data_root does not seem to be valid:\t" + root_dir);
                        result = false; //could return here?
                    }
                } // END OF EXISTING RESULT SET CHECK
            } else { //no rsetDBDataRoot

                if (numRsets == 0) {
                    //could sanity check we don't have a build here?
                    ReportManager.info(this, con, "Found no result_sets or dbfile.data_root");
                } else {
                    ReportManager.problem(this, con, "Found " + numRsets
                            + "result_sets but no dbfile.data_root meta key. Please add a  dbfile.data_root meta key to perform this HC");
                    result = false; //could return here?
                }
            }

        } catch (SQLException e) {
            e.printStackTrace();
        }

        return result;
    }

    // Need to push these to some core File utils class

    public static boolean isSymLink(File file) throws IOException {

        if (file == null)
            throw new NullPointerException("File argument cannot be null");

        File cfile;

        if (file.getParent() == null) {
            cfile = file;
        } else {
            File canonDir = file.getParentFile().getCanonicalFile();
            cfile = new File(canonDir, file.getName());
        }

        return !cfile.getCanonicalFile().equals(cfile.getAbsoluteFile());
    }

    public static boolean isSymLink(String path) throws IOException {
        if (path == null)
            throw new NullPointerException("Path argument cannot be null");
        File pathFile = new File(path);
        return isSymLink(pathFile);
    }
}