Java tutorial
/* * Copyright [1999-2014] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.funcgen; import java.io.File; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.io.IOException; import org.apache.commons.lang.StringUtils; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.Priority; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; public class CheckResultSetDBFileLink extends SingleDatabaseTestCase { String[] windows = { "30", "65", "130", "260", "450", "648", "950", "1296" }; protected String[] windowSizes() { return windows; } public CheckResultSetDBFileLink() { addToGroup("post_regulatorybuild"); addToGroup("funcgen");//do we need this group and the funcgen-release group? addToGroup("funcgen-release"); setTeamResponsible(Team.FUNCGEN); setDescription("Checks if the binary signal (col) files exist for relevant ResultSets\n" + "Also checks dbfile_data_root subdirs to see if there are still DISPLAYABLE or if they support a regualtory build\n"); setPriority(Priority.AMBER); setEffect("Signal tracks will not display in the browser.\n" + "NOTE: RegulatorySets does something similar, but from the DataSet perspective\n " + "\tHence, consider those HC results first, before fixing these!"); setFix("Re-create files or check file names manually."); } private String getSupportedRegulatoryFeatureSet(Connection con, String subdirName) { String regFsetSQL = "SELECT fs.name from result_set rs, supporting_set ss, data_set ds, " + "supporting_set ss1, data_set ds1, feature_set fs WHERE " + "rs.result_set_id=ss.supporting_set_id and ss.type='result' and ss.data_set_id=ds.data_set_id " + "AND ds.feature_set_id=ss1.supporting_set_id and ss1.type='feature' and " + "ss1.data_set_id=ds1.data_set_id and ds1.feature_set_id=fs.feature_set_id and " + "fs.type='regulatory' and fs.name not rlike '.*_v[0-9]+$' and rs.name='" + subdirName + "'"; String regFset = null; try { Statement stmt = con.createStatement(); ResultSet supportedRegFset = stmt.executeQuery(regFsetSQL); if ((supportedRegFset != null) && supportedRegFset.next()) { regFset = supportedRegFset.getString(1); //doesn't matter if we get duplicate entries here based on //redundant rset names. catch reundant set before here } } catch (SQLException e) { e.printStackTrace(); } return regFset; } public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); try { Statement stmt = con.createStatement(); int MAX_REPORT = 50; //Only out 50 problems by default HashMap<String, String> rSetDBLinks = new HashMap<String, String>(); HashMap<String, String> rSetFClasses = new HashMap<String, String>(); HashMap<String, String> rSetStates = new HashMap<String, String>(); HashMap<String, String> rSetRFSets = new HashMap<String, String>(); ArrayList<String> removeableRsets = new ArrayList<String>(); String rsetInfoSQL = "SELECT rs.name, dbf.path, s1.name, rs.feature_class from result_set rs left join dbfile_registry dbf " + "ON rs.result_set_id=dbf.table_id and dbf.table_name='result_set' left join " + "(select s.table_id, sn.name from status s, status_name sn where " + "s.status_name_id=sn.status_name_id and s.table_name='result_set' and sn.name='DISPLAYABLE') s1 " + "ON rs.result_set_id=s1.table_id"; ResultSet rsetInfo = stmt.executeQuery(rsetInfoSQL); String rsetStatus, rsetPath, rsetName, regFset, rsetFClass; //String infoString = ""; while ((rsetInfo != null) && rsetInfo.next()) { rsetName = rsetInfo.getString(1); rsetPath = rsetInfo.getString(2); rsetStatus = rsetInfo.getString(3); rsetFClass = rsetInfo.getString(4); regFset = this.getSupportedRegulatoryFeatureSet(con, rsetName); //TEST IF WE HAVE SEEN A REDUNDANTLY NAMED RESULT_SET if (rSetDBLinks.containsKey(rsetName)) { //bail out here or continue? //or could mark for deletion as we could have >2 ReportManager.problem(this, con, "Found redundant result_set naming:\t" + rsetName + "\nEither rectify in DB or updated HC to account for result_set unique key"); return false; //bail out as results maybe unsafe } if ((rsetPath != null) || (rsetStatus != null) || (regFset != null)) { rSetFClasses.put(rsetName, rsetFClass); rSetDBLinks.put(rsetName, rsetPath); rSetStates.put(rsetName, rsetStatus); rSetRFSets.put(rsetName, regFset); } else { removeableRsets.add(rsetName); } } if (removeableRsets.size() > 0) { //Should this be info instead? ReportManager.problem(this, con, "Found " + removeableRsets.size() + " 'removeable' result_sets i.e. not DISPLAYABLE, not in build and has no dbfile_registry.path:\n\t" + StringUtils.join(removeableRsets, "\n\t") + "\n"); result = false; } int numRsets = rSetDBLinks.size(); //Get Base Folder ResultSet rsetDBDataRoot = stmt .executeQuery("SELECT meta_value from meta where meta_key='dbfile.data_root'"); String problemString; //For easier interpretation/reporting, build 1 problem string per result_set/subDir, if ((rsetDBDataRoot != null) && rsetDBDataRoot.next()) { String root_dir = rsetDBDataRoot.getString(1); //rsetDBDataRoot.close(); // don't need this anymore as reusing stmt will close this ReportManager.problem(this, con, "Found dbfile.data_root meta key. Need to remove this once all the other failures have been resolved"); result = false; //This can be removed once we resolve the dbfile.data_root issue. /nfs/ensnfs-dev/staging/nNeed adding to config //and add species and assembly //Get distinct result_set feature_class values String fclassSQL = "SELECT distinct(feature_class) from result_set"; ResultSet rsetFclasses = stmt.executeQuery(fclassSQL); //rsetFclasses and subdirRsetIDs ResultSets interleved here from same stmt! //This may cause issues, but seemingly not the issue we are experiencing //A ResultSet object is automatically closed when the Statement object that generated it is closed, re-executed, //or used to retrieve the next result from a sequence of multiple results. << counts or ResultSets, this is very rare and can probably be ignored //This is only applicable to using execute, then a separate get method on Statment e.g. getResultSet while ((rsetFclasses != null) && rsetFclasses.next()) { String featureClass = rsetFclasses.getString(1); //TEST EXISTING DIRECTORIES ARE RESULT SETS String resultSetPath = root_dir + "/" + featureClass + "_feature"; File resultSetPathF = new File(resultSetPath); if (resultSetPathF.exists() && resultSetPathF.isDirectory()) { String[] subDirs = resultSetPathF.list(); String rsetSQL; ArrayList<String> subdirProblems = new ArrayList<String>(); Statement stmt1 = con.createStatement(); boolean seenREADME = false; for (String subDir : subDirs) { problemString = ""; //Check is not a soft link //as these are to support archives, and really need testing in another HC boolean isLink = true; String fullPath = resultSetPath + "/" + subDir; try { isLink = isSymLink(fullPath); } catch (IOException i) { i.printStackTrace(); } if (subDir.equals("README")) { seenREADME = true; continue; } else if (isLink) { continue; } rsetSQL = "SELECT result_set_id from result_set where name='" + subDir + "'"; ResultSet subdirRsetIDs = stmt1.executeQuery(rsetSQL); if ((root_dir != null) && subdirRsetIDs.next()) { //String rsetID = subdirRsetIDs.getString(1); //logger.fine("Found result_feature subdir:\t" + subDirs[i] + " with rset id\t" + rsetID); if (subdirRsetIDs.next()) { problemString += "\tCannot find unique result_set. Check manually or update HC\n"; } //CATCH SUBDIRS WHICH FOR RESULT_SETS WITHOUT DBFILE_REGISTRY/DISPLAYABLE ENTRY OR IN BUILD if (removeableRsets.contains(subDir)) { problemString += "\tAppears to be 'removeable' i.e. not DISPLAYABLE, not in build and has no dbfile_registry.path.\n"; } } else { problemString += "\tCannot find result_set.\n"; } if (!problemString.equals("")) { subdirProblems.add(subDir + " " + featureClass + "_feature subdir has problems:\n" + problemString); } } if (seenREADME == false) { ReportManager.problem(this, con, "No README file present in:\t" + resultSetPath); result = false; } int numProbs = subdirProblems.size(); if (numProbs != 0) { ReportManager.problem(this, con, "Found " + numProbs + " " + featureClass + "_feature subdirs with problems (use -output info for all)."); result = false; for (int i = 0; i < numProbs; i++) { if (i >= MAX_REPORT) { //Both these seem to report even with when restricting to -output problem? ReportManager.info(this, con, subdirProblems.get(i).toString()); } else { ReportManager.problem(this, con, subdirProblems.get(i).toString()); } } } else { ReportManager.info(this, con, "Found 0 " + featureClass + "_feature subdirs with problems."); } } else { ReportManager.problem(this, con, "Cannot test if result_set dirs are valid as path does not exist or is not a directory:\t" + resultSetPath); result = false; //Don't return here as rsetPaths in DB may now be pointing to as different path } } if (numRsets == 0) { ReportManager.problem(this, con, "dbfile_root is defined in the meta table but found no result_sets can be found"); result = false; //Could return here? } else { // NOW CHECK EXISTING RESULT SETS File root_dir_f = new File(root_dir); if (root_dir_f.exists()) { ArrayList<String> rsetProblems = new ArrayList<String>(); Iterator<String> dbLinkIt = rSetDBLinks.keySet().iterator(); //Here we are iterating over all the rSetDBLinks twice //once for each FeatureClass //but we get the rsetFClass below Object tmpObject; while (dbLinkIt.hasNext()) { rsetName = dbLinkIt.next().toString(); //Need to bring in the class here too problemString = ""; //toString on null was failing silently here! rsetPath = ((tmpObject = rSetDBLinks.get(rsetName)) == null) ? "NO DBFILE_REGISTRY PATH" : tmpObject.toString(); rsetStatus = ((tmpObject = rSetStates.get(rsetName)) == null) ? "NOT DISPLAYABLE" : tmpObject.toString(); regFset = ((tmpObject = rSetRFSets.get(rsetName)) == null) ? "NOT IN BUILD" : tmpObject.toString(); rsetFClass = rSetFClasses.get(rsetName); //Will always be defined //Report all these together for easier interpretation if ((rsetPath.equals("NO DBFILE_REGISTRY PATH") || rsetStatus.equals("NOT DISPLAYABLE") || regFset.equals("NOT IN BUILD")) && rsetFClass.equals("result")) { problemString += "\tdbfile_registry.path:\t" + rsetPath + "\n\t" + "IS " + rsetStatus + "\n\t" + "Supports:\t" + regFset + "\n"; } else if ((rsetPath.equals("NO DBFILE_REGISTRY PATH") || rsetStatus.equals("NOT DISPLAYABLE")) && rsetFClass.equals("dna_methylation")) { problemString += "\tdbfile_registry.path:\t" + rsetPath + "\n\t" + "IS " + rsetStatus + "\n"; } if (!rsetPath.equals("NO DBFILE_REGISTRY PATH")) {// NOW TEST COL FILES String rSetFinalPath = root_dir + rsetPath; File rsetFileFolder = new File(rSetFinalPath); if (rsetFileFolder.exists()) { if (rsetFClass.equals("result")) { //String[] windows = {"30","65","130","260","450","648","950","1296"}; //for(int i=0;i<windows.length;i++){ String[] windowSizes = windowSizes(); for (String wSize : windowSizes) { String rsetWindowFileName = rSetFinalPath + "/result_features." + rsetName + "." + wSize + ".col"; File rsetWindowFile = new File(rsetWindowFileName); if (rsetWindowFile.exists()) { if (rsetWindowFile.length() == 0) { problemString += "\tEmpty file:\t" + rsetWindowFileName + "\n"; } } else { problemString += "\tFile does not exist:\t" + rsetWindowFileName + "\n"; } } } } else { problemString += "\tdbfile_registry.path does not exist:\t" + rSetFinalPath + "\n"; } } if (!problemString.equals("")) { rsetProblems.add(rsetName + " ResultSet has problems:\n" + problemString); } } int numProbs = rsetProblems.size(); if (numProbs != 0) { ReportManager.problem(this, con, "Found " + numProbs + " ResultSets with problems.\n"); result = false; for (int i = 0; i < numProbs; i++) { if (i >= MAX_REPORT) { //Both these seem to report even with when restricting to -output problem? ReportManager.info(this, con, rsetProblems.get(i).toString()); } else { ReportManager.problem(this, con, rsetProblems.get(i).toString()); } } } else { ReportManager.info(this, con, "Found 0 ResultSets with problems."); } } else { ReportManager.problem(this, con, "Found " + numRsets + " result_sets but " + "dbfile.data_root does not seem to be valid:\t" + root_dir); result = false; //could return here? } } // END OF EXISTING RESULT SET CHECK } else { //no rsetDBDataRoot if (numRsets == 0) { //could sanity check we don't have a build here? ReportManager.info(this, con, "Found no result_sets or dbfile.data_root"); } else { ReportManager.problem(this, con, "Found " + numRsets + "result_sets but no dbfile.data_root meta key. Please add a dbfile.data_root meta key to perform this HC"); result = false; //could return here? } } } catch (SQLException e) { e.printStackTrace(); } return result; } // Need to push these to some core File utils class public static boolean isSymLink(File file) throws IOException { if (file == null) throw new NullPointerException("File argument cannot be null"); File cfile; if (file.getParent() == null) { cfile = file; } else { File canonDir = file.getParentFile().getCanonicalFile(); cfile = new File(canonDir, file.getName()); } return !cfile.getCanonicalFile().equals(cfile.getAbsoluteFile()); } public static boolean isSymLink(String path) throws IOException { if (path == null) throw new NullPointerException("Path argument cannot be null"); File pathFile = new File(path); return isSymLink(pathFile); } }