Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.ctakes.ytex.kernel.dao; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.net.URISyntaxException; import java.net.URL; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Arrays; import java.util.HashSet; import java.util.Properties; import java.util.Set; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import javax.sql.DataSource; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.ctakes.ytex.kernel.FileUtil; import org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluator; import org.apache.ctakes.ytex.kernel.KernelContextHolder; import org.apache.ctakes.ytex.kernel.model.ConcRel; import org.apache.ctakes.ytex.kernel.model.ConceptGraph; import org.hibernate.SessionFactory; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.jdbc.core.RowCallbackHandler; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; public class ConceptDaoImpl implements ConceptDao { private static final String CONCEPT_GRAPH_PATH = "org/apache/ctakes/ytex/conceptGraph/"; /** * the default concept id for the root. override with -Dytex.defaultRootId */ private static final String DEFAULT_ROOT_ID = "C0000000"; /** * ignore forbidden concepts. list Taken from umls-interface. f concept is * one of the following just return #C1274012|Ambiguous concept (inactive * concept) if($concept=~/C1274012/) { return 1; } #C1274013|Duplicate * concept (inactive concept) if($concept=~/C1274013/) { return 1; } * #C1276325|Reason not stated concept (inactive concept) * if($concept=~/C1276325/) { return 1; } #C1274014|Outdated concept * (inactive concept) if($concept=~/C1274014/) { return 1; } * #C1274015|Erroneous concept (inactive concept) if($concept=~/C1274015/) { * return 1; } #C1274021|Moved elsewhere (inactive concept) * if($concept=~/C1274021/) { return 1; } #C1443286|unapproved attribute * if($concept=~/C1443286/) { return 1; } #C1274012|non-current concept - * ambiguous if($concept=~/C1274012/) { return 1; } #C2733115|limited status * concept if($concept=~/C2733115/) { return 1; } */ private static final String defaultForbiddenConceptArr[] = new String[] { "C1274012", "C1274013", "C1276325", "C1274014", "C1274015", "C1274021", "C1443286", "C1274012", "C2733115" }; private static Set<String> defaultForbiddenConcepts; private static final Log log = LogFactory.getLog(ConceptDaoImpl.class); static { defaultForbiddenConcepts = new HashSet<String>(); defaultForbiddenConcepts.addAll(Arrays.asList(defaultForbiddenConceptArr)); } /** * create a concept graph. * * This expects a property file in the classpath under * CONCEPT_GRAPH_PATH/[name].xml * <p/> * If the properties file is found in a directory, the concept graph will be * written there. * <p/> * Else (e.g. if the props file is coming from a jar), the concept graph * will be written to the directory specified via the system property/ytex * property 'org.apache.ctakes.ytex.conceptGraphDir' * <p/> * Else if the 'org.apache.ctakes.ytex.conceptGraphDir' property is not * defined, the concept graph will be written to the conceptGraph * subdirectory relative to ytex.properties (if ytex.properties is in a * directory). * * @param args */ @SuppressWarnings("static-access") public static void main(String args[]) throws ParseException, IOException { Options options = new Options(); options.addOption(OptionBuilder.withArgName("name").hasArg().isRequired() .withDescription("name of concept graph. A property file with the name " + CONCEPT_GRAPH_PATH + "/[name].xml must exist on the classpath") .create("name")); try { CommandLineParser parser = new GnuParser(); CommandLine line = parser.parse(options, args); String name = line.getOptionValue("name"); String propRes = CONCEPT_GRAPH_PATH + name + ".xml"; URL url = ConceptDaoImpl.class.getClassLoader().getResource(propRes); if (url == null) { System.out.println("properties file could not be located: " + propRes); return; } // load properties Properties props = new Properties(); InputStream is = ConceptDaoImpl.class.getClassLoader().getResourceAsStream(propRes); try { props.loadFromXML(is); } finally { is.close(); } // determine directory for concept graph - attempt to put in same // dir as props File fDir = null; if ("file".equals(url.getProtocol())) { File f; try { f = new File(url.toURI()); } catch (URISyntaxException e) { f = new File(url.getPath()); } fDir = f.getParentFile(); } String conceptGraphQuery = props.getProperty("ytex.conceptGraphQuery"); String strCheckCycle = props.getProperty("ytex.checkCycle", "true"); String forbiddenConceptList = props.getProperty("ytex.forbiddenConcepts"); Set<String> forbiddenConcepts; if (forbiddenConceptList != null) { forbiddenConcepts = new HashSet<String>(); forbiddenConcepts.addAll(Arrays.asList(forbiddenConceptList.split(","))); } else { forbiddenConcepts = defaultForbiddenConcepts; } boolean checkCycle = true; if ("false".equalsIgnoreCase(strCheckCycle) || "no".equalsIgnoreCase(strCheckCycle)) checkCycle = false; if (!Strings.isNullOrEmpty(name) && !Strings.isNullOrEmpty(conceptGraphQuery)) { KernelContextHolder.getApplicationContext().getBean(ConceptDao.class).createConceptGraph( fDir != null ? fDir.getAbsolutePath() : null, name, conceptGraphQuery, checkCycle, forbiddenConcepts); } else { printHelp(options); } } catch (ParseException pe) { printHelp(options); } } private static void printHelp(Options options) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("java " + ConceptDaoImpl.class.getName() + " generate concept graph", options); } private IntrinsicInfoContentEvaluator intrinsicInfoContentEvaluator; private JdbcTemplate jdbcTemplate; private SessionFactory sessionFactory; private Properties ytexProperties; /** * add the relationship to the concept map * * @param conceptMap * @param conceptIndexMap * @param conceptList * @param roots * @param conceptPair */ private void addRelation(ConceptGraph cg, Set<String> roots, String childCUI, String parentCUI, boolean checkCycle, Set<String> forbiddenConcepts) { if (forbiddenConcepts.contains(childCUI) || forbiddenConcepts.contains(parentCUI)) { // ignore relationships to useless concepts if (log.isDebugEnabled()) log.debug( "skipping relation because of forbidden concept: par=" + parentCUI + " child=" + childCUI); return; } // ignore self relations if (!childCUI.equals(parentCUI)) { boolean parNull = false; // get parent from cui map ConcRel crPar = cg.getConceptMap().get(parentCUI); if (crPar == null) { parNull = true; // parent not in cui map - add it crPar = cg.addConcept(parentCUI); // this is a candidate root - add it to the set of roots roots.add(parentCUI); } // get the child cui ConcRel crChild = cg.getConceptMap().get(childCUI); // crPar already has crChild, return if (crChild != null && crPar.getChildren().contains(crChild)) return; // avoid cycles - don't add child cui if it is an ancestor // of the parent. if the child is not yet in the map, then it can't // possibly induce a cycle. // if the parent is not yet in the map, it can't induce a cycle // else check for cycles // @TODO: this is very inefficient. implement feedback arc algo boolean bCycle = !parNull && crChild != null && checkCycle && checkCycle(crPar, crChild); if (bCycle) { log.warn("skipping relation that induces cycle: par=" + parentCUI + ", child=" + childCUI); } else { if (crChild == null) { // child not in cui map - add it crChild = cg.addConcept(childCUI); } else { // remove the cui from the list of candidate roots if (roots.contains(childCUI)) roots.remove(childCUI); } // link child to parent and vice-versa crPar.getChildren().add(crChild); crChild.getParents().add(crPar); } } } /* * (non-Javadoc) * * @see org.apache.ctakes.ytex.kernel.dao.ConceptDao#createConceptGraph */ @Override public void createConceptGraph(String dir, String name, String query, final boolean checkCycle, final Set<String> forbiddenConcepts) throws IOException { ConceptGraph conceptGraph = this.readConceptGraph(name); if (conceptGraph != null) { if (log.isWarnEnabled()) log.warn( "createConceptGraph(): concept graph already exists, will not create a new one. Delete existing concept graph if you want to recreate it."); } else { String outputDir = dir; if (Strings.isNullOrEmpty(outputDir)) { outputDir = getDefaultConceptGraphDir(); } if (Strings.isNullOrEmpty(outputDir)) { throw new IllegalArgumentException( "could not determine default concept graph directory; please set property org.apache.ctakes.ytex.conceptGraphDir"); } if (log.isInfoEnabled()) log.info("createConceptGraph(): file not found, creating concept graph from database."); final ConceptGraph cg = new ConceptGraph(); final Set<String> roots = new HashSet<String>(); this.jdbcTemplate.query(query, new RowCallbackHandler() { int nRowsProcessed = 0; @Override public void processRow(ResultSet rs) throws SQLException { String child = rs.getString(1); String parent = rs.getString(2); addRelation(cg, roots, child, parent, checkCycle, forbiddenConcepts); nRowsProcessed++; if (nRowsProcessed % 10000 == 0) { log.info("processed " + nRowsProcessed + " edges"); } } }); // set the root // if there is only one potential root, use it // else use a synthetic root and add all the roots as its children String rootId = null; if (log.isDebugEnabled()) log.debug("roots: " + roots); if (roots.size() == 1) { rootId = roots.iterator().next(); } else { rootId = System.getProperty("org.apache.ctakes.ytex.defaultRootId", DEFAULT_ROOT_ID); ConcRel crRoot = cg.addConcept(rootId); for (String crChildId : roots) { ConcRel crChild = cg.getConceptMap().get(crChildId); crRoot.getChildren().add(crChild); crChild.getParents().add(crRoot); } } cg.setRoot(rootId); // can't get the maximum depth unless we're sure there are no // cycles if (checkCycle) { log.info("computing intrinsic info for concept graph: " + name); this.intrinsicInfoContentEvaluator.evaluateIntrinsicInfoContent(name, outputDir, cg); } writeConceptGraph(outputDir, name, cg); } } /* * (non-Javadoc) * * @see * org.apache.ctakes.ytex.kernel.dao.ConceptDao#getConceptGraph(java.util * .Set) */ public ConceptGraph getConceptGraph(String name) { ConceptGraph cg = this.readConceptGraph(name); if (cg != null) { this.initializeConceptGraph(cg); if (log.isInfoEnabled()) { log.info(String.format("concept graph %s, vertices: %s", name, cg.getConceptList().size())); } } return cg; } private File urlToFile(URL url) { if (url != null && "file".equals(url.getProtocol())) { File f; try { f = new File(url.toURI()); } catch (URISyntaxException e) { f = new File(url.getPath()); } return f; } else { return null; } } /** * use value of org.apache.ctakes.ytex.conceptGraphDir if defined. else try * to determine ytex.properties location and use the conceptGraph directory * there. else return null * * @return */ public String getDefaultConceptGraphDir() { String cdir = System.getProperty("org.apache.ctakes.ytex.conceptGraphDir", ytexProperties.getProperty("org.apache.ctakes.ytex.conceptGraphDir")); // default to [ytex.properties directory]/conceptGraph if (Strings.isNullOrEmpty(cdir)) { URL url = this.getClass().getResource("/org/apache/ctakes/ytex/ytex.properties"); File f = urlToFile(url); if (f != null) { File baseDir = f.getParentFile(); if (baseDir.exists() && baseDir.isDirectory()) { cdir = baseDir.getAbsolutePath() + File.separator + "conceptGraph"; } } } return cdir; } public DataSource getDataSource(DataSource ds) { return this.jdbcTemplate.getDataSource(); } public IntrinsicInfoContentEvaluator getIntrinsicInfoContentEvaluator() { return intrinsicInfoContentEvaluator; } public SessionFactory getSessionFactory() { return sessionFactory; } public Properties getYtexProperties() { return ytexProperties; } private boolean checkCycle(ConcRel crPar, ConcRel crChild) { HashSet<Integer> visitedNodes = new HashSet<Integer>(); return hasAncestor(crPar, crChild, visitedNodes); } /** * check cycle. * * @param crPar * parent * @param crChild * child that should not be an ancestor of parent * @param visitedNodes * nodes we've visited in our search. keep track of this to avoid * visiting the same node multiple times * @return true if crChild is an ancestor of crPar */ private boolean hasAncestor(ConcRel crPar, ConcRel crChild, HashSet<Integer> visitedNodes) { // see if we've already visited this node - if yes then no need to redo // this if (visitedNodes.contains(crPar.getNodeIndex())) return false; // see if we're the same if (crPar.getNodeIndex() == crChild.getNodeIndex()) return true; // recurse for (ConcRel c : crPar.getParents()) { if (hasAncestor(c, crChild, visitedNodes)) return true; } // add ourselves to the set of visited nodes so we no not to revisit // this visitedNodes.add(crPar.getNodeIndex()); return false; } /** * replace cui strings in concrel with references to other nodes. initialize * the concept list * * @param cg * @return */ private ConceptGraph initializeConceptGraph(ConceptGraph cg) { ImmutableMap.Builder<String, ConcRel> mb = new ImmutableMap.Builder<String, ConcRel>(); for (ConcRel cr : cg.getConceptList()) { // use adjacency list representation for concept graphs that have // cycles if (cg.getDepthMax() > 0) cr.constructRel(cg.getConceptList()); mb.put(cr.getConceptID(), cr); } cg.setConceptMap(mb.build()); return cg; } private ConceptGraph readConceptGraph(String name) { ObjectInputStream is = null; try { // try loading from classpath InputStream resIs = this.getClass().getClassLoader() .getResourceAsStream(CONCEPT_GRAPH_PATH + name + ".gz"); if (resIs == null) { String cdir = this.getDefaultConceptGraphDir(); if (cdir == null) { throw new IllegalArgumentException( "could not determine default concept graph directory; please set property org.apache.ctakes.ytex.conceptGraphDir"); } File f = new File(cdir + "/" + name + ".gz"); log.info( "could not load conceptGraph from classpath, attempt to load from: " + f.getAbsolutePath()); if (f.exists()) { resIs = new FileInputStream(f); } else { log.info(f.getAbsolutePath() + " not found, cannot load concept graph"); } } else { log.info("loading concept graph from " + this.getClass().getClassLoader().getResource(CONCEPT_GRAPH_PATH + name + ".gz")); } if (resIs != null) { is = new ObjectInputStream(new BufferedInputStream(new GZIPInputStream(resIs))); return (ConceptGraph) is.readObject(); } else { log.info("could not load conceptGraph: " + name); return null; } } catch (IOException ioe) { throw new RuntimeException(ioe); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } finally { if (is != null) try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } public void setDataSource(DataSource ds) { this.jdbcTemplate = new JdbcTemplate(ds); } public void setIntrinsicInfoContentEvaluator(IntrinsicInfoContentEvaluator intrinsicInfoContentEvaluator) { this.intrinsicInfoContentEvaluator = intrinsicInfoContentEvaluator; } // /** // * get maximum depth of graph. // * // * @param roots // * @param conceptMap // * @return // */ // private int calculateDepthMax(String rootId, Map<String, ConcRel> // conceptMap) { // ConcRel crRoot = conceptMap.get(rootId); // return crRoot.depthMax(); // } public void setSessionFactory(SessionFactory sessionFactory) { this.sessionFactory = sessionFactory; } public void setYtexProperties(Properties ytexProperties) { this.ytexProperties = new Properties(ytexProperties); this.ytexProperties.putAll(System.getProperties()); } // /** // * add parent to all descendants of crChild // * // * @param crPar // * @param crChild // * @param ancestorCache // */ // private void updateDescendants(Set<Integer> ancestorsPar, ConcRel // crChild, // Map<Integer, Set<Integer>> ancestorCache, int depth) { // if (ancestorCache != null) { // Set<Integer> ancestors = ancestorCache.get(crChild.nodeIndex); // if (ancestors != null) // ancestors.addAll(ancestorsPar); // // recurse // for (ConcRel crD : crChild.getChildren()) { // updateDescendants(ancestorsPar, crD, ancestorCache, depth + 1); // } // } // } /** * write the concept graph, create parent directories as required * * @param name * @param cg */ private void writeConceptGraph(String dir, String name, ConceptGraph cg) { ObjectOutputStream os = null; String outputDir = dir; File cgFile = new File(outputDir + "/" + name + ".gz"); log.info("writing concept graph: " + cgFile.getAbsolutePath()); if (!cgFile.getParentFile().exists()) cgFile.getParentFile().mkdirs(); try { os = new ObjectOutputStream( new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(cgFile)))); // replace the writable list with an immutable list cg.setConceptList(ImmutableList.copyOf(cg.getConceptList())); os.writeObject(cg); } catch (IOException ioe) { throw new RuntimeException(ioe); } finally { if (os != null) try { os.close(); } catch (IOException e) { e.printStackTrace(); } } } }