Java tutorial
/* * The baseCode project * * Copyright (c) 2010 University of British Columbia * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package ubic.basecode.ontology; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.StandardCopyOption; import java.util.Collection; import java.util.HashSet; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.hp.hpl.jena.ontology.Individual; import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModelSpec; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.ModelMaker; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import ubic.basecode.ontology.model.OntologyIndividual; import ubic.basecode.ontology.model.OntologyIndividualImpl; import ubic.basecode.ontology.model.OntologyProperty; import ubic.basecode.ontology.model.OntologyResource; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.model.OntologyTermImpl; import ubic.basecode.ontology.model.PropertyFactory; import ubic.basecode.util.Configuration; /** * Reads ontologies from OWL resources * * @author paul */ public class OntologyLoader { private static Logger log = LoggerFactory.getLogger(OntologyLoader.class); private static final int MAX_CONNECTION_TRIES = 3; private static final String OLD_CACHE_SUFFIX = ".old"; private static final String TMP_CACHE_SUFFIX = ".tmp"; /** * @param url * @param model * @return */ public static Collection<OntologyResource> initialize(String url, OntModel model) { Collection<OntologyResource> result = new HashSet<>(); ExtendedIterator<OntClass> classIt = model.listClasses(); int count = 0; log.debug("Reading classes for ontology: " + url); while (classIt.hasNext()) { OntClass element = classIt.next(); if (element.isAnon()) continue; OntologyTerm ontologyTerm = new OntologyTermImpl(element); result.add(ontologyTerm); if (++count % 1000 == 0) { log.debug("Loaded " + count + " terms, last was " + ontologyTerm); } } log.debug("Loaded " + count + " terms"); ExtendedIterator<com.hp.hpl.jena.ontology.ObjectProperty> propIt = model.listObjectProperties(); count = 0; log.debug("Reading object properties..."); while (propIt.hasNext()) { com.hp.hpl.jena.ontology.ObjectProperty element = propIt.next(); OntologyProperty ontologyTerm = PropertyFactory.asProperty(element); if (ontologyTerm == null) continue; // couldn't be converted for some reason. result.add(ontologyTerm); if (++count % 1000 == 0) { log.debug("Loaded " + count + " object properties, last was " + ontologyTerm); } } ExtendedIterator<com.hp.hpl.jena.ontology.DatatypeProperty> dtPropIt = model.listDatatypeProperties(); log.debug("Reading datatype properties..."); while (dtPropIt.hasNext()) { com.hp.hpl.jena.ontology.DatatypeProperty element = dtPropIt.next(); OntologyProperty ontologyTerm = PropertyFactory.asProperty(element); if (ontologyTerm == null) continue; // couldn't be converted for some reason. result.add(ontologyTerm); if (++count % 1000 == 0) { log.debug("Loaded " + count + " datatype properties, last was " + ontologyTerm); } } log.debug("Loaded " + count + " properties"); ExtendedIterator<Individual> indiIt = model.listIndividuals(); count = 0; log.debug("Reading individuals..."); while (indiIt.hasNext()) { Individual element = indiIt.next(); if (element.isAnon()) continue; OntologyIndividual ontologyTerm = new OntologyIndividualImpl(element); result.add(ontologyTerm); if (++count % 1000 == 0) { log.debug("Loaded " + count + " individuals, last was " + ontologyTerm); } } log.debug("Loaded " + count + " individuals"); return result; } /** * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. * * @param is * @param url, used as a key * @param spec * @return */ public static OntModel loadMemoryModel(InputStream is, String url, OntModelSpec spec) { OntModel model = getMemoryModel(url, spec); model.read(is, null); return model; } /** * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. Uses * OWL_MEM_TRANS_INF * * @param url * @return */ public static OntModel loadMemoryModel(String url) { return loadMemoryModel(url, OntModelSpec.OWL_MEM_TRANS_INF); } /** * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. Uses * OWL_MEM_TRANS_INF * If load from URL fails, attempt to load from disk cache under @cacheName. * * @param url * @return */ public static OntModel loadMemoryModel(String url, String cacheName) { return loadMemoryModel(url, OntModelSpec.OWL_MEM_TRANS_INF, cacheName); } /** * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. * * @param url * @return */ public static OntModel loadMemoryModel(String url, OntModelSpec spec) { return loadMemoryModel(url, spec, null); } /** * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. * If load from URL fails, attempt to load from disk cache under @cacheName. * * @param url * @param spec e.g. OWL_MEM_TRANS_INF * @param cacheName unique name of this ontology, will be used to load from disk in case of failed url connection * @return */ public static OntModel loadMemoryModel(String url, OntModelSpec spec, String cacheName) { StopWatch timer = new StopWatch(); timer.start(); OntModel model = getMemoryModel(url, spec); URLConnection urlc = null; int tries = 0; while (tries < MAX_CONNECTION_TRIES) { try { urlc = new URL(url).openConnection(); // help ensure mis-configured web servers aren't causing trouble. urlc.setRequestProperty("Accept", "application/rdf+xml"); try { HttpURLConnection c = (HttpURLConnection) urlc; c.setInstanceFollowRedirects(true); } catch (ClassCastException e) { // not via http, using a FileURLConnection. } if (tries > 0) { log.info("Retrying connecting to " + url + " [" + tries + "/" + MAX_CONNECTION_TRIES + " of max tries"); } else { log.info("Connecting to " + url); } urlc.connect(); // Will error here on bad URL if (urlc instanceof HttpURLConnection) { String newUrl = urlc.getHeaderField("Location"); if (StringUtils.isNotBlank(newUrl)) { log.info("Redirect to " + newUrl); urlc = new URL(newUrl).openConnection(); // help ensure mis-configured web servers aren't causing trouble. urlc.setRequestProperty("Accept", "application/rdf+xml"); urlc.connect(); } } break; } catch (IOException e) { // try to recover. log.error(e + " retrying?"); tries++; } } if (urlc != null) { try (InputStream in = urlc.getInputStream();) { Reader reader; if (cacheName != null) { // write tmp to disk File tempFile = getTmpDiskCachePath(cacheName); if (tempFile == null) { reader = new InputStreamReader(in); } else { tempFile.getParentFile().mkdirs(); Files.copy(in, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING); reader = new FileReader(tempFile); } } else { // Skip the cache reader = new InputStreamReader(in); } assert reader != null; try (BufferedReader buf = new BufferedReader(reader);) { model.read(buf, url); } log.info("Load model: " + timer.getTime() + "ms"); } catch (IOException e) { log.error(e.getMessage(), e); } } if (cacheName != null) { File f = getDiskCachePath(cacheName); File tempFile = getTmpDiskCachePath(cacheName); File oldFile = getOldDiskCachePath(cacheName); if (model.isEmpty()) { // Attempt to load from disk cache if (f == null) { throw new RuntimeException( "Ontology cache directory required to load from disk: ontology.cache.dir"); } if (f.exists() && !f.isDirectory()) { try (BufferedReader buf = new BufferedReader(new FileReader(f));) { model.read(buf, url); // We successfully loaded the cached ontology. Copy the loaded ontology to oldFile // so that we don't recreate indices during initialization based on a false change in // the ontology. Files.copy(f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING); log.info("Load model from disk: " + timer.getTime() + "ms"); } catch (IOException e) { log.error(e.getMessage(), e); throw new RuntimeException( "Ontology failed load from URL (" + url + ") and disk cache: " + cacheName); } } else { throw new RuntimeException("Ontology failed load from URL (" + url + ") and disk cache does not exist: " + cacheName); } } else { // Model was successfully loaded into memory from URL with given cacheName // Save cache to disk (rename temp file) log.info("Caching ontology to disk: " + cacheName); if (f != null) { try { // Need to compare previous to current so instead of overwriting we'll move the old file f.createNewFile(); Files.move(f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING); Files.move(tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING); } catch (IOException e) { log.error(e.getMessage(), e); } } else { log.warn("Ontology cache directory required to save to disk: ontology.cache.dir"); } } } assert !model.isEmpty(); return model; } public static boolean hasChanged(String cacheName) { boolean changed = false; // default if (StringUtils.isBlank(cacheName)) { return changed; } File newFile = getDiskCachePath(cacheName); File oldFile = getOldDiskCachePath(cacheName); try { // This might be slow considering it calls IOUtils.contentsEquals which compares byte-by-byte // in the worst case scenario. // In this case consider using NIO for higher-performance IO using Channels and Buffers. // Ex. Use a 4MB Memory-Mapped IO operation. if (newFile != null && oldFile != null) changed = !FileUtils.contentEquals(newFile, oldFile); } catch (IOException e) { log.error(e.getMessage()); } return changed; } public static boolean deleteOldCache(String cacheName) { File f = getOldDiskCachePath(cacheName); if (f != null) return f.delete(); return false; } /** * Get model that is entirely in memory with default OntModelSpec.OWL_MEM_RDFS_INF. * * @param url * @return */ static OntModel getMemoryModel(String url) { return getMemoryModel(url, OntModelSpec.OWL_MEM_RDFS_INF); } /** * Get model that is entirely in memory. * * @param url * @param specification * @return */ static OntModel getMemoryModel(String url, OntModelSpec specification) { OntModelSpec spec = new OntModelSpec(specification); ModelMaker maker = ModelFactory.createMemModelMaker(); Model base = maker.createModel(url, false); spec.setImportModelMaker(maker); spec.getDocumentManager().setProcessImports(false); OntModel model = ModelFactory.createOntologyModel(spec, base); model.setStrictMode(false); // fix for owl2 files return model; } /** * @param name * @return */ public static File getDiskCachePath(String name) { String ontologyDir = Configuration.getString("ontology.cache.dir"); // e.g., /something/gemmaData/ontologyCache if (StringUtils.isBlank(ontologyDir) || StringUtils.isBlank(name)) { return null; } if (!new File(ontologyDir).exists()) { new File(ontologyDir).mkdirs(); } assert ontologyDir != null; String path = ontologyDir + File.separator + "ontology" + File.separator + name; File indexFile = new File(path); return indexFile; } static File getOldDiskCachePath(String name) { File indexFile = getDiskCachePath(name); if (indexFile == null) { return null; } return new File(indexFile.getAbsolutePath() + OLD_CACHE_SUFFIX); } static File getTmpDiskCachePath(String name) { File indexFile = getDiskCachePath(name); if (indexFile == null) { return null; } return new File(indexFile.getAbsolutePath() + TMP_CACHE_SUFFIX); } }