Java tutorial
/*
 * Copyright (c) 2014 MetaSolutions AB <info@metasolutions.se>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.entrystore.ldcache.cache.impl;

import com.google.common.cache.CacheBuilder;
import com.google.common.util.concurrent.RateLimiter;
import org.apache.log4j.Logger;
import org.entrystore.ldcache.cache.Cache;
import org.entrystore.ldcache.cache.Resource;
import org.entrystore.ldcache.util.HttpUtil;
import org.entrystore.ldcache.util.JsonUtil;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.openrdf.model.Model;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.LinkedHashModel;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.sail.Sail;
import org.openrdf.sail.memory.MemoryStore;
import org.openrdf.sail.nativerdf.NativeStore;

import java.io.File;
import java.util.Date;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * {@link Cache} implementation that fetches RDF resources over HTTP, stores
 * them in a Sesame (openrdf) repository and can merge the stored graphs again
 * by following configured properties.
 *
 * <p>The instance is configured from a JSON object with the keys
 * {@code repository}, {@code cache} and {@code datasets}. Datasets are
 * populated asynchronously on a fixed thread pool as soon as the instance is
 * constructed.</p>
 *
 * @author Hannes Ebner
 */
public class CacheImpl implements Cache {

	static Logger log = Logger.getLogger(CacheImpl.class);

	/** Backing triple store; either a memory or a native store. */
	Repository repository;

	/** The configuration object this cache was created from. */
	JSONObject config;

	/** One rate limiter per remote hostname; entries expire after 10 minutes without access. */
	com.google.common.cache.Cache<String, RateLimiter> rateLimiters;

	/** Permits per second handed to {@link RateLimiter#create(double)}; overridable via {@code cache.rateLimit}. */
	double rateLimit = 2.0;

	/** Pool on which the datasets are populated. */
	ExecutorService executor;

	/**
	 * Creates the repository, the rate limiter cache and the thread pool, and
	 * submits one population task per configured dataset.
	 *
	 * @param config configuration; must contain the objects {@code repository}
	 *               (with {@code type}, and {@code uri} for native stores),
	 *               {@code cache} and the array {@code datasets}
	 * @throws JSONException            if a required configuration key is missing or has the wrong type
	 * @throws IllegalArgumentException if the repository type is neither {@code memory} nor {@code native}
	 */
	public CacheImpl(JSONObject config) throws JSONException {
		this.config = config;

		JSONObject repoConfig = config.getJSONObject("repository");
		String repositoryType = repoConfig.getString("type");

		// "indexes" only matters for native stores and is optional there as well,
		// so a missing key must not abort the construction.
		String indexes = null;
		if (repoConfig.has("indexes")) {
			indexes = repoConfig.getString("indexes");
		}

		Sail sail;
		if ("memory".equalsIgnoreCase(repositoryType)) {
			sail = new MemoryStore();
		} else if ("native".equalsIgnoreCase(repositoryType)) {
			NativeStore nativeStore = new NativeStore(new File(java.net.URI.create(repoConfig.getString("uri"))));
			if (indexes != null && indexes.trim().length() > 3) {
				nativeStore.setTripleIndexes(indexes);
			}
			sail = nativeStore;
		} else {
			throw new IllegalArgumentException("Invalid repository type");
		}

		try {
			this.repository = new SailRepository(sail);
			repository.initialize();
		} catch (RepositoryException e) {
			// Kept as best-effort (no rethrow), but the cause is logged with its stack trace.
			log.error(e.getMessage(), e);
		}

		JSONObject cacheConfig = config.getJSONObject("cache");

		if (cacheConfig.has("rateLimit")) {
			rateLimit = cacheConfig.getDouble("rateLimit");
		}
		rateLimiters = CacheBuilder.newBuilder().expireAfterAccess(10, TimeUnit.MINUTES).maximumSize(1000).build();

		int threadPoolSize = 5;
		if (cacheConfig.has("threadPoolSize")) {
			threadPoolSize = cacheConfig.getInt("threadPoolSize");
		}
		log.info("Creating fixed thread pool with size " + threadPoolSize);
		executor = Executors.newFixedThreadPool(threadPoolSize);

		if (cacheConfig.has("requestTimeout")) {
			long timeout = cacheConfig.getLong("requestTimeout");
			log.info("Setting request timeout to " + timeout);
			HttpUtil.setTimeouts(timeout);
		}

		populateDatasets(config.getJSONArray("datasets"));
	}

	/**
	 * Submits one asynchronous population task per dataset to the executor.
	 *
	 * @param datasets array of dataset configuration objects
	 * @throws JSONException declared for callers; errors inside the tasks are logged, not propagated
	 */
	private void populateDatasets(final JSONArray datasets) throws JSONException {
		for (int i = 0; i < datasets.length(); i++) {
			final int idx = i;
			executor.submit(new Runnable() {
				@Override
				public void run() {
					try {
						populateResources(datasets.getJSONObject(idx));
					} catch (JSONException e) {
						log.error(e.getMessage(), e);
					} catch (RuntimeException e) {
						// The Future returned by submit() is discarded, so an unchecked
						// exception would otherwise disappear without any trace.
						log.error("Populating dataset at index " + idx + " failed: " + e.getMessage(), e);
					}
				}
			});
		}
	}

	/**
	 * Reads the optional settings of a single dataset and starts loading it.
	 *
	 * <p>Recognized keys: {@code name}, {@code resources}, {@code follow},
	 * {@code followTuples}, {@code includeDestinations} and {@code followDepth}
	 * (default 2). Absent keys are passed on to the {@link JsonUtil} converters
	 * as {@code null}.</p>
	 *
	 * @param dataset dataset configuration
	 * @throws JSONException if a present key has an unexpected type
	 */
	private void populateResources(JSONObject dataset) throws JSONException {
		String name = "no name found";
		if (dataset.has("name")) {
			name = dataset.getString("name");
		}
		log.info("Populating dataset: " + name);

		JSONArray resources = null;
		if (dataset.has("resources")) {
			resources = dataset.getJSONArray("resources");
		}

		JSONArray follow = null;
		if (dataset.has("follow")) {
			follow = dataset.getJSONArray("follow");
		}

		JSONObject followTuples = null;
		if (dataset.has("followTuples")) {
			followTuples = dataset.getJSONObject("followTuples");
		}

		JSONArray includeDestinations = null;
		if (dataset.has("includeDestinations")) {
			includeDestinations = dataset.getJSONArray("includeDestinations");
		}

		int followDepth = 2;
		if (dataset.has("followDepth")) {
			followDepth = dataset.getInt("followDepth");
		}

		loadAndCacheResources(
				JsonUtil.jsonArrayToValueSet(resources),
				JsonUtil.jsonArrayToValueSet(follow),
				JsonUtil.jsonObjectToMap(followTuples),
				JsonUtil.jsonArrayToStringSet(includeDestinations),
				followDepth);
	}

	/**
	 * Recursive worker behind {@link #loadAndCacheResources(Set, Set, Map, Set, int)}.
	 *
	 * <p>Every URI in {@code resources} that has not been visited yet is fetched,
	 * saved to the repository and, while {@code level < depth}, the objects of
	 * the followed properties (plus subjects matching {@code followTuples}) are
	 * loaded recursively after being filtered by {@code includeDestinations}.</p>
	 *
	 * @param resources           values to load; non-URI values are skipped
	 * @param propertiesToFollow  predicates whose objects are followed; may be {@code null}
	 * @param followTuples        predicate/object pairs whose subjects are followed; may be {@code null}
	 * @param includeDestinations allowed URI prefixes for followed resources, or {@code "*"}
	 * @param visited             URIs already handled during this run; modified by this method
	 * @param level               current recursion level, 0 for the start resources
	 * @param depth               maximum level at which properties are still followed
	 */
	private void loadAndCacheResources(Set<Value> resources, Set<Value> propertiesToFollow, Map<Value, Value> followTuples, Set<String> includeDestinations, Set<URI> visited, int level, int depth) {
		for (Value r : resources) {
			if (!(r instanceof URI)) {
				continue;
			}

			if (visited.contains(r)) {
				log.debug("Already visited, skipping: " + r);
				continue;
			}

			// level > 0 to be able to re-run from index resources
			if (level > 0 && RdfResource.hasResource(repository, (URI) r)) {
				log.debug("Already in repository, skipping: " + r);
				continue;
			}

			throttle((URI) r);

			// NOTE(review): the meaning of the second argument (0) is defined by HttpUtil, not visible here.
			Model graph = HttpUtil.getModelFromResponse(HttpUtil.getResourceFromURL(r.toString(), 0));
			if (graph != null) {
				RdfResource res = new RdfResource((URI) r, graph, new Date());
				RdfResource.saveToRepository(this.repository, res);
				log.info("Cached in local repository: " + r);

				if (propertiesToFollow != null && level < depth) {
					for (Value prop : propertiesToFollow) {
						if (prop instanceof URI) {
							Set<Value> objects = new HashSet<>(graph.filter(null, (URI) prop, null).objects());
							if (followTuples != null) {
								objects.addAll(getMatchingSubjects(graph, followTuples));
							}
							objects = filterResources(objects, includeDestinations);
							if (objects.size() == 0) {
								continue;
							}
							log.debug("Following: " + prop);
							loadAndCacheResources(objects, propertiesToFollow, followTuples, includeDestinations, visited, level + 1, depth);
						}
					}
				}
			} else {
				log.warn("Model was null for: " + r.toString());
			}

			visited.add((URI) r);
		}
	}

	/**
	 * Recursive worker behind {@link #getMergedGraphs(Set, Set, Map, Set, int)}.
	 *
	 * <p>Loads the graph of every not yet visited URI from the local repository,
	 * adds it to the result and follows the configured properties to merge the
	 * graphs of linked resources as well.</p>
	 *
	 * @param resources           values to load; non-URI values are skipped
	 * @param follow              predicates whose objects are followed; may be {@code null}
	 * @param followTuples        predicate/object pairs whose subjects are followed; may be {@code null}
	 * @param includeDestinations allowed URI prefixes for followed resources, or {@code "*"}
	 * @param visited             URIs already handled during this run; modified by this method
	 * @param level               current recursion level, 0 for the start resources
	 * @param depth               depth limit for following properties
	 * @return the union of all loaded graphs; never {@code null}
	 */
	private Model getMergedGraphs(Set<Value> resources, Set<Value> follow, Map<Value, Value> followTuples, Set<String> includeDestinations, Set<URI> visited, int level, int depth) {
		Model result = new LinkedHashModel();
		for (Value r : resources) {
			if (!(r instanceof URI)) {
				continue;
			}

			if (visited.contains(r)) {
				log.debug("Already visited, skipping: " + r);
				continue;
			}
			visited.add((URI) r);

			Resource res = RdfResource.loadFromRepository(this.repository, (URI) r);
			if (res == null || res.getGraph() == null) {
				continue;
			}
			log.info("Loaded from local repository: " + r);

			Model graph = res.getGraph();
			result.addAll(graph);

			// NOTE(review): this follows one level deeper (depth + 1) than
			// loadAndCacheResources (depth) - confirm that this is intended.
			if (follow != null && level < depth + 1) {
				for (Value prop : follow) {
					if (prop instanceof URI) {
						Set<Value> objects = new HashSet<>(graph.filter(null, (URI) prop, null).objects());
						if (followTuples != null) {
							objects.addAll(getMatchingSubjects(graph, followTuples));
						}
						objects = filterResources(objects, includeDestinations);
						if (objects.size() == 0) {
							continue;
						}
						log.debug("Following: " + prop);
						// level + 1 instead of ++level: the local level must stay unchanged,
						// otherwise every further property and sibling resource would be
						// treated as if it were located deeper in the graph.
						result.addAll(getMergedGraphs(objects, follow, followTuples, includeDestinations, visited, level + 1, depth));
					}
				}
			}
		}
		return result;
	}

	/**
	 * Fetches the given resources and everything reachable through the followed
	 * properties, and stores the graphs in the local repository.
	 *
	 * @param resources           start resources; non-URI values are skipped
	 * @param follow              predicates whose objects are followed; may be {@code null}
	 * @param followTuples        predicate/object pairs whose subjects are followed; may be {@code null}
	 * @param includeDestinations allowed URI prefixes for followed resources, or {@code "*"}
	 * @param depth               maximum level at which properties are still followed
	 */
	@Override
	public void loadAndCacheResources(Set<Value> resources, Set<Value> follow, Map<Value, Value> followTuples, Set<String> includeDestinations, int depth) {
		loadAndCacheResources(resources, follow, followTuples, includeDestinations, new HashSet<URI>(), 0, depth);
	}

	/**
	 * Merges the locally stored graphs of the given resources and of the
	 * resources reachable through the followed properties.
	 *
	 * @param resources           start resources; non-URI values are skipped
	 * @param follow              predicates whose objects are followed; may be {@code null}
	 * @param followTuples        predicate/object pairs whose subjects are followed; may be {@code null}
	 * @param includeDestinations allowed URI prefixes for followed resources, or {@code "*"}
	 * @param depth               depth limit for following properties
	 * @return the merged graph; never {@code null}
	 */
	@Override
	public Model getMergedGraphs(Set<Value> resources, Set<Value> follow, Map<Value, Value> followTuples, Set<String> includeDestinations, int depth) {
		return getMergedGraphs(resources, follow, followTuples, includeDestinations, new HashSet<URI>(), 0, depth);
	}

	/**
	 * @return the repository backing this cache
	 */
	public Repository getRepository() {
		return this.repository;
	}

	/**
	 * Keeps only those values whose string form starts with one of the allowed
	 * prefixes. The wildcard {@code "*"} disables filtering and returns the
	 * input set itself.
	 *
	 * @param resources       values to filter
	 * @param allowedPrefixes accepted prefixes, or a set containing {@code "*"}
	 * @return the input set (wildcard) or a new set with the matching values
	 * @throws IllegalArgumentException if one of the parameters is {@code null}
	 */
	private Set<Value> filterResources(Set<Value> resources, Set<String> allowedPrefixes) {
		if (resources == null || allowedPrefixes == null) {
			throw new IllegalArgumentException("Parameters must not be null");
		}
		if (allowedPrefixes.contains("*")) {
			return resources;
		}
		Set<Value> result = new HashSet<>();
		for (Value v : resources) {
			String value = v.stringValue();
			for (String p : allowedPrefixes) {
				if (value.startsWith(p)) {
					result.add(v);
					// One matching prefix is enough, no need to test the remaining ones.
					break;
				}
			}
		}
		return result;
	}

	/**
	 * Collects the subjects of all statements in the model that match one of
	 * the given predicate/object pairs.
	 *
	 * @param model    graph to search in
	 * @param tuplesPO map from predicate to object; entries whose key is not a URI are skipped
	 * @return the matching subjects; never {@code null}
	 * @throws IllegalArgumentException if one of the parameters is {@code null}
	 */
	private Set<Value> getMatchingSubjects(Model model, Map<Value, Value> tuplesPO) {
		if (model == null || tuplesPO == null) {
			throw new IllegalArgumentException();
		}
		Set<Value> result = new HashSet<>();
		for (Map.Entry<Value, Value> tuple : tuplesPO.entrySet()) {
			Value predicate = tuple.getKey();
			if (!(predicate instanceof URI)) {
				// Only URIs are valid predicates; skip instead of failing with a ClassCastException.
				log.warn("Ignoring non-URI predicate in follow tuples: " + predicate);
				continue;
			}
			result.addAll(model.filter(null, (URI) predicate, tuple.getValue()).subjects());
		}
		return result;
	}

	/**
	 * Blocks until the rate limiter of the URI's host grants a permit. A limiter
	 * is created on first use per hostname with the configured rate limit.
	 *
	 * <p>URIs that cannot be parsed or that have no host component are not
	 * throttled, because the limiter cache cannot hold a {@code null} key.</p>
	 *
	 * @param uri the resource that is about to be requested
	 */
	void throttle(URI uri) {
		String hostname;
		try {
			hostname = java.net.URI.create(uri.stringValue()).getHost();
		} catch (IllegalArgumentException e) {
			log.warn("Unable to parse URI, not throttling: " + uri);
			return;
		}
		if (hostname == null) {
			log.warn("No hostname found in URI, not throttling: " + uri);
			return;
		}

		try {
			rateLimiters.get(hostname, new Callable<RateLimiter>() {
				@Override
				public RateLimiter call() throws Exception {
					return RateLimiter.create(rateLimit);
				}
			}).acquire();
		} catch (ExecutionException e) {
			log.error(e.getMessage(), e);
		}
	}

}