Java tutorial
/* $This file is distributed under the terms of the license in /doc/license.txt$ */

package edu.cornell.mannlib.ld4lindexing.documents;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import edu.cornell.mannlib.ld4lindexing.QueryRunner;
import edu.cornell.mannlib.ld4lindexing.solrservers.SolrDocument;
import edu.cornell.mannlib.ld4lindexing.triplestores.TripleStore;

/**
 * Builds a Solr document for an Instance.
 *
 * The document is assembled in two steps: {@link #populateValues()} gathers
 * lists of values into the inherited {@code values} map, and
 * {@code assembleSolrDocument()} copies those lists into Solr fields.
 */
public class InstanceDocument extends BaseDocument {
    private static final Log log = LogFactory.getLog(InstanceDocument.class);

    /** URIs that start with this prefix are treated as WorldCat identifiers. */
    private static final String NAMESPACE_WORLDCAT = "http://www.worldcat.org/oclc/";

    private static final String PROP_SAME_AS = "http://www.w3.org/2002/07/owl#sameAs";
    private static final String PROP_INSTANCE_OF = "http://bib.ld4l.org/ontology/instanceOf";
    private static final String PROP_IDENTIFIED_BY = "http://bib.ld4l.org/ontology/identifiedBy";
    private static final String PROP_HAS_PROVISION = "http://bib.ld4l.org/ontology/hasProvision";
    private static final String PROP_EXTENT = "http://bib.ld4l.org/ontology/extent";
    private static final String PROP_DIMENSIONS = "http://bib.ld4l.org/ontology/dimensions";
    private static final String PROP_ILLUSTRATION_NOTE = "http://bib.ld4l.org/ontology/illustrationNote";
    private static final String PROP_SUPPLEMENTARY_CONTENT_NOTE = "http://bib.ld4l.org/ontology/legacy/supplementaryContentNote";

    /**
     * The generic identifier type. A row with any other type is preferred
     * when choosing how to label an identifier.
     */
    private static final String TYPE_IDENTIFIER = "http://bib.ld4l.org/ontology/Identifier";

    /** Selects each type and the value of an identifier; "id" is bound by the caller. */
    private static final String QUERY_IDENTIFIER_CONTENTS = "" //
            + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" //
            + "SELECT ?type ?value \n" //
            + "WHERE { \n" //
            + " ?id a ?type . \n" //
            + " ?id rdf:value ?value . \n" //
            + "} LIMIT 1000 \n";

    /**
     * Selects the optional agent name, location name and date of a publisher
     * provision; "provision" is bound by the caller.
     *
     * The FOAF namespace must be exactly http://xmlns.com/foaf/0.1/ or the
     * name patterns never match. The SELECT variables are separated by
     * whitespace only, as the SPARQL grammar requires.
     */
    private static final String QUERY_PUBLISHER_PROVISION = "" //
            + "PREFIX dc: <http://purl.org/dc/elements/1.1/> \n" //
            + "PREFIX ld4l: <http://bib.ld4l.org/ontology/> \n" //
            + "PREFIX prov: <http://www.w3.org/ns/prov#> \n" //
            + "PREFIX foaf: <http://xmlns.com/foaf/0.1/> \n" //
            + "SELECT ?agent_name ?location_name ?date \n" //
            + "WHERE { \n" //
            + " ?provision a ld4l:PublisherProvision . \n" //
            + " OPTIONAL { \n" //
            + " ?provision dc:date ?date . \n" //
            + " } \n" //
            + " OPTIONAL { \n" //
            + " ?provision prov:agent ?agent . \n" //
            + " ?agent foaf:name ?agent_name . \n" //
            + " } \n" //
            + " OPTIONAL { \n" //
            + " ?provision prov:atLocation ?location . \n" //
            + " ?location foaf:name ?location_name . \n" //
            + " } \n" //
            + "} LIMIT 1000 \n";

    /**
     * Selects the shelf mark values of the holdings for an instance;
     * "instance" is bound by the caller.
     */
    private static final String QUERY_SHELF_MARK = "" //
            + "PREFIX ld4l: <http://bib.ld4l.org/ontology/> \n" //
            + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" //
            + "SELECT ?value \n" //
            + "WHERE { \n" //
            + " ?holding ld4l:isHoldingFor ?instance . \n" //
            + " ?holding ld4l:hasShelfMark ?sm . \n" //
            + " ?sm rdf:value ?value . \n" //
            + "} LIMIT 1000 \n";

    /**
     * Passes all three arguments straight through to the superclass
     * constructor.
     *
     * @param uri
     *            the URI of the Instance
     * @param stats
     *            the statistics accumulator handed to the superclass
     * @param ts
     *            the triple store handed to the superclass
     */
    public InstanceDocument(String uri, DocumentStatsAccumulator stats,
            TripleStore ts) {
        super(uri, stats, ts);
    }

    /**
     * Runs the four build steps in order: populateProperties(),
     * populateValues(), assembleSolrDocument(), addToStats(). The first and
     * last are not defined in this class.
     */
    @Override
    public void populate() {
        populateProperties();
        populateValues();
        assembleSolrDocument();
        addToStats();
    }

    /**
     * Stores one list per key in the {@code values} map. The keys written
     * here are the ones that assembleSolrDocument() and addTitleFields()
     * read back.
     */
    public void populateValues() {
        values.put("classes", findClasses());
        values.put("titles", findTitles());
        values.put("instance_of", findInstanceOfs());
        values.put("worldcat_ids", findWorldcatIds());
        values.put("same_as_uris", findSameAsUris());
        values.put("identifiers", findIdentifiers());
        values.put("publishers", findPublisherProvisions());
        values.put("holdings", findHoldings());
        values.put("extents", findPropertyStrings(PROP_EXTENT));
        values.put("dimensions", findPropertyStrings(PROP_DIMENSIONS));
        values.put("illustration_notes",
                findPropertyStrings(PROP_ILLUSTRATION_NOTE));
        values.put("supplementary_content_notes",
                findPropertyStrings(PROP_SUPPLEMENTARY_CONTENT_NOTE));
    }

    /**
     * Builds one LinkToken per instanceOf value, pairing the first title
     * found for that work URI with the URI itself.
     */
    private List<LinkToken> findInstanceOfs() {
        List<LinkToken> list = new ArrayList<>();
        List<String> workUris = findPropertyStrings(PROP_INSTANCE_OF);
        for (String workUri : workUris) {
            // NOTE(review): get(0) throws if findTitlesFor() returns an
            // empty list; confirm that it always yields at least one title.
            list.add(new LinkToken(findTitlesFor(workUri).get(0), workUri));
        }
        return list;
    }

    /**
     * @return the sameAs values that do not start with the WorldCat namespace
     */
    private List<String> findSameAsUris() {
        return findSameAsValues(false);
    }

    /**
     * @return the sameAs values that start with the WorldCat namespace, as
     *         complete URIs
     */
    private List<String> findWorldcatIds() {
        return findSameAsValues(true);
    }

    /**
     * Splits the sameAs values by namespace. The matches are collected into
     * a new list, so the list returned by findPropertyStrings() is never
     * modified.
     *
     * @param inWorldcatNamespace
     *            true to keep only the values that start with the WorldCat
     *            namespace, false to keep only the others
     * @return a new list holding the matching values, in their original order
     */
    private List<String> findSameAsValues(boolean inWorldcatNamespace) {
        List<String> matches = new ArrayList<>();
        for (String sameAs : findPropertyStrings(PROP_SAME_AS)) {
            if (sameAs.startsWith(NAMESPACE_WORLDCAT) == inWorldcatNamespace) {
                matches.add(sameAs);
            }
        }
        return matches;
    }

    /**
     * Builds one LinkToken per identifiedBy value, skipping identifiers for
     * which the query returned no value.
     */
    private List<LinkToken> findIdentifiers() {
        List<LinkToken> list = new ArrayList<>();
        for (String identifierUri : findPropertyStrings(PROP_IDENTIFIED_BY)) {
            LinkToken idContents = findIdentifierContents(identifierUri);
            if (idContents != null) {
                list.add(idContents);
            }
        }
        return list;
    }

    /**
     * Finds the value and type of one identifier.
     *
     * The first row whose type is not the generic Identifier type wins. If
     * every row has the generic type, the last row is used.
     *
     * @param identifierUri
     *            the URI bound to "id" in the query
     * @return a LinkToken of (value, type), or null if no value was found
     */
    private LinkToken findIdentifierContents(String identifierUri) {
        String value = null;
        String type = null;

        List<Map<String, Object>> results = new QueryRunner(ts,
                QUERY_IDENTIFIER_CONTENTS).bindUri("id", identifierUri)
                        .execute();
        for (Map<String, Object> row : results) {
            value = (String) row.get("value");
            type = (String) row.get("type");
            if (!type.equals(TYPE_IDENTIFIER)) {
                // A specific type is the better label; stop looking.
                return new LinkToken(value, type);
            }
        }

        if (value == null) {
            return null;
        } else {
            return new LinkToken(value, type);
        }
    }

    /**
     * Collects the publisher strings of every hasProvision value into a
     * single list.
     */
    private List<String> findPublisherProvisions() {
        List<String> list = new ArrayList<>();
        for (String provisionUri : findPropertyStrings(PROP_HAS_PROVISION)) {
            list.addAll(findPublisherContents(provisionUri));
        }
        return list;
    }

    /**
     * Builds one display string per result row, joining whichever of agent
     * name, location name and date are present with ", ".
     *
     * @param provisionUri
     *            the URI bound to "provision" in the query
     * @return the display strings; rows that produce no text are left out
     */
    private List<String> findPublisherContents(String provisionUri) {
        List<String> publishers = new ArrayList<>();

        List<Map<String, Object>> contents = new QueryRunner(ts,
                QUERY_PUBLISHER_PROVISION).bindUri("provision", provisionUri)
                        .execute();
        for (Map<String, Object> row : contents) {
            StringBuilder publisher = new StringBuilder();
            for (Object o : new Object[] { row.get("agent_name"),
                    row.get("location_name"), row.get("date") }) {
                if (o != null) {
                    publisher.append(", ").append(o);
                }
            }
            // Every part was prefixed with ", ", so drop the leading one.
            if (publisher.length() > 2) {
                publishers.add(publisher.substring(2));
            }
        }
        return publishers;
    }

    /**
     * Runs the shelf mark query with "instance" bound to this document's URI.
     *
     * @return the non-null "value" entries of the result rows
     */
    private List<String> findHoldings() {
        List<String> list = new ArrayList<>();

        List<Map<String, Object>> results = new QueryRunner(ts,
                QUERY_SHELF_MARK).bindUri("instance", uri).execute();
        for (Map<String, Object> row : results) {
            String value = (String) row.get("value");
            if (value != null) {
                list.add(value);
            }
        }
        return list;
    }

    /**
     * Copies the collected values into a new SolrDocument and stores it in
     * the {@code solrDocument} field.
     */
    private void assembleSolrDocument() {
        // Skip building the message when debug logging is turned off.
        if (log.isDebugEnabled()) {
            log.debug("Creating Solr document: " + this);
        }

        SolrDocument doc = new SolrDocument();
        doc.addFieldValue("id", UriUtils.uriToId(uri));

        doc.addFieldValues("class_facet", values.get("classes"));
        doc.addFieldValues("class_display", values.get("classes"));
        doc.addFieldValue("source_site_facet", sourceSite);
        doc.addFieldValue("source_site_display", sourceSite);

        doc.addFieldValues("worldcat_id_token", values.get("worldcat_ids"));
        // The key must match the one written by populateValues().
        doc.addFieldValues("same_as_token", values.get("same_as_uris"));
        doc.addFieldValues("publisher_t", values.get("publishers"));
        doc.addFieldValues("holding_t", values.get("holdings"));
        doc.addFieldValues("extent_t", values.get("extents"));
        doc.addFieldValues("dimensions_t", values.get("dimensions"));
        doc.addFieldValues("illustration_note_t",
                values.get("illustration_notes"));
        doc.addFieldValues("supplementary_content_note_t",
                values.get("supplementary_content_notes"));

        addTitleFields(doc);

        doc.addFieldValues("instance_of_token",
                LinkToken.tokens(values.get("instance_of")));
        doc.addFieldValues("identifier_token",
                LinkToken.tokens(values.get("identifiers")));

        doc.addFieldValues("text", values.get("titles"));

        this.solrDocument = doc;
    }

    /**
     * Adds the first title as "title_display" and any further titles as
     * "alt_titles_t". Does nothing if there are no titles.
     *
     * @param doc
     *            the document that receives the title fields
     */
    private void addTitleFields(SolrDocument doc) {
        List<? extends Object> titles = values.get("titles");
        if (titles == null || titles.isEmpty()) {
            return;
        }

        doc.addFieldValue("title_display", titles.get(0));
        if (titles.size() > 1) {
            doc.addFieldValues("alt_titles_t",
                    titles.subList(1, titles.size()));
        }
    }
}