edu.cornell.mannlib.ld4lindexing.documents.InstanceDocument.java Source code

Java tutorial

Introduction

Here is the source code for edu.cornell.mannlib.ld4lindexing.documents.InstanceDocument.java

Source

/* $This file is distributed under the terms of the license in /doc/license.txt$ */

package edu.cornell.mannlib.ld4lindexing.documents;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import edu.cornell.mannlib.ld4lindexing.QueryRunner;
import edu.cornell.mannlib.ld4lindexing.solrservers.SolrDocument;
import edu.cornell.mannlib.ld4lindexing.triplestores.TripleStore;

/**
 * Builds a Solr document for an Instance.
 *
 * {@link #populate()} gathers the values for the instance (from the inherited
 * finder methods and from SPARQL queries run against the triple store), copies
 * them into a {@link SolrDocument}, and records the result in the statistics.
 */
public class InstanceDocument extends BaseDocument {
    private static final Log log = LogFactory.getLog(InstanceDocument.class);

    /** owl:sameAs targets in this namespace are treated as WorldCat IDs. */
    private static final String NAMESPACE_WORLDCAT = "http://www.worldcat.org/oclc/";
    private static final String PROP_SAME_AS = "http://www.w3.org/2002/07/owl#sameAs";

    private static final String PROP_INSTANCE_OF = "http://bib.ld4l.org/ontology/instanceOf";
    private static final String PROP_IDENTIFIED_BY = "http://bib.ld4l.org/ontology/identifiedBy";
    private static final String PROP_HAS_PROVISION = "http://bib.ld4l.org/ontology/hasProvision";
    private static final String PROP_EXTENT = "http://bib.ld4l.org/ontology/extent";
    private static final String PROP_DIMENSIONS = "http://bib.ld4l.org/ontology/dimensions";
    private static final String PROP_ILLUSTRATION_NOTE = "http://bib.ld4l.org/ontology/illustrationNote";
    private static final String PROP_SUPPLEMENTARY_CONTENT_NOTE = "http://bib.ld4l.org/ontology/legacy/supplementaryContentNote";

    /** The generic identifier class; a more specific type is preferred when one exists. */
    private static final String TYPE_IDENTIFIER = "http://bib.ld4l.org/ontology/Identifier";

    /** Finds every type and value of an identifier; "id" is bound by the caller. */
    private static final String QUERY_IDENTIFIER_CONTENTS = "" //
            + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" //
            + "SELECT ?type ?value \n" //
            + "WHERE { \n" //
            + "  ?id a ?type . \n" //
            + "  ?id rdf:value ?value . \n" //
            + "} LIMIT 1000 \n";

    /**
     * Finds the optional agent name, location name and date of a publisher
     * provision; "provision" is bound by the caller.
     */
    private static final String QUERY_PUBLISHER_PROVISION = "" //
            + "PREFIX dc: <http://purl.org/dc/elements/1.1/> \n" //
            + "PREFIX ld4l: <http://bib.ld4l.org/ontology/> \n" //
            + "PREFIX prov: <http://www.w3.org/ns/prov#> \n" //
            // The FOAF namespace has a single "http://" scheme.
            + "PREFIX foaf: <http://xmlns.com/foaf/0.1/> \n" //
            // SPARQL separates projected variables with whitespace, not commas.
            + "SELECT ?agent_name ?location_name ?date \n" //
            + "WHERE { \n" //
            + "  ?provision a ld4l:PublisherProvision . \n" //
            + "  OPTIONAL { \n" //
            + "    ?provision dc:date ?date . \n" //
            + "  } \n" //
            + "  OPTIONAL { \n" //
            + "    ?provision prov:agent ?agent . \n" //
            + "    ?agent foaf:name ?agent_name . \n" //
            + "  } \n" //
            + "  OPTIONAL { \n" //
            + "    ?provision prov:atLocation ?location . \n" //
            + "    ?location foaf:name ?location_name . \n" //
            + "  } \n" //
            + "} LIMIT 1000 \n";

    /** Finds the shelf mark values of the holdings for an instance; "instance" is bound by the caller. */
    private static final String QUERY_SHELF_MARK = "" //
            + "PREFIX ld4l: <http://bib.ld4l.org/ontology/> \n" //
            + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" //
            + "SELECT  ?value \n" //
            + "WHERE { \n" //
            + "  ?holding ld4l:isHoldingFor ?instance . \n" //
            + "  ?holding ld4l:hasShelfMark ?sm . \n" //
            + "  ?sm rdf:value ?value . \n" //
            + "} LIMIT 1000 \n";

    /**
     * @param uri
     *            the URI of the instance to be indexed
     * @param stats
     *            passed through to the superclass
     * @param ts
     *            the triple store that the queries are run against
     */
    public InstanceDocument(String uri, DocumentStatsAccumulator stats, TripleStore ts) {
        super(uri, stats, ts);
    }

    /**
     * Gathers the properties and values, assembles the Solr document from
     * them, and adds this document to the statistics, in that order.
     */
    @Override
    public void populate() {
        populateProperties();
        populateValues();
        assembleSolrDocument();
        addToStats();
    }

    /**
     * Stores each group of values under the key that
     * {@link #assembleSolrDocument()} reads it back from.
     */
    public void populateValues() {
        values.put("classes", findClasses());
        values.put("titles", findTitles());
        values.put("instance_of", findInstanceOfs());

        values.put("worldcat_ids", findWorldcatIds());
        values.put("same_as_uris", findSameAsUris());

        values.put("identifiers", findIdentifiers());
        values.put("publishers", findPublisherProvisions());
        values.put("holdings", findHoldings());
        values.put("extents", findPropertyStrings(PROP_EXTENT));
        values.put("dimensions", findPropertyStrings(PROP_DIMENSIONS));
        values.put("illustration_notes", findPropertyStrings(PROP_ILLUSTRATION_NOTE));
        values.put("supplementary_content_notes", findPropertyStrings(PROP_SUPPLEMENTARY_CONTENT_NOTE));
    }

    /**
     * Creates one link token for each work that this instance is an instance
     * of, pairing the first title of the work with the URI of the work.
     *
     * A work without any title must not abort the whole document, so the URI
     * of the work stands in for the missing title and a warning is logged.
     */
    private List<LinkToken> findInstanceOfs() {
        List<LinkToken> list = new ArrayList<>();
        for (String workUri : findPropertyStrings(PROP_INSTANCE_OF)) {
            List<String> workTitles = findTitlesFor(workUri);
            if (workTitles == null || workTitles.isEmpty()) {
                log.warn("No title found for work '" + workUri + "', referenced by instance '" + uri + "'");
                list.add(new LinkToken(workUri, workUri));
            } else {
                list.add(new LinkToken(workTitles.get(0), workUri));
            }
        }
        return list;
    }

    /**
     * @return the owl:sameAs values that are not in the WorldCat namespace
     */
    private List<String> findSameAsUris() {
        return filterSameAs(false);
    }

    /**
     * @return the owl:sameAs values that are in the WorldCat namespace
     */
    private List<String> findWorldcatIds() {
        return filterSameAs(true);
    }

    /**
     * Selects owl:sameAs values by whether they start with the WorldCat
     * namespace. The result is always a new list, so the list returned by
     * findPropertyStrings() is never modified.
     *
     * @param wantWorldcat
     *            true to keep only the WorldCat values, false to keep only the
     *            others
     */
    private List<String> filterSameAs(boolean wantWorldcat) {
        List<String> matches = new ArrayList<>();
        for (String sameAsUri : findPropertyStrings(PROP_SAME_AS)) {
            if (sameAsUri.startsWith(NAMESPACE_WORLDCAT) == wantWorldcat) {
                matches.add(sameAsUri);
            }
        }
        return matches;
    }

    /**
     * Creates a link token for each identifier of this instance, omitting any
     * identifier for which no contents were found.
     */
    private List<LinkToken> findIdentifiers() {
        List<LinkToken> list = new ArrayList<>();
        for (String identifierUri : findPropertyStrings(PROP_IDENTIFIED_BY)) {
            LinkToken idContents = findIdentifierContents(identifierUri);
            if (idContents != null) {
                list.add(idContents);
            }
        }
        return list;
    }

    /**
     * Pairs the value of an identifier with its type.
     *
     * The first result row whose type is not the generic Identifier class
     * wins. If every row has the generic type, the last row is used.
     *
     * @return the value and type as a link token, or null if no value was
     *         found
     */
    private LinkToken findIdentifierContents(String identifierUri) {
        String value = null;
        String type = null;
        List<Map<String, Object>> results = new QueryRunner(ts, QUERY_IDENTIFIER_CONTENTS)
                .bindUri("id", identifierUri).execute();
        for (Map<String, Object> row : results) {
            value = (String) row.get("value");
            type = (String) row.get("type");
            if (!type.equals(TYPE_IDENTIFIER)) {
                // A specific type is preferred; stop looking.
                return new LinkToken(value, type);
            }
        }
        if (value == null) {
            return null;
        }
        return new LinkToken(value, type);
    }

    /**
     * Collects the publisher strings from all of the provisions of this
     * instance.
     */
    private List<String> findPublisherProvisions() {
        List<String> list = new ArrayList<>();
        for (String provisionUri : findPropertyStrings(PROP_HAS_PROVISION)) {
            list.addAll(findPublisherContents(provisionUri));
        }
        return list;
    }

    /**
     * Builds one string for each result row of the publisher query, joining
     * the agent name, location name and date (those that are present, in that
     * order) with ", ". Rows that yield no text are omitted.
     */
    private List<String> findPublisherContents(String provisionUri) {
        List<String> publishers = new ArrayList<>();
        List<Map<String, Object>> contents = new QueryRunner(ts, QUERY_PUBLISHER_PROVISION)
                .bindUri("provision", provisionUri).execute();
        for (Map<String, Object> row : contents) {
            StringBuilder publisher = new StringBuilder();
            for (Object o : new Object[] { row.get("agent_name"), row.get("location_name"), row.get("date") }) {
                if (o != null) {
                    publisher.append(", ").append(o);
                }
            }
            // Every part was appended with a leading ", "; a length of 2 or
            // less means that there is no actual text to keep.
            if (publisher.length() > 2) {
                publishers.add(publisher.substring(2));
            }
        }
        return publishers;
    }

    /**
     * @return the non-null shelf mark values from the holdings for this
     *         instance
     */
    private List<String> findHoldings() {
        List<String> list = new ArrayList<>();
        List<Map<String, Object>> results = new QueryRunner(ts, QUERY_SHELF_MARK).bindUri("instance", uri)
                .execute();
        for (Map<String, Object> row : results) {
            String value = (String) row.get("value");
            if (value != null) {
                list.add(value);
            }
        }
        return list;
    }

    /**
     * Copies the values that were gathered by {@link #populateValues()} into
     * the fields of a new Solr document, and stores it as the result.
     */
    private void assembleSolrDocument() {
        log.debug("Creating Solr document: " + this);
        SolrDocument doc = new SolrDocument();

        doc.addFieldValue("id", UriUtils.uriToId(uri));
        doc.addFieldValues("class_facet", values.get("classes"));
        doc.addFieldValues("class_display", values.get("classes"));
        doc.addFieldValue("source_site_facet", sourceSite);
        doc.addFieldValue("source_site_display", sourceSite);
        doc.addFieldValues("worldcat_id_token", values.get("worldcat_ids"));
        // The key must match the one used in populateValues().
        doc.addFieldValues("same_as_token", values.get("same_as_uris"));
        doc.addFieldValues("publisher_t", values.get("publishers"));
        doc.addFieldValues("holding_t", values.get("holdings"));
        doc.addFieldValues("extent_t", values.get("extents"));
        doc.addFieldValues("dimensions_t", values.get("dimensions"));
        doc.addFieldValues("illustration_note_t", values.get("illustration_notes"));
        doc.addFieldValues("supplementary_content_note_t", values.get("supplementary_content_notes"));

        addTitleFields(doc);

        doc.addFieldValues("instance_of_token", LinkToken.tokens(values.get("instance_of")));
        doc.addFieldValues("identifier_token", LinkToken.tokens(values.get("identifiers")));

        doc.addFieldValues("text", values.get("titles"));
        this.solrDocument = doc;
    }

    /**
     * Uses the first title as the display title, and any remaining titles as
     * alternate titles. Adds nothing if there are no titles.
     */
    private void addTitleFields(SolrDocument doc) {
        List<? extends Object> titles = values.get("titles");
        if (titles == null || titles.isEmpty()) {
            return;
        }
        doc.addFieldValue("title_display", titles.get(0));
        if (titles.size() > 1) {
            doc.addFieldValues("alt_titles_t", titles.subList(1, titles.size()));
        }
    }

}