edu.cornell.mannlib.ld4lindexing.Ld4lIndexer.java Source code

Java tutorial

Introduction

Here is the source code for edu.cornell.mannlib.ld4lindexing.Ld4lIndexer.java

Source

/* $This file is distributed under the terms of the license in /doc/license.txt$ */

package edu.cornell.mannlib.ld4lindexing;

import static edu.cornell.mannlib.ld4lindexing.documents.DocumentFactory.Type.AGENT;
import static edu.cornell.mannlib.ld4lindexing.documents.DocumentFactory.Type.INSTANCE;
import static edu.cornell.mannlib.ld4lindexing.documents.DocumentFactory.Type.WORK;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.log4j.LogManager;
import org.apache.log4j.PropertyConfigurator;

import edu.cornell.mannlib.ld4lindexing.documents.AgentDocument;
import edu.cornell.mannlib.ld4lindexing.documents.DocumentFactory;
import edu.cornell.mannlib.ld4lindexing.documents.InstanceDocument;
import edu.cornell.mannlib.ld4lindexing.documents.LanguageReference;
import edu.cornell.mannlib.ld4lindexing.documents.WorkDocument;
import edu.cornell.mannlib.ld4lindexing.solrservers.SolrServer;
import edu.cornell.mannlib.ld4lindexing.solrservers.SolrServerFactory;
import edu.cornell.mannlib.ld4lindexing.triplestores.TripleStore;
import edu.cornell.mannlib.ld4lindexing.triplestores.TripleStoreFactory;

/**
 * Use the contents of the triple-store to build the solr index.
 *
 * {@link #main(String[])} is the entry point: it configures logging, builds
 * the collaborating objects from the command-line {@link Settings}, and then
 * runs either the sampler or the builder, depending on
 * {@code settings.isSampling()}. The builder is not implemented yet.
 */
public class Ld4lIndexer {
    private static final Log log = LogFactory.getLog(Ld4lIndexer.class);

    /** Relative path to the log4j configuration; resolved against the working directory. */
    private static final String PATH_TO_LOG4J_PROPERTIES = "src/edu/cornell/mannlib/ld4lindexing/log4j.properties";

    /** SPARQL query that selects the URI of every ld4l:Work. */
    private static final String QUERY_FIND_WORKS = "" + //
            "PREFIX ld4l: <http://bib.ld4l.org/ontology/> \n" + //
            "SELECT ?uri \n" + //
            "WHERE { \n" + //
            "  ?uri a ld4l:Work . \n" + //
            "} \n" //
    ;

    private final Settings settings;
    private final SolrServer ss;
    private final UriDiscoverer uris;
    private final DocumentFactory docFactory;
    private final Report report;
    private final Bookmark bookmark;

    private final ThreadPool threadPool;

    /**
     * Store the collaborators and create the thread pool that will run the
     * per-document tasks.
     *
     * @param settings
     *            run-time settings; also used to create the thread pool
     * @param ss
     *            the Solr server that documents are added to
     * @param uris
     *            discovers the keys of the documents to be indexed
     * @param docFactory
     *            creates a document for each key
     * @param report
     *            receives progress, null-document and failure notifications
     * @param bookmark
     *            cleared when a sampling run completes normally
     */
    public Ld4lIndexer(Settings settings, SolrServer ss, UriDiscoverer uris, DocumentFactory docFactory,
            Report report, Bookmark bookmark) {
        this.settings = settings;
        this.ss = ss;
        this.report = report;
        this.bookmark = bookmark;

        this.uris = uris;
        this.docFactory = docFactory;
        this.threadPool = new ThreadPool(settings);
    }

    /**
     * Run the sampler if the settings ask for sampling; otherwise run the
     * builder.
     */
    private void run() {
        if (settings.isSampling()) {
            runSampler();
        } else {
            runBuilder();
        }
    }

    /**
     * Discover a limited number of Work URIs and index each Work, along with
     * the Instances and Agents that it refers to.
     *
     * On normal completion the thread pool is shut down, the report is marked
     * completed and the bookmark is cleared. On a TerminatedException the
     * thread pool is shut down and the report is marked interrupted; the
     * bookmark is left alone. Any other exception is passed to the report as
     * a failure.
     */
    private void runSampler() {
        uris.addQuerySpec(WORK, QUERY_FIND_WORKS);
        uris.setUriLimit(settings.getUriLimit());
        try {
            for (DocumentKey key : uris.discover()) {
                WorkDocument workDoc = docFactory.document(key).asWorkDocument();
                if (workDoc == null) {
                    report.recordNullDocument(key);
                } else {
                    processWorkDocument(workDoc);
                }
            }
            threadPool.shutdownAndWait();
            report.completed();
            bookmark.clear();
        } catch (TerminatedException e) {
            threadPool.shutdownAndWait();
            report.interrupted();
        } catch (Exception e) {
            // NOTE(review): the thread pool is not shut down on this path,
            // so tasks that were already submitted may still be running when
            // the failure is reported -- confirm that this is intended.
            report.failed(e);
        }
    }

    /**
     * Submit a task that populates the Work document and adds it to Solr, and
     * then queues the Work's Instances and Agents for indexing.
     *
     * A failure to populate or add the Work is recorded in the report; the
     * Instances and Agents are still handled afterwards.
     */
    private void processWorkDocument(final WorkDocument workDoc) {
        threadPool.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    workDoc.populate();
                    workDoc.addToSolr(ss);
                } catch (Exception e) {
                    report.recordDocumentException(workDoc, e);
                }
                // These run outside the try block, so they are attempted even
                // if the Work itself failed, and any exception they throw is
                // not recorded in the report by this task.
                handleInstances(workDoc);
                handleAgents(workDoc);
            }
        });
    }

    /**
     * Create an Instance document for each Instance URI of the Work, and
     * queue it for indexing. A null document is recorded in the report.
     */
    private void handleInstances(final WorkDocument workDoc) {
        for (String uri : workDoc.getInstanceUris()) {
            DocumentKey key = new DocumentKey(INSTANCE, uri);
            InstanceDocument instanceDoc = docFactory.document(key).asInstanceDocument();
            if (instanceDoc == null) {
                report.recordNullDocument(key);
            } else {
                processInstanceDocument(instanceDoc);
            }
        }
    }

    /**
     * Submit a task that populates the Instance document and adds it to
     * Solr. Any exception is recorded in the report.
     */
    private void processInstanceDocument(final InstanceDocument instanceDoc) {
        threadPool.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    instanceDoc.populate();
                    instanceDoc.addToSolr(ss);
                } catch (Exception e) {
                    report.recordDocumentException(instanceDoc, e);
                }
            }
        });
    }

    /**
     * Create an Agent document for each Agent URI of the Work, and queue it
     * for indexing. A null document is recorded in the report.
     */
    private void handleAgents(final WorkDocument workDoc) {
        for (String uri : workDoc.getAgentUris()) {
            DocumentKey key = new DocumentKey(AGENT, uri);
            AgentDocument agentDoc = docFactory.document(key).asAgentDocument();
            if (agentDoc == null) {
                report.recordNullDocument(key);
            } else {
                processAgentDocument(agentDoc);
            }
        }
    }

    /**
     * Submit a task that populates the Agent document and adds it to Solr.
     * Any exception is recorded in the report.
     */
    private void processAgentDocument(final AgentDocument agentDoc) {
        threadPool.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    agentDoc.populate();
                    agentDoc.addToSolr(ss);
                } catch (Exception e) {
                    report.recordDocumentException(agentDoc, e);
                }
            }
        });
    }

    /**
     * TODO Not implemented: always throws a RuntimeException.
     */
    private void runBuilder() {
        // TODO Auto-generated method stub
        throw new RuntimeException("Ld4lIndexer.runBuilder() not implemented.");

    }

    /**
     * Configure logging, build the collaborators from the command-line
     * arguments, and run the indexer.
     *
     * A StartupException is printed to standard output, set off by blank
     * lines; it is not re-thrown.
     *
     * @param args
     *            the command-line arguments, passed to {@link Settings}
     */
    public static void main(String[] args) {
        try {
            initializeLogging();
            log.info("Arguments are " + Arrays.toString(args));
            Settings settings = new Settings(args);

            // NOTE(review): the result is discarded; presumably this forces
            // LanguageReference to initialize before indexing starts --
            // confirm.
            LanguageReference.lookup("bogus");

            TripleStore ts = TripleStoreFactory.instance(settings);
            SolrServer ss = SolrServerFactory.instance(settings);
            Bookmark bookmark = new Bookmark(settings, ss);
            DocumentFactory docFactory = new DocumentFactory(ts);
            Report report = new Report(settings, bookmark, docFactory);
            UriDiscoverer uris = new UriDiscoverer(ts, bookmark, report);
            // NOTE(review): the instance is discarded; presumably the
            // constructor registers the handler -- confirm.
            new SigintHandler(uris);

            new Ld4lIndexer(settings, ss, uris, docFactory, report, bookmark).run();

        } catch (StartupException e) {
            System.out.println();
            System.out.println(e);
            System.out.println();
        }
    }

    /**
     * Reset the log4j configuration and re-configure it from the properties
     * file at PATH_TO_LOG4J_PROPERTIES.
     *
     * @throws StartupException
     *             (a LoggingSetupException) if the properties file cannot be
     *             opened, read or closed
     */
    private static void initializeLogging() throws StartupException {
        try {
            LogManager.resetConfiguration();
            Properties props = new Properties();
            FileInputStream stream = new FileInputStream(PATH_TO_LOG4J_PROPERTIES);
            try {
                props.load(stream);
            } finally {
                // Properties.load() leaves the stream open, so close it here
                // to avoid leaking the file handle.
                stream.close();
            }
            PropertyConfigurator.configure(props);
        } catch (IOException e) {
            throw new LoggingSetupException(e);
        }

    }

    /** Indicates that the logging configuration could not be loaded. */
    static class LoggingSetupException extends StartupException {
        public LoggingSetupException(Throwable cause) {
            super(cause);
        }
    }

}