de.fuberlin.wiwiss.marbles.dataproviders.SindiceProvider.java Source code

Java tutorial

Introduction

Here is the source code for de.fuberlin.wiwiss.marbles.dataproviders.SindiceProvider.java

Source

/*
 *   Copyright (c) 2009, MediaEvent Services GmbH & Co. KG
 *   http://mediaeventservices.com
 *   
 *   This file is part of Marbles.
 *
 *   Marbles is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   Marbles is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Marbles.  If not, see <http://www.gnu.org/licenses/>.
 *   
 */
package de.fuberlin.wiwiss.marbles.dataproviders;

import info.aduna.iteration.Iterations;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.openrdf.model.BNode;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.RepositoryResult;

import de.fuberlin.wiwiss.marbles.Constants;
import de.fuberlin.wiwiss.marbles.loading.CacheController;

/**
 * Queries Sindice.com for URLs that contain data about a given resource.
 * <p>
 * The provider builds a Sindice term-search URL for a resource and, once the
 * response of that lookup is present in the repository, extracts the linked
 * document URLs from it.
 *
 * @author Christian Becker
 */
public class SindiceProvider implements DataProvider {

    /**
     * The URL of Sindice's lookup service; the URL-encoded resource is
     * appended to it as the query term.
     */
    private static final String SERVICE_URL = "http://api.sindice.com/v2/search?qt=term&page=1&q=";

    /**
     * Generates the lookup URL for a given resource.
     *
     * @param resource   The resource for which information is requested
     * @return   Sindice lookup URL, or <code>null</code> if the resource is
     *           <code>null</code>, is a blank node, or could not be encoded
     */
    private String getSindiceLookupURL(Resource resource) {
        /* Blank nodes have no global identifier that could be looked up */
        if (resource == null || resource instanceof BNode) {
            return null;
        }

        try {
            return SERVICE_URL + URLEncoder.encode(resource.toString(), "UTF-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Parses an already escaped URL string into an HttpClient URI.
     * Parse failures are reported on stderr and yield <code>null</code>, so
     * that a single malformed URL does not abort the caller.
     *
     * @param escapedURL   URL string in escaped form
     * @return   the parsed URI, or <code>null</code> if parsing failed
     */
    private static URI parseEscapedURI(String escapedURL) {
        try {
            return new URI(escapedURL, true /* is already escaped */);
        } catch (URIException e) {
            e.printStackTrace();
        } catch (NullPointerException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Builds the URL that has to be fetched to query Sindice about a resource.
     *
     * @param resource   The resource for which information is requested
     * @return   the lookup URL, or <code>null</code> if no lookup URL can be
     *           built for the resource (e.g. for blank nodes)
     */
    public org.apache.commons.httpclient.URI getQueryURL(Resource resource) {
        String sindiceLookupURL = getSindiceLookupURL(resource);
        if (sindiceLookupURL == null) {
            return null;
        }
        return parseEscapedURI(sindiceLookupURL);
    }

    /**
     * Collects the URLs that a previously stored Sindice response links to.
     * <p>
     * The statements are read from the context named after the lookup URL, or
     * after its cached redirect target if the cache controller knows one.
     *
     * @param cacheController   Used to resolve a cached redirect of the lookup URL
     * @param conn   Connection to the repository holding the Sindice response
     * @param resource   The resource for which information is requested
     * @return   the distinct URLs found, in the order they were encountered;
     *           <code>null</code> if no lookup URL can be built for the
     *           resource, the context URL is invalid, or the repository
     *           cannot be read
     */
    public List<URI> getURLsFromData(CacheController cacheController, RepositoryConnection conn,
            Resource resource) {
        String lookupURL = getSindiceLookupURL(resource);
        if (lookupURL == null) {
            return null;
        }

        String redirectURL = cacheController.getCachedRedirect(lookupURL);

        Resource lookupURLContext;
        try {
            lookupURLContext = new URIImpl(redirectURL != null ? redirectURL : lookupURL);
        } catch (IllegalArgumentException e) {
            e.printStackTrace();
            return null;
        }

        List<Statement> statementsList;
        try {
            RepositoryResult<Statement> statements = conn.getStatements(null,
                    new URIImpl(Constants.nsSindiceVocab + "link"), null, true, lookupURLContext);
            try {
                statementsList = Iterations.addAll(statements, new ArrayList<Statement>());
            } finally {
                /* Release the result even if reading it failed half-way */
                statements.close();
            }
        } catch (RepositoryException e) {
            e.printStackTrace();
            return null;
        }

        List<URI> urlsToBeFetched = new ArrayList<URI>();
        for (Statement st : statementsList) {
            Value obj = st.getObject();
            if (!(obj instanceof org.openrdf.model.URI)) {
                continue;
            }

            URI url = parseEscapedURI(obj.toString());
            /* Compare parsed URIs: the list holds URI objects, so checking
             * for the string form would never detect a duplicate */
            if (url != null && !urlsToBeFetched.contains(url)) {
                urlsToBeFetched.add(url);
            }
        }

        return urlsToBeFetched;
    }
}