sandbox.perspecting.JSONBasedDBPediaSerializer.java Source code

Java tutorial

Introduction

Here is the source code for sandbox.perspecting.JSONBasedDBPediaSerializer.java

Source

/*
Copyright (c) 2011, 
Benjamin Adrian <benjamin.horak@gmail.com>
German Research Center for Artificial Intelligence (DFKI) <info@dfki.de>
    
All rights reserved.
    
This file is part of SCOOBIE.
    
SCOOBIE is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
    
SCOOBIE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
    
You should have received a copy of the GNU General Public License
along with SCOOBIE.  If not, see <http://www.gnu.org/licenses/>.
 */

package sandbox.perspecting;

import java.io.StringReader;
import java.net.URL;
import java.util.List;

import org.json.simple.JSONObject;
import org.openrdf.model.Literal;
import org.openrdf.model.Statement;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.repository.sail.SailRepositoryConnection;
import org.openrdf.rio.RDFFormat;
import org.openrdf.sail.memory.MemoryStore;

import de.dfki.km.perspecting.obie.connection.KnowledgeBase;
import de.dfki.km.perspecting.obie.model.Document;
import de.dfki.km.perspecting.obie.model.SemanticEntity;
import de.dfki.km.perspecting.obie.model.TokenSequence;
import de.dfki.km.perspecting.obie.postprocessor.Serializer;

/*
 * plainUI.data.relations = {
 "Rainer Brderle": {
 "Born": "22 June 1945",
 "Born in": "Berlin",
 "Nationality": "Germany",
 "Political party": "FDP",
 "Alma mater": " University of Mainz",
 "Profession": "Economist",
 "Website": "www.rainer-bruederle.de",
 "Political role": "Federal Minister for Economics and Technology",
 "_img", "http://server.com/image.jpg",
 },
 "Federal Ministry of Economics and Technology (Germany)": {
 "Formed": "1917 (as the Reichswirtschaftsamt)",
 "Jurisdiction": "Government of Germany",
 "Headquarters": "Berlin",
 "Minister responsible": "Rainer Brderle",
 "Website": "www.bmwi.de"
 },
 "Internet of Services": { "Website": "www.internet-of-services.com" }
 }
    
 */

public class JSONBasedDBPediaSerializer implements Serializer {

    private String dbpediaMirror = "pc-4323:8890";

    public void setDbpediaMirror(String dbpediaMirror) {
        this.dbpediaMirror = dbpediaMirror;
    }

    public String getDbpediaMirror() {
        return dbpediaMirror;
    }

    @Override
    public StringReader serialize(Document document, KnowledgeBase kb) throws Exception {

        List<TokenSequence<SemanticEntity>> results = document.getResolvedSubjects();

        JSONObject json = new JSONObject();

        for (TokenSequence<SemanticEntity> result : results) {
            String uri = result.getValue().getSubjectURI();
            String text = document.getPlainTextContent().substring(result.getStart(), result.getEnd());

            if (!text.matches("[0-9]+")) {
                JSONObject entry = getLinkedDataInfo(uri);
                // if (entry.containsKey("_abstract")) {
                json.put(text, entry);
                // }
            }
        }

        return new StringReader(json.toJSONString());
    }

    @SuppressWarnings("unchecked")
    private JSONObject getLinkedDataInfo(String uri) throws Exception {

        SailRepository sr = new SailRepository(new MemoryStore());
        SailRepositoryConnection conn;
        sr.initialize();
        conn = sr.getConnection();

        JSONObject json = new JSONObject();

        System.out.println("Lookup: " + uri);

        // hack to get data from local DBpedia mirror

        String urlString = uri.replaceFirst("http://dbpedia.org/resource/", "http://" + dbpediaMirror + "/data/");

        URL url = new URL(urlString);

        // only absolute URIs are assumed
        conn.add(url, "", RDFFormat.RDFXML);

        for (Statement stmt : conn.getStatements(null,
                new URIImpl("http://" + dbpediaMirror + "/ontology/wikiPageRedirects"), null, false).asList()) {
            sr = new SailRepository(new MemoryStore());
            sr.initialize();
            conn = sr.getConnection();

            uri = stmt.getObject().stringValue().replace("resource", "data");

            System.out.println("Found redirect from " + url + " to " + stmt.getObject().stringValue());
            conn.add(new URL(uri), "", RDFFormat.RDFXML);
        }

        System.out.println("found " + conn.size() + " triples.");
        json.put("_uri", uri);
        json.put("group", uri.hashCode());

        for (Statement stmt : conn
                .getStatements(new URIImpl(uri.replaceFirst("data", "resource")), null, null, false).asList()) {

            String predicate = stmt.getPredicate().toString()
                    .substring(stmt.getPredicate().toString().lastIndexOf("/") + 1);

            predicate = predicate.substring(predicate.indexOf("#") + 1);

            String predicate2 = "";

            for (char c : predicate.toCharArray()) {
                if (Character.isUpperCase(c)) {
                    predicate2 += " ";
                }
                predicate2 += Character.toLowerCase(c);
            }

            predicate = predicate2;

            if (stmt.getObject() instanceof Literal) {
                String object = stmt.getObject().stringValue();
                if (!object.contains("px") && !object.contains("{")) {

                    if (predicate.equals("abstract") || predicate.equals("comment")) {
                        int i = stmt.getObject().stringValue().indexOf(". ");
                        if (i > 0) {
                            json.put("_abstract", stmt.getObject().stringValue().substring(0, i));
                        } else {
                            json.put("_abstract", stmt.getObject().stringValue());
                        }
                    } else if (predicate.contains(" ")) {
                        json.put(predicate, stmt.getObject().stringValue());
                    }
                }
            } else if (predicate.equals("depiction")) {
                json.put("_img", stmt.getObject().stringValue());
            } else if (predicate.equals("page")) {
                json.put("_wikipedia", stmt.getObject().stringValue());
            } else if (!predicate.endsWith("same as") && !predicate.endsWith("thumbnail")) {

                String object = stmt.getObject().stringValue()
                        .substring(stmt.getObject().stringValue().lastIndexOf("/") + 1);
                if (!object.contains(":") && !predicate.startsWith("wiki")) {
                    json.put(predicate, object.replaceAll("_", " "));
                }
            }

        }

        return json;
    }

}