Source code listing: hu.bme.mit.trainbenchmark.benchmark.fourstore.driver.FourStoreDriver.java

Java tutorial

Introduction

Below is the full source code for hu.bme.mit.trainbenchmark.benchmark.fourstore.driver.FourStoreDriver.java.

Source

/*******************************************************************************
 * Copyright (c) 2010-2014, Gabor Szarnyas, Istvan Rath and Daniel Varro
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 * Gabor Szarnyas - initial API and implementation
 *******************************************************************************/
package hu.bme.mit.trainbenchmark.benchmark.fourstore.driver;

import static hu.bme.mit.trainbenchmark.benchmark.fourstore.driver.RDFUtil.brackets;
import hu.bme.mit.trainbenchmark.benchmark.rdf.RDFDatabaseDriver;
import hu.bme.mit.trainbenchmark.benchmark.sesame.driver.URIComparator;
import hu.bme.mit.trainbenchmark.constants.Query;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.io.FileUtils;
import org.openrdf.model.URI;
import org.openrdf.query.BindingSet;
import org.openrdf.query.resultio.text.tsv.SPARQLResultsTSVParser;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;

/**
 * Train Benchmark driver for the 4store triple store. All operations shell out to the
 * 4store command-line tools ({@code 4s-start}, {@code 4s-stop}, {@code 4s-import},
 * {@code 4s-query}, {@code 4s-update}) through the {@code UnixUtils} helper, addressing
 * the cluster named by {@link #CLUSTERNAME} via the {@code FOURSTORE_CLUSTER_NAME}
 * environment variable.
 *
 * <p>NOTE(review): {@link #queryIds(String)}, {@link #collectVertices(String)} and
 * {@link #ask(String)} are unfinished stubs — see their individual documentation.
 */
public class FourStoreDriver extends RDFDatabaseDriver<URI> {

    protected static final String RDF_PREFIX = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
    protected static final String SPARQL_RDF_PREFIX = "PREFIX rdf: <" + RDF_PREFIX + "> ";
    protected static final String CLUSTERNAME = "trainbenchmark_cluster";

    protected boolean showCommands = false;
    protected boolean showCommandOutput = false;
    // Exported into every spawned shell so the 4s-* tools address the right cluster.
    protected final Map<String, String> environment = ImmutableMap.of("FOURSTORE_CLUSTER_NAME", CLUSTERNAME);

    /**
     * Deletes any leftover on-disk database of a previous benchmark run so each run
     * starts from an empty store.
     *
     * @throws IOException if the stale database directory cannot be deleted
     */
    public FourStoreDriver() throws IOException {
        final File dbDirectory = new File("/var/lib/4store/" + CLUSTERNAME);
        if (dbDirectory.exists()) {
            FileUtils.deleteDirectory(dbDirectory);
        }
    }

    @Override
    public URIComparator getElementComparator() {
        return new URIComparator();
    }

    /**
     * Starts the 4store backend and SPARQL endpoint for the benchmark cluster.
     * Presumably overrides a lifecycle method of {@code RDFDatabaseDriver} — TODO confirm.
     */
    public void init() throws FileNotFoundException, IOException {
        UnixUtils.execResourceScript("4s-start.sh", environment, showCommandOutput);
    }

    /** Stops the 4store processes belonging to the benchmark cluster. */
    @Override
    public void destroy() throws FileNotFoundException, IOException {
        UnixUtils.execResourceScript("4s-stop.sh", environment, showCommandOutput);
    }

    /**
     * Imports the model file {@code modelPathWithoutExtension + getExtension()} into the store.
     *
     * @throws FileNotFoundException if the model file does not exist
     */
    @Override
    public void read(final String modelPathWithoutExtension) throws IOException {
        final String modelPath = modelPathWithoutExtension + getExtension();
        final File modelFile = new File(modelPath);
        if (!modelFile.exists()) {
            throw new FileNotFoundException(modelPath);
        }

        UnixUtils.execResourceScript("4s-import.sh", modelFile.getAbsolutePath(), environment, showCommandOutput);
    }

    public void setShowCommandOutput(final boolean showCommandOutput) {
        this.showCommandOutput = showCommandOutput;
    }

    // Prefix of the numeric fragment identifiers used in the benchmark models,
    // e.g. <...TrainRequirementOntology.owl#_87947>.
    protected final String ID_PREFIX = "_";

    // 4s-query is invoked with "-f text", which produces SPARQL TSV results.
    protected final SPARQLResultsTSVParser parser = new SPARQLResultsTSVParser();

    @Override
    public Collection<BindingSet> runQuery(final Query query, final String queryDefinition) throws IOException {
        return runQuery(queryDefinition);
    }

    /**
     * Runs a SPARQL SELECT query through {@code 4s-query} and parses the TSV output.
     *
     * <p>Fix: this method previously declared {@code throws Exception}, which made every
     * caller declaring only {@code throws IOException} fail to compile. The parser's
     * checked exceptions are now wrapped in {@link IOException}, and the process output
     * stream is closed via try-with-resources.
     *
     * <p>NOTE(review): {@code queryDefinition} is spliced into a single-quoted shell
     * argument; a query containing {@code '} would break out of the quoting. Queries come
     * from benchmark resources, not untrusted input, but this is worth keeping in mind.
     *
     * @param queryDefinition the SPARQL query text
     * @return the binding sets produced by the query
     * @throws IOException if the command fails or its output cannot be parsed
     */
    protected Collection<BindingSet> runQuery(final String queryDefinition) throws IOException {
        final String command = String.format("4s-query $FOURSTORE_CLUSTER_NAME -f text -s -1 '%s'",
                queryDefinition);
        if (showCommands) {
            System.out.println(command);
        }

        final BindingSetCollector bindingSetCollector = new BindingSetCollector();
        parser.setQueryResultHandler(bindingSetCollector);

        try (InputStream is = UnixUtils.execToStream(command, environment)) {
            parser.parse(is);
        } catch (final IOException e) {
            throw e;
        } catch (final Exception e) {
            // The TSV parser throws checked openrdf exceptions; fold them into the
            // IOException contract shared by all query methods of this driver.
            throw new IOException("Could not parse the result of the 4s-query command.", e);
        }

        return bindingSetCollector.getBindingSets();
    }

    /**
     * Intended to extract the numeric identifiers (fragment after {@link #ID_PREFIX})
     * from the query results.
     *
     * <p>TODO: unimplemented — the query is executed and its bindings are printed, but
     * the method always returns {@code null}. The intended extraction (kept from the
     * earlier line-based implementation) matched each URI against the regex
     * {@code "<.*#" + ID_PREFIX + "(\\d+)>"} and collected group 1 as a {@link Long}.
     *
     * @return currently always {@code null}
     */
    public List<Long> queryIds(final String query) throws IOException {
        final Collection<BindingSet> bindingSets = runQuery(query);

        System.out.println(bindingSets);
        return null;
    }

    /**
     * Collects the vertices of the given RDF type.
     *
     * <p>TODO: unimplemented — the SELECT query is executed, but the conversion of each
     * {@link BindingSet} to a {@link URI} is missing, so the result is always empty.
     */
    @Override
    public List<URI> collectVertices(final String type) throws IOException {
        final String queryDefinition = String.format(SPARQL_RDF_PREFIX + "SELECT ?a WHERE { ?a rdf:type %s }",
                RDFUtil.brackets(type));
        final Collection<BindingSet> ids = runQuery(queryDefinition);

        final List<URI> vertices = new LinkedList<>();
        for (final BindingSet bindingSet : ids) {
            // TODO: extract the ?a binding as a URI and add it to the result list.
        }
        return vertices;
    }

    /**
     * Evaluates an ASK query.
     *
     * <p>TODO: unimplemented — the result bindings are printed for debugging, then an
     * exception is thrown. The earlier line-based implementation read a single output
     * line and mapped {@code "true"}/{@code "false"} to the boolean result.
     *
     * @throws UnsupportedOperationException always (subclass of the previously thrown
     *             {@link RuntimeException}, so existing catch clauses still apply)
     */
    @Override
    public boolean ask(final String query) throws IOException {
        final Collection<BindingSet> bindingSet = runQuery(query);
        System.out.println(bindingSet);
        throw new UnsupportedOperationException("ASK queries are not implemented for the 4store driver.");
    }

    // Updates are sent as a single shell argument; note the following limitation of
    // arguments in Bash and set the PARTITION_SIZE accordingly:
    // "one argument must not be longer than MAX_ARG_STRLEN (131072)" (http://www.in-ulm.de/~mascheck/various/argmax/)
    private static final int PARTITION_SIZE = 500;

    // create

    // insert vertices

    /** Inserts a single vertex; convenience wrapper around {@link #insertVertices}. */
    public void insertVertex(final String uri, final String type) throws IOException {
        insertVertices(Collections.singletonList(uri), type);
    }

    /**
     * Inserts the given vertices as {@code <uri> rdf:type <type>} triples, partitioned
     * to respect the shell argument length limit.
     */
    public void insertVertices(final List<String> uris, final String type) throws IOException {
        if (uris.isEmpty()) {
            return;
        }

        for (final List<String> partition : Lists.partition(uris, PARTITION_SIZE)) {
            insertVerticesPartition(partition, type);
        }
    }

    private void insertVerticesPartition(final Collection<String> uris, final String type) throws IOException {
        final StringBuilder insertQueryBuilder = new StringBuilder(SPARQL_RDF_PREFIX);
        insertQueryBuilder.append("INSERT DATA {");
        // Fix: emit "." only BETWEEN triples (the previous version put one before the
        // first triple as well, which is invalid SPARQL), mirroring edgesToTriples().
        boolean first = true;
        for (final String uri : uris) {
            if (first) {
                first = false;
            } else {
                insertQueryBuilder.append(".");
            }
            insertQueryBuilder.append(String.format(" %s rdf:type %s ", brackets(uri), brackets(type)));
        }
        insertQueryBuilder.append("}");

        // run the update
        runUpdate(insertQueryBuilder.toString());
    }

    // insert edges

    /** Inserts a single edge; convenience wrapper around {@link #insertEdges}. */
    public void insertEdge(final String sourceVertexURI, final String targetVertexURI, final String type)
            throws IOException {
        final Multimap<String, String> edges = HashMultimap.create();
        edges.put(sourceVertexURI, targetVertexURI);
        insertEdges(edges, type);
    }

    /**
     * Inserts the given edges as {@code <source> <type> <target>} triples, partitioned
     * by source vertex to respect the shell argument length limit.
     */
    public void insertEdges(final Multimap<String, String> edges, final String type) throws IOException {
        if (edges.isEmpty()) {
            return;
        }

        for (final Multimap<String, String> edgePartition : partitionEdges(edges)) {
            insertEdgesPartition(edgePartition, type);
        }
    }

    private void insertEdgesPartition(final Multimap<String, String> edges, final String type) throws IOException {
        final StringBuilder insertQueryBuilder = new StringBuilder("INSERT DATA {");
        edgesToTriples(edges, type, insertQueryBuilder);
        insertQueryBuilder.append("}");

        // run the update
        runUpdate(insertQueryBuilder.toString());
    }

    // insert edges with vertices

    /** Inserts a single edge and types its target vertex; wrapper around {@link #insertEdgesWithVertex}. */
    public void insertEdgeWithVertex(final String sourceURI, final String targetURI, final String edgeType,
            final String targetVertexType) throws IOException {
        final Multimap<String, String> edges = HashMultimap.create();
        edges.put(sourceURI, targetURI);
        insertEdgesWithVertex(edges, edgeType, targetVertexType);
    }

    /**
     * Inserts the given edges and, for each target vertex, an additional
     * {@code <target> rdf:type <targetVertexType>} triple, partitioned by source vertex.
     */
    public void insertEdgesWithVertex(final Multimap<String, String> edges, final String edgeType,
            final String targetVertexType) throws IOException {
        if (edges.isEmpty()) {
            return;
        }

        for (final Multimap<String, String> edgePartition : partitionEdges(edges)) {
            insertEdgesWithVertexPartition(edgePartition, edgeType, targetVertexType);
        }
    }

    /**
     * Splits an edge multimap into partitions of at most {@link #PARTITION_SIZE} source
     * vertices each, keeping all targets of a source in the same partition.
     */
    private static List<Multimap<String, String>> partitionEdges(final Multimap<String, String> edges) {
        final List<String> sourceVertices = new ArrayList<>(edges.keySet());
        final List<Multimap<String, String>> partitions = new ArrayList<>();
        for (final List<String> sourcePartition : Lists.partition(sourceVertices, PARTITION_SIZE)) {
            final Multimap<String, String> edgePartition = ArrayListMultimap.create();
            for (final String sourceVertexURI : sourcePartition) {
                edgePartition.putAll(sourceVertexURI, edges.get(sourceVertexURI));
            }
            partitions.add(edgePartition);
        }
        return partitions;
    }

    private void insertEdgesWithVertexPartition(final Multimap<String, String> edges, final String edgeType,
            final String targetVertexType) throws IOException {
        final StringBuilder insertQueryBuilder = new StringBuilder(SPARQL_RDF_PREFIX);
        insertQueryBuilder.append("INSERT DATA {");
        edgesToTriples(edges, edgeType, insertQueryBuilder);
        // The leading "." separates each type triple from the triples already emitted.
        for (final String targetVertex : edges.values()) {
            insertQueryBuilder
                    .append(String.format(". %s rdf:type %s", brackets(targetVertex), brackets(targetVertexType)));
        }
        insertQueryBuilder.append("}");

        // run the update
        runUpdate(insertQueryBuilder.toString());
    }

    // update properties

    /** Updates a single property; convenience wrapper around {@link #updateProperties}. */
    public void updateProperty(final String vertex, final String type, final Object value) throws IOException {
        updateProperties(Collections.singletonMap(vertex, value), type);
    }

    /**
     * Replaces the value of property {@code type} on each given vertex, partitioned to
     * respect the shell argument length limit.
     */
    public void updateProperties(final Map<String, Object> properties, final String type) throws IOException {
        if (properties.isEmpty()) {
            return;
        }

        final List<String> vertexURIs = new ArrayList<>(properties.keySet());
        for (final List<String> vertexURIpartition : Lists.partition(vertexURIs, PARTITION_SIZE)) {
            final Map<String, Object> propertyPartition = new HashMap<>();
            for (final String vertexURI : vertexURIpartition) {
                propertyPartition.put(vertexURI, properties.get(vertexURI));
            }
            updatePropertiesPartition(propertyPartition, type);
        }
    }

    private void updatePropertiesPartition(final Map<String, Object> properties, final String type)
            throws IOException {
        final StringBuilder updateQueryBuilder = new StringBuilder(SPARQL_RDF_PREFIX);
        // Counter appended to variable names so each DELETE clause uses a unique variable.
        int i = 0;

        // delete the old property values
        for (final Entry<String, Object> property : properties.entrySet()) {
            final String vertex = property.getKey();

            i++;
            updateQueryBuilder.append(String.format("DELETE { %s %s ?a%d } WHERE { %s %s ?a%d }; ",
                    brackets(vertex), brackets(type), i, brackets(vertex), brackets(type), i));
        }

        // insert the new property values
        boolean first = true;
        updateQueryBuilder.append("INSERT DATA {");
        for (final Entry<String, Object> property : properties.entrySet()) {
            if (first) {
                first = false;
            } else {
                updateQueryBuilder.append(".");
            }
            final String vertex = property.getKey();
            final String value = RDFUtil.toLiteral(property.getValue());
            updateQueryBuilder.append(String.format(" %s %s %s ", brackets(vertex), brackets(type), value));
        }
        updateQueryBuilder.append("}");

        // run the update
        runUpdate(updateQueryBuilder.toString());
    }

    // delete vertices

    /** Deletes a single vertex; convenience wrapper around {@link #deleteVertices}. */
    public void deleteVertex(final String uri) throws IOException {
        deleteVertices(Collections.singletonList(uri));
    }

    /**
     * Deletes the given vertices together with all their incoming/outgoing edges and
     * properties, partitioned to respect the shell argument length limit.
     */
    public void deleteVertices(final List<String> uris) throws IOException {
        if (uris.isEmpty()) {
            return;
        }

        for (final List<String> partition : Lists.partition(uris, PARTITION_SIZE)) {
            deleteVertexPartition(partition);
        }
    }

    private void deleteVertexPartition(final List<String> uris) throws IOException {
        final StringBuilder deleteQueryBuilder = new StringBuilder();

        // add a number to each variable name in the SPARQL query in order to make it unique
        long i = 0;
        for (final String vertex : uris) {
            // if we try to use DELETE DATA (as in the deleteEdge() method), 4store throws an error:
            // DELETE WHERE { x } not yet supported, use DELETE { x } WHERE { x }
            i++;
            // delete "incoming edges"
            deleteQueryBuilder.append(String.format("DELETE { ?a%d ?b%d %s } WHERE { ?a%d ?b%d %s }; ", i, i,
                    brackets(vertex), i, i, brackets(vertex)));
            i++;
            // delete "outgoing edges" and "properties"
            deleteQueryBuilder.append(String.format("DELETE { %s ?a%d ?b%d } WHERE { %s ?a%d ?b%d }; ",
                    brackets(vertex), i, i, brackets(vertex), i, i));
        }

        runUpdate(deleteQueryBuilder.toString());
    }

    // delete edges

    /** Deletes a single edge; convenience wrapper around {@link #deleteEdges}. */
    public void deleteEdge(final String sourceVertexURI, final String targetVertexURI, final String type)
            throws IOException {
        final Multimap<String, String> edges = HashMultimap.create();
        edges.put(sourceVertexURI, targetVertexURI);
        deleteEdges(edges, type);
    }

    // NOTE(review): unlike the insert operations, deletes are not partitioned; large
    // edge sets could exceed the shell argument length limit — TODO confirm intent.
    public void deleteEdges(final Multimap<String, String> edges, final String type) throws IOException {
        if (edges.isEmpty()) {
            return;
        }

        deleteEdgesPartition(edges, type);
    }

    private void deleteEdgesPartition(final Multimap<String, String> edges, final String type) throws IOException {
        final StringBuilder deleteQueryBuilder = new StringBuilder("DELETE DATA {");
        edgesToTriples(edges, type, deleteQueryBuilder);
        deleteQueryBuilder.append("}");
        runUpdate(deleteQueryBuilder.toString());
    }

    // helper methods

    /**
     * Runs a SPARQL update through the {@code 4s-update} command.
     *
     * <p>NOTE(review): the query text is spliced into a single-quoted shell argument;
     * a query containing {@code '} would break the quoting.
     */
    public void runUpdate(final String query) throws IOException {
        final String command = String.format("4s-update $FOURSTORE_CLUSTER_NAME '%s'", query);

        if (showCommands) {
            System.out.println(command);
        }
        UnixUtils.exec(command, environment);
    }

    /** Creates a builder pre-populated with the rdf: prefix and an opening INSERT DATA clause. */
    protected StringBuilder initBuilder() {
        final StringBuilder insertQueryBuilder = new StringBuilder(SPARQL_RDF_PREFIX);
        insertQueryBuilder.append("INSERT DATA {");
        return insertQueryBuilder;
    }

    /**
     * Appends one {@code <source> <edgeLabel> <target>} triple per edge to the builder,
     * separated by "." (no separator before the first triple).
     */
    protected void edgesToTriples(final Multimap<String, String> edges, final String edgeLabel,
            final StringBuilder insertQueryBuilder) {
        boolean first = true;
        for (final Entry<String, String> edge : edges.entries()) {
            if (first) {
                first = false;
            } else {
                insertQueryBuilder.append(".");
            }
            insertQueryBuilder.append(String.format(" %s %s %s ", brackets(edge.getKey()), brackets(edgeLabel),
                    brackets(edge.getValue())));
        }
    }

}