spade.storage.Prov.java Source code

Introduction

Here is the source code for spade.storage.Prov.java, a SPADE storage implementation that writes provenance graphs to a file in either W3C PROV-O (Turtle) or PROV-N notation, chosen by the output file's extension.

Source

/*
 --------------------------------------------------------------------------------
 SPADE - Support for Provenance Auditing in Distributed Environments.
 Copyright (C) 2015 SRI International
 This program is free software: you can redistribute it and/or
 modify it under the terms of the GNU General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 --------------------------------------------------------------------------------
 */
package spade.storage;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.util.FileManager;
import spade.core.AbstractEdge;
import spade.core.AbstractStorage;
import spade.core.AbstractVertex;
import spade.core.Graph;
import spade.edge.prov.Used;
import spade.edge.prov.WasAssociatedWith;
import spade.edge.prov.WasDerivedFrom;
import spade.edge.prov.WasGeneratedBy;
import spade.edge.prov.WasInformedBy;
import spade.vertex.prov.Activity;
import spade.vertex.prov.Agent;
import spade.vertex.prov.Entity;

import java.io.FileWriter;
import java.sql.ResultSet;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

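/**
 * SPADE storage that serializes provenance vertices and edges to a file in
 * either PROV-O (Turtle, ".ttl") or PROV-N (".provn") notation. The output
 * format is chosen from the extension of the file named by the "output"
 * argument; every other "prefix=file" argument names an RDF schema whose
 * declared properties determine which namespace prefix is used for each
 * annotation.
 */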
public class Prov extends AbstractStorage {

    public Logger logger = Logger.getLogger(this.getClass().getName());

    public static enum ProvFormat {
        PROVO, PROVN
    }

    private ProvFormat provOutputFormat;

    private FileWriter outputFile;
    private final int TRANSACTION_LIMIT = 1000;
    private int transaction_count;
    private String filePath;

    private final String provNamespacePrefix = "prov";
    private final String provNamespaceURI = "http://www.w3.org/ns/prov#";

    private final String defaultNamespacePrefix = "data";
    private final String defaultNamespaceURI = "http://spade.csl.sri.com/#";

    private Map<String, Set<String>> annotationToNamespaceMap = new HashMap<String, Set<String>>();
    private Map<String, String> namespacePrefixToURIMap = new HashMap<String, String>();

    private final String OUTFILE_KEY = "output";

    private final String TAB = "\t", NEWLINE = "\n";

    protected Pattern pattern_key_value = Pattern.compile("(\\w+)=\"*((?<=\")[^\"]+(?=\")|([^\\s]+))\"*");

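    /**
     * Parses the argument string into a key/value map; values may be quoted
     * (key="some value") or unquoted (key=value).
     */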
    protected Map<String, String> parseKeyValPairs(String arguments) {
        Matcher key_value_matcher = pattern_key_value.matcher(arguments);
        Map<String, String> keyValPairs = new HashMap<String, String>();
        while (key_value_matcher.find()) {
            keyValPairs.put(key_value_matcher.group(1).trim(), key_value_matcher.group(2).trim());
        }
        return keyValPairs;
    }

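    /**
     * Turtle (PROV-O) templates for each supported edge class, keyed by fully
     * qualified class name. The placeholders receive namespace prefixes, the
     * child and parent vertex hashes, and the formatted annotations.
     */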
    private final Map<String, String> provoStringFormatsForEdgeTypes = new HashMap<String, String>() {
        {
            put("spade.edge.prov.Used", "%s:%s %s:qualifiedUsage [\n\ta %s:Usage;\n\t%s:entity %s:%s;\n%s]; .\n\n");
            put("spade.edge.prov.WasAssociatedWith",
                    "%s:%s %s:qualifiedAssociation [\n\ta %s:Association;\n\t%s:agent %s:%s;\n%s]; .\n\n");
            put("spade.edge.prov.WasDerivedFrom",
                    "%s:%s %s:qualifiedDerivation [\n\ta %s:Derivation;\n\t%s:entity %s:%s;\n%s]; .\n\n");
            put("spade.edge.prov.WasGeneratedBy",
                    "%s:%s %s:qualifiedGeneration [\n\ta %s:Generation;\n\t%s:activity %s:%s;\n%s]; .\n\n");
            put("spade.edge.prov.WasInformedBy",
                    "%s:%s %s:qualifiedCommunication [\n\ta %s:Communication;\n\t%s:activity %s:%s;\n%s]; .\n\n");
        }
    };

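    /**
     * PROV-N statement templates for each supported edge class, keyed by
     * fully qualified class name.
     */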
    private final Map<String, String> provnStringFormatsForEdgeTypes = new HashMap<String, String>() {
        {
            put("spade.edge.prov.Used", "\tused(%s:%s,%s:%s, - ,%s)\n");
            put("spade.edge.prov.WasAssociatedWith", "\twasAssociatedWith(%s:%s,%s:%s, - ,%s)\n");
            put("spade.edge.prov.WasDerivedFrom", "\twasDerivedFrom(%s:%s,%s:%s,%s)\n");
            put("spade.edge.prov.WasGeneratedBy", "\twasGeneratedBy(%s:%s,%s:%s, - ,%s)\n");
            put("spade.edge.prov.WasInformedBy", "\twasInformedBy(%s:%s,%s:%s,%s)\n");
        }
    };

    private final String provoStringFormatForVertex = "%s:%s\n\ta %s:%s;\n%s .\n\n";
    private final String provnStringFormatForVertex = "\t%s(%s:%s,%s)\n";

    public SimpleDateFormat iso8601TimeFormat;

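    /**
     * Expects arguments of the form "output=<file> [nsPrefix=<rdf schema file> ...]".
     * The output format is inferred from the file extension, the schema files
     * are scanned for property names, and the output file is opened with the
     * document/prefix header appropriate to the chosen format. Fails if a
     * reserved prefix ("prov" or "data") is used as an argument key, a schema
     * cannot be read, or the output file cannot be opened.
     */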
    @Override
    public boolean initialize(String arguments) {
        Map<String, String> args = parseKeyValPairs(arguments);

        Map<String, String> nsPrefixToFileMap = new HashMap<String, String>();
        nsPrefixToFileMap.putAll(args);
        nsPrefixToFileMap.remove(OUTFILE_KEY); // drop the output file entry; the remaining entries map namespace prefixes to RDF schema files to read
        if (!nsPrefixToFileMap.containsKey(provNamespacePrefix)
                && !nsPrefixToFileMap.containsKey(defaultNamespacePrefix)) { // these prefixes are reserved
            if (loadAnnotationsFromRDFs(nsPrefixToFileMap)) {
                filePath = args.get(OUTFILE_KEY);
                provOutputFormat = getProvFormatByFileExt(filePath);
                if (provOutputFormat == null) {
                    if (args.get(OUTFILE_KEY) == null) {
                        logger.log(Level.SEVERE, "No output file specified.");
                    } else {
                        logger.log(Level.SEVERE, "Invalid file extension. Can only be 'provn' or 'ttl'.");
                    }
                    return false;
                } else {
                    try {
                        outputFile = new FileWriter(filePath, false);
                        transaction_count = 0;
                        switch (provOutputFormat) {
                        case PROVN:
                            outputFile.write("document\n");
                            for (String nsPrefix : namespacePrefixToURIMap.keySet()) {
                                outputFile.write(TAB + "prefix " + nsPrefix + " <"
                                        + namespacePrefixToURIMap.get(nsPrefix) + ">\n");
                            }
                            outputFile.write(
                                    TAB + "prefix " + defaultNamespacePrefix + " <" + defaultNamespaceURI + ">\n");
                            outputFile.write(NEWLINE);
                            break;
                        case PROVO:
                            for (String nsPrefix : namespacePrefixToURIMap.keySet()) {
                                outputFile.write("@prefix " + nsPrefix + ": <"
                                        + namespacePrefixToURIMap.get(nsPrefix) + "> .\n");
                            }
                            outputFile.write(
                                    "@prefix " + defaultNamespacePrefix + ": <" + defaultNamespaceURI + "> .\n");
                            outputFile.write("@prefix " + provNamespacePrefix + ": <" + provNamespaceURI + "> .\n");
                            outputFile.write(NEWLINE);
                            break;
                        default:
                            break;
                        }
                        iso8601TimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
                        iso8601TimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
                        return true;
                    } catch (Exception exception) {
                        logger.log(Level.SEVERE, "Error while writing to file", exception);
                        return false;
                    }
                }
            } else {
                return false;
            }
        } else {
            logger.log(Level.SEVERE, "The namespace prefixes '" + provNamespacePrefix + "' and '"
                    + defaultNamespacePrefix + "' are reserved");
            return false;
        }
    }

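    /**
     * Counts writes and, every TRANSACTION_LIMIT writes, flushes and reopens
     * the output file in append mode so that buffered output is persisted.
     */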
    private void checkTransactions() {
        transaction_count++;
        if (transaction_count == TRANSACTION_LIMIT) {
            try {
                outputFile.flush();
                outputFile.close();
                outputFile = new FileWriter(filePath, true);
                transaction_count = 0;
            } catch (Exception exception) {
                logger.log(Level.SEVERE, null, exception);
            }
        }
    }

    @Override
    public boolean shutdown() {
        try {
            switch (provOutputFormat) {
            case PROVO:
                //nothing
                break;
            case PROVN:
                outputFile.write("\nendDocument\n");
                break;
            default:
                break;
            }
            outputFile.close();
            return true;
        } catch (Exception exception) {
            logger.log(Level.SEVERE, null, exception);
            return false;
        }
    }

    @Override
    public boolean putVertex(AbstractVertex incomingVertex) {
        try {
            String serializedVertex = getSerializedVertex(incomingVertex);
            outputFile.write(serializedVertex);
            checkTransactions();
            // vertexCount++ is omitted here; the final commit filter already performs this increment
            return true;
        } catch (Exception e) {
            logger.log(Level.WARNING, null, e);
            return false;
        }
    }

    @Override
    public ResultSet executeQuery(String query) {
        return null;
    }

    @Override
    public boolean putEdge(AbstractEdge incomingEdge) {
        try {
            String serializedEdge = getSerializedEdge(incomingEdge);
            outputFile.write(serializedEdge);
            checkTransactions();
            // edgeCount++ is omitted here; the final commit filter already performs this increment
            return true;
        } catch (Exception e) {
            logger.log(Level.WARNING, null, e);
            return false;
        }
    }

    /**
     * This function queries the underlying storage and retrieves the edge
     * matching the given criteria.
     *
     * @param childVertexHash  hash of the source (child) vertex.
     * @param parentVertexHash hash of the destination (parent) vertex.
     * @return returns edge object matching the given vertices OR NULL.
     */
    @Override
    public AbstractEdge getEdge(String childVertexHash, String parentVertexHash) {
        return null;
    }

    /**
     * This function queries the underlying storage and retrieves the vertex
     * matching the given criteria.
     *
     * @param vertexHash hash of the vertex to find.
     * @return returns vertex object matching the given hash OR NULL.
     */
    @Override
    public AbstractVertex getVertex(String vertexHash) {
        return null;
    }

    /**
     * This function finds the children of a given vertex.
     * A child is defined as a vertex which is the source of a
     * direct edge between itself and the given vertex.
     *
     * @param parentHash hash of the given vertex
     * @return returns graph object containing children of the given vertex OR NULL.
     */
    @Override
    public Graph getChildren(String parentHash) {
        return null;
    }

    /**
     * This function finds the parents of a given vertex.
     * A parent is defined as a vertex which is the destination of a
     * direct edge between itself and the given vertex.
     *
     * @param childVertexHash hash of the given vertex
     * @return returns graph object containing parents of the given vertex OR NULL.
     */
    @Override
    public Graph getParents(String childVertexHash) {
        return null;
    }

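    /**
     * Infers the output format from the file extension: ".ttl" selects
     * PROV-O, ".provn" selects PROV-N, and anything else returns null.
     */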
    public ProvFormat getProvFormatByFileExt(String filepath) {
        filepath = String.valueOf(filepath).trim().toLowerCase();
        if (filepath.endsWith(".ttl")) {
            return ProvFormat.PROVO;
        } else if (filepath.endsWith(".provn")) {
            return ProvFormat.PROVN;
        }
        return null;
    }

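    /**
     * Serializes a vertex in the configured format. Vertices are identified
     * by the SHA-256 hex digest of their string representation; the simple
     * class name (Activity, Agent, Entity) supplies the type in PROV-O and
     * the statement name in PROV-N.
     */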
    public String getSerializedVertex(AbstractVertex vertex) {
        String vertexString = null;
        switch (provOutputFormat) {
        case PROVO:

            vertexString = String.format(provoStringFormatForVertex, defaultNamespacePrefix,
                    DigestUtils.sha256Hex(vertex.toString()), provNamespacePrefix,
                    vertex.getClass().getSimpleName(), getProvOFormattedKeyValPair(vertex.getAnnotations()));

            break;
        case PROVN:

            vertexString = String.format(provnStringFormatForVertex,
                    vertex.getClass().getSimpleName().toLowerCase(), defaultNamespacePrefix,
                    DigestUtils.sha256Hex(vertex.toString()), getProvNFormattedKeyValPair(vertex.getAnnotations()));

            break;
        default:
            break;
        }
        return vertexString;
    }

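    /**
     * Serializes an edge in the configured format using the template that
     * matches its class. The endpoints are referenced by the SHA-256 hex
     * digests of the child and parent vertices.
     */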
    public String getSerializedEdge(AbstractEdge edge) {
        String childVertexKey = DigestUtils.sha256Hex(edge.getChildVertex().toString());
        String parentVertexKey = DigestUtils.sha256Hex(edge.getParentVertex().toString());
        String edgeString = null;
        switch (provOutputFormat) {
        case PROVO:
            edgeString = String.format(provoStringFormatsForEdgeTypes.get(edge.getClass().getName()),
                    defaultNamespacePrefix, childVertexKey, provNamespacePrefix, provNamespacePrefix,
                    provNamespacePrefix, defaultNamespacePrefix, parentVertexKey,
                    getProvOFormattedKeyValPair(edge.getAnnotations()));

            break;
        case PROVN:
            edgeString = String.format(provnStringFormatsForEdgeTypes.get(edge.getClass().getName()),
                    defaultNamespacePrefix, childVertexKey, defaultNamespacePrefix, parentVertexKey,
                    getProvNFormattedKeyValPair(edge.getAnnotations()));
            break;
        default:
            break;
        }
        return edgeString;
    }

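    /**
     * Formats annotations as a PROV-N attribute list, e.g.
     * [ prefix:key="value",... ]; the "type" annotation is skipped and "time"
     * values are converted from Unix time to ISO 8601.
     */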
    private String getProvNFormattedKeyValPair(Map<String, String> keyvals) {
        StringBuffer string = new StringBuffer();
        string.append("[ ");
        for (String key : keyvals.keySet()) {
            if (!key.equals("type")) {
                String value = keyvals.get(key);
                if (key.equals("time")) {
                    value = convertUnixTimeToISO8601(value);
                }
                string.append(getNSPrefixForAnnotation(key)).append(":").append(key).append("=\"").append(value)
                        .append("\",");
            }
        }
        string.deleteCharAt(string.length() - 1);
        string.append("]");
        return string.toString();
    }

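    /**
     * Formats annotations as Turtle predicate-object pairs, one per line; the
     * "type" annotation is skipped and "time" values are converted from Unix
     * time to ISO 8601.
     */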
    private String getProvOFormattedKeyValPair(Map<String, String> keyvals) {
        StringBuffer annotationsString = new StringBuffer();
        for (Map.Entry<String, String> currentEntry : keyvals.entrySet()) {
            if (!currentEntry.getKey().equals("type")) {
                String value = currentEntry.getValue();
                if (currentEntry.getKey().equals("time")) {
                    value = convertUnixTimeToISO8601(value);
                }
                annotationsString.append(TAB).append(getNSPrefixForAnnotation(currentEntry.getKey())).append(":")
                        .append(currentEntry.getKey()).append(" \"").append(value).append("\";").append(NEWLINE);
            }
        }
        return annotationsString.toString();
    }

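    /**
     * Converts a Unix timestamp (seconds, possibly fractional) into an ISO
     * 8601 UTC string; returns an empty string if the value cannot be parsed.
     */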
    public String convertUnixTimeToISO8601(String timeAsString) {
        try {
            Date timeAsDateObject = new Date((long) (Double.parseDouble(timeAsString) * 1000));
            return iso8601TimeFormat.format(timeAsDateObject);
        } catch (Exception e) {
            logger.log(Level.WARNING, "Failed to parse time", e);
            return "";
        }
    }

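    /**
     * Returns a namespace prefix whose schema declared the given annotation
     * as a property; falls back to the default prefix (and logs a warning)
     * when no loaded schema defines it.
     */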
    private String getNSPrefixForAnnotation(String annotation) {
        if (annotationToNamespaceMap.get(annotation) != null
                && annotationToNamespaceMap.get(annotation).size() > 0) {
            return annotationToNamespaceMap.get(annotation).iterator().next();
        }
        logger.log(Level.WARNING,
                "The annotation '" + annotation + "' doesn't exist in any of the namespaces provided");
        return defaultNamespacePrefix;
    }

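    /**
     * Reads each RDF schema file, recording which property names each
     * namespace prefix declares and the namespace URI of each prefix. Logs a
     * warning when the same property name appears in more than one schema and
     * returns false if any file cannot be read.
     */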
    private boolean loadAnnotationsFromRDFs(Map<String, String> nsPrefixToFileMap) {

        for (String nsprefix : nsPrefixToFileMap.keySet()) {

            String rdfFile = nsPrefixToFileMap.get(nsprefix);

            Model model = null;
            try {
                model = FileManager.get().loadModel(rdfFile);

                StmtIterator stmtIterator = model.listStatements();

                while (stmtIterator.hasNext()) {
                    Statement statement = stmtIterator.nextStatement();

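                    // keep only statements declaring <subject> rdf:type Property,
                    // accepting Property in either the RDF or the RDFS namespace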
                    if (statement.getPredicate().getLocalName().equals("type")
                            && statement.getPredicate().getNameSpace()
                                    .contains("http://www.w3.org/1999/02/22-rdf-syntax-ns")
                            && (statement.getObject().asResource().getLocalName().equals("Property")
                                    && statement.getObject().asResource().getNameSpace()
                                            .contains("http://www.w3.org/2000/01/rdf-schema")
                                    || statement.getObject().asResource().getLocalName().equals("Property")
                                            && statement.getObject().asResource().getNameSpace()
                                                    .contains("http://www.w3.org/1999/02/22-rdf-syntax-ns"))) {
                        if (!(statement.getSubject().getLocalName() == null
                                || statement.getSubject().getNameSpace() == null)) {
                            Set<String> nsSet = null;
                            if ((nsSet = annotationToNamespaceMap
                                    .get(statement.getSubject().getLocalName())) == null) {
                                nsSet = new HashSet<String>();
                                annotationToNamespaceMap.put(statement.getSubject().getLocalName(), nsSet);
                            }
                            nsSet.add(nsprefix);
                            namespacePrefixToURIMap.put(nsprefix, statement.getSubject().getNameSpace());
                        }
                    }
                }

                model.close();
            } catch (Exception exception) {
                logger.log(Level.SEVERE, "Failed to read file '" + rdfFile + "'", exception);
                return false;
            }

        }

        for (String annotation : annotationToNamespaceMap.keySet()) {
            if (annotationToNamespaceMap.get(annotation).size() > 1) {
                List<String> filepaths = new ArrayList<String>();
                for (String nsPrefix : annotationToNamespaceMap.get(annotation)) {
                    filepaths.add(nsPrefixToFileMap.get(nsPrefix));
                }
                logger.log(Level.WARNING,
                        "Files " + filepaths + " all have the property with name '" + annotation + "'");
            }
        }

        return true;

    }

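    /**
     * Small self-test: builds an example provenance graph and writes it out
     * twice, once as Turtle (.ttl) and once as PROV-N (.provn).
     */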
    public static void main(String[] args) throws Exception {
        Activity a = new Activity();
        a.addAnnotation("name", "a1");
        Activity b = new Activity();
        b.addAnnotation("name", "a2");
        WasInformedBy e = new WasInformedBy(b, a);
        e.addAnnotation("operation", "forked");
        Entity f1 = new Entity();
        f1.addAnnotation("filename", "file_f1");
        Entity f2 = new Entity();
        f2.addAnnotation("filename", "file_f2");
        WasGeneratedBy e2 = new WasGeneratedBy(f1, a);
        e2.addAnnotation("operation", "write");
        WasDerivedFrom e3 = new WasDerivedFrom(f2, f1);
        e3.addAnnotation("operation", "rename");
        Agent agent = new Agent();
        agent.addAnnotation("user", "spade");
        WasAssociatedWith e4 = new WasAssociatedWith(a, agent);
        e4.addAnnotation("test", "anno");
        Used e5 = new Used(b, f2);
        e5.addAnnotation("operation", "read");

        Prov ttl = new Prov();
        ttl.initialize("output=/home/ubwork/prov.ttl audit=/home/ubwork/Desktop/audit.rdfs");
        Prov provn = new Prov();
        provn.initialize("output=/home/ubwork/prov.provn audit=/home/ubwork/Desktop/audit.rdfs");

        Prov[] provs = { ttl, provn };

        for (Prov prov : provs) {
            prov.putVertex(a);
            prov.putVertex(b);
            prov.putVertex(f1);
            prov.putVertex(f2);
            prov.putVertex(agent);
            prov.putEdge(e);
            prov.putEdge(e2);
            prov.putEdge(e3);
            prov.putEdge(e4);
            prov.putEdge(e5);
            prov.shutdown();
        }
    }
}