// Source listing for org.apache.taverna.provenance.api.ProvenanceAccess
// (web-page boilerplate from the original "Java tutorial" listing reduced
// to this comment so the file is valid Java).

/*******************************************************************************
 * Copyright (C) 2009 The University of Manchester
 *
 *  Modifications to the initial code base are copyright of their
 *  respective authors, or their employers as appropriate.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2.1 of
 *  the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *
 ******************************************************************************/
package org.apache.taverna.provenance.api;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import static java.sql.Connection.TRANSACTION_READ_UNCOMMITTED;
import static javax.naming.Context.INITIAL_CONTEXT_FACTORY;

import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.naming.InitialContext;
import javax.naming.NamingException;

import org.apache.taverna.invocation.InvocationContext;
import org.apache.taverna.invocation.impl.InvocationContextImpl;
import org.apache.taverna.provenance.Provenance;
import org.apache.taverna.provenance.ProvenanceConnectorFactory;
import org.apache.taverna.provenance.connector.AbstractProvenanceConnector;
import org.apache.taverna.provenance.lineageservice.Dependencies;
import org.apache.taverna.provenance.lineageservice.LineageQueryResultRecord;
import org.apache.taverna.provenance.lineageservice.ProvenanceAnalysis;
import org.apache.taverna.provenance.lineageservice.ProvenanceQuery;
import org.apache.taverna.provenance.lineageservice.ProvenanceWriter;
import org.apache.taverna.provenance.lineageservice.utils.Collection;
import org.apache.taverna.provenance.lineageservice.utils.DataLink;
import org.apache.taverna.provenance.lineageservice.utils.DataflowInvocation;
import org.apache.taverna.provenance.lineageservice.utils.PortBinding;
import org.apache.taverna.provenance.lineageservice.utils.ProcessorEnactment;
import org.apache.taverna.provenance.lineageservice.utils.ProvenanceProcessor;
import org.apache.taverna.provenance.lineageservice.utils.Port;
import org.apache.taverna.provenance.lineageservice.utils.Workflow;
import org.apache.taverna.provenance.lineageservice.utils.WorkflowRun;
import org.apache.taverna.provenance.lineageservice.utils.WorkflowTree;
import org.apache.taverna.reference.ReferenceService;
import org.apache.taverna.reference.T2Reference;

import org.apache.commons.dbcp.BasicDataSource;
import org.apache.log4j.Logger;

/**
 * This API is the single access point into the Taverna provenance database. Its
 * main functionality is to let clients query the content of the DB, either
 * using dedicated methods that retrieve specific entity values from the DB, or
 * through a more general XML-based query language. Examples of XML provenance
 * queries can be found in the external package
 * {@link net.sf.taverna.t2.provenance.apic.client.resources}. Class
 * {@link net.sf.taverna.t2.provenance.api.client.ProvenanceAPISampleClient}
 * provides an example of API client that third parties would use to interact
 * with this API.
 * <p/>
 * The XML schema for the XML query language is {@code pquery.xsd} in
 * {@link net.sf.taverna.t2.provenance.apic.client.resources}
 * 
 * @author Paolo Missier
 * @author Stuart Owen
 */
public class ProvenanceAccess implements Provenance {
    private static Logger logger = Logger.getLogger(ProvenanceAccess.class);

    private AbstractProvenanceConnector connector = null;
    private ProvenanceAnalysis analyser = null;
    private ProvenanceQuery querier;
    private ProvenanceWriter writer;

    private String connectorType;
    private final List<ProvenanceConnectorFactory> provenanceConnectorFactories;

    /**
     * Creates a ProvenanceAccess for the given connector type, initialising it
     * with a default reference service.
     *
     * @param connectorType the connector type to look up among the factories
     * @param provenanceConnectorFactories factories used to locate the connector
     */
    public ProvenanceAccess(String connectorType, List<ProvenanceConnectorFactory> provenanceConnectorFactories) {
        this.provenanceConnectorFactories = provenanceConnectorFactories;
        this.connectorType = connectorType;
        init();
    }

    /**
     * Creates a ProvenanceAccess for the given connector type, initialising it
     * against a caller-supplied invocation context.
     *
     * @param connectorType the connector type to look up among the factories
     * @param context the invocation context to initialise with
     * @param provenanceConnectorFactories factories used to locate the connector
     */
    public ProvenanceAccess(String connectorType, InvocationContext context,
            List<ProvenanceConnectorFactory> provenanceConnectorFactories) {
        this.provenanceConnectorFactories = provenanceConnectorFactories;
        this.connectorType = connectorType;
        init(context);
    }

    /**
     * The recommended data source initialisation method, where only a driver
     * name and jdbc url are required.<p/>
     * If the driver supports multiple connections, then a pool will be created
     * of 10 min idle, 50 max idle, and 50 max active connections.
     * 
     * @param driverClassName the classname for the JDBC driver to be used
     * @param jdbcUrl the jdbc connection url
     */
    public static void initDataSource(String driverClassName, String jdbcUrl) {
        initDataSource(driverClassName, jdbcUrl, null, null, 10, 50, 50);
    }

    /**
     * Initialises a named JNDI DataSource if not already set up externally. The
     * DataSource is named jdbc/taverna
     * 
     * @param driverClassName
     *            - the classname for the driver to be used.
     * @param jdbcUrl
     *            - the jdbc connection url
     * @param username
     *            - the username, if required (otherwise null)
     * @param password
     *            - the password, if required (otherwise null)
     * @param minIdle
     *            - if the driver supports multiple connections, then the
     *            minimum number of idle connections in the pool
     * @param maxIdle
     *            - if the driver supports multiple connections, then the
     *            maximum number of idle connections in the pool
     * @param maxActive
     *            - if the driver supports multiple connections, then the
     *            maximum number of active connections in the pool
     */
    public static void initDataSource(String driverClassName, String jdbcUrl, String username, String password,
            int minIdle, int maxIdle, int maxActive) {
        // Use Simple-JNDI's in-memory context so rebinding works without a container
        System.setProperty(INITIAL_CONTEXT_FACTORY, "org.osjava.sj.memory.MemoryContextFactory");
        System.setProperty("org.osjava.sj.jndi.shared", "true");

        BasicDataSource ds = new BasicDataSource();
        ds.setDriverClassName(driverClassName);
        ds.setDefaultTransactionIsolation(TRANSACTION_READ_UNCOMMITTED);
        ds.setMaxActive(maxActive);
        ds.setMinIdle(minIdle);
        ds.setMaxIdle(maxIdle);
        ds.setDefaultAutoCommit(true);
        if (username != null)
            ds.setUsername(username);
        if (password != null)
            ds.setPassword(password);
        ds.setUrl(jdbcUrl);

        try {
            new InitialContext().rebind("jdbc/taverna", ds);
        } catch (NamingException ex) {
            logger.error("Problem rebinding the jdbc context", ex);
        }
    }

    /**
     * Initialises a default Reference Service for storing data and their associated references.
     * This creates a reference service using the named JNDI Data Source 'jdbc/taverna'.<br/>
     * the new Reference Service is associated to the {@link AbstractProvenanceConnector}, enabling data references to be resolved
     *
     * @return an invocation context wrapping the default reference service
     */
    @Override
    public InvocationContext initDefaultReferenceService() {
        // FIXME
        return initReferenceService("hibernateReferenceServiceContext.xml");
    }

    /**
     * Initialises the Reference Service for a given hibernate context definition.
     * This mapping file must be available in the root of the classpath.
     * @see #initDefaultReferenceService()
     * @param hibernateContext
     *            NOTE(review): this parameter is currently ignored by the
     *            implementation below (see FIXME) -- the context is built from
     *            the {@code refService} field instead; confirm intended behaviour
     * @return an invocation context wrapping the current reference service and connector
     */
    @Override
    public InvocationContext initReferenceService(String hibernateContext) {
        // FIXME
        return new InvocationContextImpl(refService, connector);
    }

    private ReferenceService refService;

    /**
     * Replaces the Reference Service used by this ProvenanceAccess (and its
     * connector, if one has already been resolved), overriding the default
     * created at construction time.
     *
     * @param refService the replacement reference service
     */
    public void setReferenceService(ReferenceService refService) {
        this.refService = refService;
        if (connector == null)
            return;
        connector.setReferenceService(refService);
    }

    /**
     * Initialises the provenance access layer using a freshly created default
     * reference service.
     */
    @Override
    public void init() {
        init(initDefaultReferenceService());
    }

    /**
     * Initialises the provenance access layer against the given invocation
     * context: locates the connector whose type matches {@code connectorType}
     * (case-insensitively, last matching factory wins), then wires up the
     * analyser, querier and writer obtained from that connector.
     *
     * @param context the invocation context whose reference service is handed
     *            to the connector and analyser
     * @throws IllegalStateException if no registered factory provides the
     *             configured connector type; the original code fell through to
     *             an uninformative NullPointerException at connector.init()
     */
    @Override
    public void init(InvocationContext context) {
        for (ProvenanceConnectorFactory factory : provenanceConnectorFactories)
            if (connectorType.equalsIgnoreCase(factory.getConnectorType()))
                connector = factory.getProvenanceConnector();
        if (connector == null)
            throw new IllegalStateException(
                    "No provenance connector factory found for connector type: " + connectorType);
        logger.info("Provenance being captured using: " + connector);

        //slight change, the init is outside but it also means that the init call has to ensure that the dbURL
        //is set correctly
        connector.init();

        connector.setReferenceService(context.getReferenceService()); // CHECK context.getReferenceService());
        connector.setInvocationContext(context);

        analyser = connector.getProvenanceAnalysis();
        analyser.setInvocationContext(context);

        querier = connector.getQuery();
        writer = connector.getWriter();
        writer.setQuery(querier);

        logger.info("using writer of type: " + writer.getClass());
    }

    /*
     * main provenance query methods
     */

    /**
     * Executes a provenance query. Please see separate doc. for the XML query
     * language schema.
     * <p>
     * Note: only the <i>first</i> run ID in the query's run ID list is used.
     *
     * @param pq the provenance query to execute
     * @return the answer produced by the lineage analyser
     * @throws IllegalArgumentException if the query carries no run IDs (the
     *             original code failed with a bare exception from {@code get(0)})
     * @throws SQLException if the underlying database query fails
     */
    @Override
    public QueryAnswer executeQuery(Query pq) throws SQLException {
        List<String> runIDs = pq.getRunIDList();
        if (runIDs == null || runIDs.isEmpty())
            throw new IllegalArgumentException("Query must specify at least one run ID");
        return analyser.lineageQuery(pq.getTargetPorts(), runIDs.get(0), pq.getSelectedProcessors());
    }

    /**
     * Returns individual records from the provenance DB in response to a query
     * that specifies specific elements within values associated with a
     * processor port, in the context of a specific run of a workflow. <br/>
     * This is used in the workbench to retrieve the "intermediate results" at
     * various points during workflow execution, as opposed to a set of
     * dependencies in response to a full-fledged provenance query.
     * 
     * @param workflowRunId
     *            lineage scope -- a specific instance
     * @param workflowId
     *            the workflow within which the processor is defined
     * @param processorName
     *            for a specific processor [required]
     * @param portName
     *            a specific (input or output) port [optional]
     * @param iteration
     *            and a specific iteration [optional]
     * @return a list of {@link LineageQueryResultRecord}, encapsulated in a
     *         {@link Dependencies} object, or null if the DB lookup fails
     */
    @Override
    public Dependencies fetchPortData(String workflowRunId, String workflowId, String processorName,
            String portName, String iteration) {
        logger.info("running fetchPortData on instance " + workflowRunId + " workflow " + workflowId + " processor "
                + processorName + " port " + portName + " iteration " + iteration);
        // TODO add context workflowID to query
        try {
            return analyser.fetchIntermediateResult(workflowRunId, workflowId, processorName, portName, iteration);
        } catch (SQLException e) {
            logger.error("Problem with fetching intermediate results", e);
            return null;
        }
    }

    /**
     * @param record a record representing a single value -- possibly within a list hierarchy
     * @return the URI for topmost containing collection when the input record is within a list hierarchy, or null otherwise
     */
    @Override
    public String getContainingCollection(LineageQueryResultRecord record) {
        // Straight delegation to the query layer.
        return querier.getContainingCollection(record);
    }

    /*
     * manage instances
     */

    /**
     * @param workflowId
     *            defines the scope of the query - if null then the query runs
     *            on all available workflows
     * @param conditions
     *            additional conditions to be defined. This is a placeholder as
     *            conditions are currently ignored
     * @return a list of workflowRunId, each representing one run of the input
     *         workflowID, or null if the DB lookup fails
     */
    @Override
    public List<WorkflowRun> listRuns(String workflowId, Map<String, String> conditions) {
        try {
            return querier.getRuns(workflowId, conditions);
        } catch (SQLException e) {
            logger.error("Problem with listing runs", e);
            return null;
        }
    }

    /**
     * @param workflowId the internal workflow identifier
     * @return true if the given workflow is a top-level (non-nested) dataflow
     */
    @Override
    public boolean isTopLevelDataflow(String workflowId) {
        return querier.isTopLevelDataflow(workflowId);
    }

    /**
     * @param workflowId the internal workflow identifier
     * @param workflowRunId the run within which the workflow is considered
     * @return true if the given workflow is the top-level dataflow of the given run
     */
    @Override
    public boolean isTopLevelDataflow(String workflowId, String workflowRunId) {
        return querier.isTopLevelDataflow(workflowId, workflowRunId);
    }

    /**
     * @return the internal ID of the most recent workflow run recorded in the DB
     * @throws SQLException if the DB lookup fails
     */
    @Override
    public String getLatestRunID() throws SQLException {
        return querier.getLatestRunID();
    }

    /**
     * Removes all records that pertain to a specific run (but not the static
     * specification of the workflow run)
     * 
     * @param runID
     *            the internal ID of a run. This can be obtained using
     *            {@link #listRuns(String, Map)}
     * @return the set of data references that pertain to the deleted run. This
     *         can be used by the Data Manager to ensure that no dangling
     *         references are left in the main Taverna data repository.
     *         Returns null if the deletion fails with an SQL error.
     */
    @Override
    public Set<String> removeRun(String runID) {
        // implement using clearDynamic() method or a variation. Collect references and forward
        try {
            Set<String> danglingDataRefs = writer.clearDBDynamic(runID);

            if (logger.isDebugEnabled())
                logger.debug("references collected during removeRun: " + danglingDataRefs);

            // TODO send the list of dangling refs to the Data manager for removal of the corresponding data values
            return danglingDataRefs;
        } catch (SQLException e) {
            logger.error("Problem while removing run : " + runID, e);
            return null;
        }
    }

    /**
     * removes all records pertaining to the static structure of a workflow.
     * SQL failures are logged and otherwise ignored.
     * 
     * @param workflowId
     *            the ID (not the external name) of the workflow whose static
     *            structure is to be deleted from the DB
     */
    @Override
    public void removeWorkflow(String workflowId) {
        try {
            writer.clearDBStatic(workflowId);
        } catch (SQLException e) {
            logger.error("Problem with removing static workflow: " + workflowId, e);
        }
    }

    /**
     * returns a set of workflowIDs for a given runID. The set is a singleton if
     * the workflow has no nesting, but in general the list contains one
     * workflowID for each nested workflow involved in the run
     * 
     * @param runID
     *            the internal ID for a specific workflow run
     * @return a list of workflow IDs, one for each nested workflow involved in
     *         the input run, or null if the DB lookup fails
     */
    @Override
    public List<String> getWorkflowID(String runID) {
        try {
            return querier.getWorkflowIdsForRun(runID);
        } catch (SQLException e) {
            logger.error("Problem getting workflow ID: " + runID, e);
            return null;
        }
    }

    /**
     * @param runID
     *            the internal ID for a specific workflow run
     * @return the ID of the top-level workflow that executed during the input
     *         run, or null if the DB lookup fails
     */
    @Override
    public String getTopLevelWorkflowID(String runID) {
        try {
            return querier.getTopLevelWorkflowIdForRun(runID);
        } catch (SQLException e) {
            logger.error("Problem getting top level workflow: " + runID, e);
            return null;
        }
    }

    /**
     * @param runID the internal ID for a specific workflow run
     * @return the {@link Workflow} beans (top-level plus any nested workflows)
     *         involved in the given run, or null if the DB lookup fails
     */
    @Override
    public List<Workflow> getWorkflowsForRun(String runID) {
        try {
            return querier.getWorkflowsForRun(runID);
        } catch (SQLException e) {
            logger.error("Problem getting workflows for run:" + runID, e);
            return null;
        }
    }

    /**
     * Note: despite its name, this returns descriptions of all <i>runs</i>
     * (it calls the same query as {@link #listRuns(String, Map)} with no scope).
     *
     * @return a list of {@link WorkflowRun} beans, each representing the
     *         complete description of a workflow run (note that this is not
     *         just the ID of the run), or null if the DB lookup fails
     */
    @Override
    public List<WorkflowRun> getAllWorkflowIDs() {
        try {
            return querier.getRuns(null, null);
        } catch (SQLException e) {
            logger.error("Problem getting all workflow IDs", e);
            return null;
        }
    }

    //   / access static workflow structure

    /**
     * @param workflowID
     * @return a Map: workflowID -> [ {@link ProvenanceProcessor} ] Each entry
     *         in the list pertains to one composing sub-workflow (if no nesting
     *         then this contains only one workflow, namely the top level one)
     */
    @Override
    public Map<String, List<ProvenanceProcessor>> getProcessorsInWorkflow(String workflowID) {
        // null = no processor-type filter; traverses nested workflows ("deep")
        return querier.getProcessorsDeep(null, workflowID);
    }

    /**
     * @param wfInstanceID the internal ID of a workflow run
     * @return the {@link Collection} records produced during the given run
     */
    @Override
    public List<Collection> getCollectionsForRun(String wfInstanceID) {
        return querier.getCollectionsForRun(wfInstanceID);
    }

    /**
     * @param constraints column-name -> value constraints applied to the port binding query
     * @return the matching {@link PortBinding} records
     * @throws SQLException if the DB lookup fails
     */
    @Override
    public List<PortBinding> getPortBindings(Map<String, String> constraints) throws SQLException {
        return querier.getPortBindings(constraints);
    }

    /**
     * lists all ports for a workflow
     * 
     * @param workflowID
     * @return a list of {@link Port} beans, each representing an input or
     *         output port for the workflow
     */
    @Override
    public List<Port> getPortsForDataflow(String workflowID) {
        return querier.getPortsForDataflow(workflowID);
    }

    /**
     * lists all ports in a workflow, including those of its processors
     * 
     * @param workflowID
     * @return a list of {@link Port} beans, each representing an input or
     *         output port for the workflow or a processor in the workflow
     */
    @Override
    public List<Port> getAllPortsInDataflow(String workflowID) {
        return querier.getAllPortsInDataflow(workflowID);
    }

    /**
     * list all ports for a specific processor within a workflow
     * 
     * @param workflowID
     * @param processorName
     * @return a list of {@link Port} beans, each representing an input or
     *         output port for the input processor
     */
    @Override
    public List<Port> getPortsForProcessor(String workflowID, String processorName) {
        return querier.getPortsForProcessor(workflowID, processorName);
    }

    // PM added 5/2010
    /**
     * Resolves a workflow ID to the workflow's external (human-readable) name.
     *
     * @param workflowID the internal workflow identifier
     * @return the external name of the workflow, or null when no workflow with
     *         the given ID exists (the original code threw an uninformative
     *         NullPointerException in that case)
     */
    @Override
    public String getWorkflowNameByWorkflowID(String workflowID) {
        Workflow workflow = querier.getWorkflow(workflowID);
        return workflow == null ? null : workflow.getExternalName();
    }

    /**
     * @param workflowID the internal workflow identifier
     * @return the tree of nested workflows rooted at the given workflow
     * @throws SQLException if the DB lookup fails
     */
    @Override
    public WorkflowTree getWorkflowNestingStructure(String workflowID) throws SQLException {
        return querier.getWorkflowNestingStructure(workflowID);
    }

    //   public List<ProvenanceProcessor> getSuccessors(String workflowID, String processorName, String portName) {
    //   return null; // TODO
    //   }

    //   public List<String>   getActivities(String workflowID, String processorName) {
    //   return null; // TODO
    //   }

    //   / configure provenance query functionality

    /**
     * include values of output ports in the query result? input port values are
     * always included<br>
     * default is FALSE
     *
     * @param active true to include output port values in query results
     */
    @Override
    public void toggleIncludeProcessorOutputs(boolean active) {
        analyser.setReturnOutputs(active);
    }

    /**
     * @return true if output port values are currently included in query results
     */
    @Override
    public boolean isIncludeProcessorOutputs() {
        return analyser.isReturnOutputs();
    }

    /**
     * @return an instance of {@link InvocationContext} that can be used by a
     *         client to deref a Taverna data reference
     */
    @Override
    public InvocationContext getInvocationContext() {
        return getProvenanceConnector().getInvocationContext();
    }

    //   / OPM management

    /**
     * should an OPM graph be generated in response to a query?<br>
     * default is TRUE
     *
     * @param active true to enable OPM graph generation
     */
    @Override
    public void toggleOPMGeneration(boolean active) {
        analyser.setGenerateOPMGraph(active);
    }

    /**
     * 
     * @return true if OPM is set to be generated in response to a query
     */
    @Override
    public boolean isOPMGenerationActive() {
        return analyser.isGenerateOPMGraph();
    }

    /**
     * should actual artifact values be attached to OPM artifact nodes?<br>
     * default is FALSE<br/>
     * THIS IS CURRENTLY UNSUPPORTED -- DEFAULTS TO FALSE
     * 
     * @param active true to record artifact values in OPM graphs
     */
    @Override
    public void toggleAttachOPMArtifactValues(boolean active) {
        analyser.setRecordArtifactValues(active);
    }

    /**
     * @return true if the OPM graph artifacts are annotated with actual values
     */
    @Override
    public boolean isAttachOPMArtifactValues() {
        return analyser.isRecordArtifactValues();
    }

    /**
     * Looks up the internal workflow ID for a workflow's external name.
     *
     * @deprecated as workflow 'names' are not globally unique, this method
     *             should not be used!
     * @param workflowName the external (display) name of the workflow
     * @return the internal workflow ID matching the given external name
     */
    @Deprecated
    @Override
    public String getWorkflowIDForExternalName(String workflowName) {
        return querier.getWorkflowIdForExternalName(workflowName);
    }

    /**
     * @param workflowID the internal workflow identifier
     * @return the {@link ProvenanceProcessor} records for the given workflow
     *         (this workflow only; no recursion into nested workflows here)
     */
    @Override
    public List<ProvenanceProcessor> getProcessorsForWorkflowID(String workflowID) {
        return querier.getProcessorsForWorkflow(workflowID);
    }

    /**
     * @return the singleton {@link AbstractProvenanceConnector} used by the API
     *         to operate on the DB. Currently we support MySQL
     *         {@link MySQLProvenanceConnector} and Derby
     *         {@link DerbyProvenanceConnector} connectors. The set of supported
     *         connectors is extensible. The available connectors are discovered
     *         automatically by the API upon startup, and it includes all the
     *         connectors that are mentioned in the &lt;dependencies> section of
     *         pom.xml for Maven module
     *         {@code net.sf.taverna.t2.core.provenanceconnector}
     */
    @Override
    public AbstractProvenanceConnector getProvenanceConnector() {
        return connector;
    }

    /**
     * @param provenanceConnector
     *            a specific provenanceConnector used by the API
     */
    public void setProvenanceConnector(AbstractProvenanceConnector provenanceConnector) {
        this.connector = provenanceConnector;
    }

    /**
     * @return the {@link ProvenanceAnalysis} (lineage analyser) in use by this API
     */
    @Override
    public ProvenanceAnalysis getAnalysis() {
        return analyser;
    }

    /**
     * @param pa
     *            the provenance analyser to set
     */
    public void setPa(ProvenanceAnalysis pa) {
        this.analyser = pa;
    }

    /**
     * @return the {@link ProvenanceQuery} (querier) in use by this API
     */
    @Override
    public ProvenanceQuery getQuery() {
        return querier;
    }

    /**
     * @param pq
     *            the provenance querier to set
     */
    public void setPq(ProvenanceQuery pq) {
        this.querier = pq;
    }

    /**
     * @param workflowRunId the internal ID of a workflow run
     * @param processorPath optional path of processor names narrowing the lookup
     * @return the {@link ProcessorEnactment} records for the run (and path, if given)
     */
    @Override
    public List<ProcessorEnactment> getProcessorEnactments(String workflowRunId, String... processorPath) {
        return querier.getProcessorEnactments(workflowRunId, processorPath);
    }

    /**
     * @param workflowRunId the internal ID of a workflow run
     * @param processIdentifier the process identifier of the enactment
     * @param iteration the iteration index of the enactment
     * @return the matching {@link ProcessorEnactment}
     */
    @Override
    public ProcessorEnactment getProcessorEnactmentByProcessId(String workflowRunId, String processIdentifier,
            String iteration) {
        return querier.getProcessorEnactmentByProcessId(workflowRunId, processIdentifier, iteration);
    }

    /**
     * @param processorEnactmentId the internal ID of a processor enactment
     * @return the matching {@link ProcessorEnactment}
     */
    @Override
    public ProcessorEnactment getProcessorEnactment(String processorEnactmentId) {
        return querier.getProcessorEnactment(processorEnactmentId);
    }

    /**
     * @param workflowId the internal ID of the workflow containing the processor
     * @param processorNameRef the processor's name within that workflow
     * @return the matching {@link ProvenanceProcessor}
     */
    @Override
    public ProvenanceProcessor getProvenanceProcessor(String workflowId, String processorNameRef) {
        return querier.getProvenanceProcessorByName(workflowId, processorNameRef);
    }

    /**
     * @param processorId the internal ID of the processor
     * @return the matching {@link ProvenanceProcessor}
     */
    @Override
    public ProvenanceProcessor getProvenanceProcessor(String processorId) {
        return querier.getProvenanceProcessorById(processorId);
    }

    /**
     * Resolves the stored string-form references of a data binding into
     * {@link T2Reference}s via the connector's reference service.
     *
     * @param dataBindingId identifier of the data binding to resolve
     * @return a map from each bound port to its resolved data reference
     */
    @Override
    public Map<Port, T2Reference> getDataBindings(String dataBindingId) {
        Map<Port, String> rawBindings = querier.getDataBindings(dataBindingId);
        Map<Port, T2Reference> resolved = new HashMap<>();
        for (Entry<Port, String> binding : rawBindings.entrySet()) {
            T2Reference reference = getProvenanceConnector().getReferenceService()
                    .referenceFromString(binding.getValue());
            resolved.put(binding.getKey(), reference);
        }
        return resolved;
    }

    /**
     * @param workflowRunId the internal ID of a workflow run
     * @return the {@link DataflowInvocation} for the given run
     */
    @Override
    public DataflowInvocation getDataflowInvocation(String workflowRunId) {
        return querier.getDataflowInvocation(workflowRunId);
    }

    /**
     * @param processorEnactment an enactment of a (nested-workflow) processor
     * @return the {@link DataflowInvocation} associated with that enactment
     */
    @Override
    public DataflowInvocation getDataflowInvocation(ProcessorEnactment processorEnactment) {
        return querier.getDataflowInvocation(processorEnactment);
    }

    /**
     * @param workflowRunId the internal ID of a workflow run
     * @return all {@link DataflowInvocation}s recorded for the given run
     */
    @Override
    public List<DataflowInvocation> getDataflowInvocations(String workflowRunId) {
        return querier.getDataflowInvocations(workflowRunId);
    }

    /**
     * Retrieves all datalinks (arcs) defined within a workflow.
     *
     * @param workflowId the internal workflow identifier
     * @return the workflow's {@link DataLink}s, or null if the lookup fails
     */
    @Override
    public List<DataLink> getDataLinks(String workflowId) {
        Map<String, String> constraints = new HashMap<>();
        constraints.put("workflowId", workflowId);
        try {
            return querier.getDataLinks(constraints);
        } catch (SQLException e) {
            logger.error("Problem getting datalinks for workflow:" + workflowId, e);
            return null;
        }
    }
}