net.sf.taverna.t2.provenance.lineageservice.EventProcessor.java Source code

Introduction

Here is the source code for net.sf.taverna.t2.provenance.lineageservice.EventProcessor.java
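
EventProcessor records the static structure of a (possibly nested) Taverna 2 dataflow once, and then translates the stream of runtime provenance events into processor-binding and variable-binding records through a ProvenanceWriter. As a rough orientation, a caller drives it along these lines -- a minimal sketch, assuming pre-built pw (ProvenanceWriter), pq (ProvenanceQuery) and wfdp (WorkflowDataProcessor) components and a stream of ProvenanceItems from the surrounding provenance connector, none of which are defined in this file:

EventProcessor ep = new EventProcessor(pw, pq, wfdp);

// the first WorkflowProvenanceItem carries the whole dataflow, including any
// nested workflows; this stores its static structure and returns the workflow ID
String workflowID = ep.processWorkflowStructure(workflowItem);

// each subsequent runtime event is dispatched on its SharedVocabulary event type
for (ProvenanceItem item : runtimeItems) {
    ep.processProcessEvent(item, workflowID);
}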

Source

/*******************************************************************************
 * Copyright (C) 2007 The University of Manchester   
 * 
 *  Modifications to the initial code base are copyright of their
 *  respective authors, or their employers as appropriate.
 * 
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2.1 of
 *  the License, or (at your option) any later version.
 *    
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *    
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 ******************************************************************************/
package net.sf.taverna.t2.provenance.lineageservice;

import java.beans.ExceptionListener;
import java.beans.XMLEncoder;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.sql.Blob;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import javax.sql.rowset.serial.SerialBlob;

import net.sf.taverna.t2.provenance.item.InputDataProvenanceItem;
import net.sf.taverna.t2.provenance.item.IterationProvenanceItem;
import net.sf.taverna.t2.provenance.item.OutputDataProvenanceItem;
import net.sf.taverna.t2.provenance.item.ProvenanceItem;
import net.sf.taverna.t2.provenance.item.WorkflowProvenanceItem;
import net.sf.taverna.t2.provenance.lineageservice.utils.Arc;
import net.sf.taverna.t2.provenance.lineageservice.utils.NestedListNode;
import net.sf.taverna.t2.provenance.lineageservice.utils.ProcBinding;
import net.sf.taverna.t2.provenance.lineageservice.utils.ProvenanceUtils;
import net.sf.taverna.t2.provenance.lineageservice.utils.Var;
import net.sf.taverna.t2.provenance.lineageservice.utils.VarBinding;
import net.sf.taverna.t2.provenance.vocabulary.SharedVocabulary;
import net.sf.taverna.t2.workflowmodel.Dataflow;
import net.sf.taverna.t2.workflowmodel.DataflowInputPort;
import net.sf.taverna.t2.workflowmodel.DataflowOutputPort;
import net.sf.taverna.t2.workflowmodel.Datalink;
import net.sf.taverna.t2.workflowmodel.Processor;
import net.sf.taverna.t2.workflowmodel.ProcessorInputPort;
import net.sf.taverna.t2.workflowmodel.ProcessorOutputPort;
import net.sf.taverna.t2.workflowmodel.processor.activity.Activity;
import net.sf.taverna.t2.workflowmodel.serialization.xml.XMLSerializerRegistry;

import org.apache.commons.io.FileUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;

/**
 * @author Paolo Missier
 */
public class EventProcessor {
    /**
     * A map of UUIDs of the originating processor to the ProcBinding object
     * that contains its parameters
     */
    private Map<String, ProcBinding> procBindingMap = new ConcurrentHashMap<String, ProcBinding>();

    /** A map of child ids to their parents in the hierarchy of events:
     *  workflow -> process -> processor -> activity -> iteration
     */
    private Map<String, String> parentChildMap = new ConcurrentHashMap<String, String>();

    private static Logger logger = Logger.getLogger(EventProcessor.class);

    private static final String OUTPUT_CONTAINER_PROCESSOR = "_OUTPUT_";
    private static final String INPUT_CONTAINER_PROCESSOR = "_INPUT_";
    private static final String TEST_EVENTS_FOLDER = "/tmp/TEST-EVENTS";

    private static final String DATAFLOW_PROCESSOR_TYPE = "net.sf.taverna.t2.activities.dataflow.DataflowActivity";

    private static final String DUMMY_INSTANCE_ID = "dummyInstanceID";

    private int eventCnt = 0; // for events logging
    private volatile boolean workflowStructureDone = false; // used to inhibit processing of multiple workflow events -- we only need the first
    private int dataflowDepth = 0; // incremented when we recurse on a subflow, decremented on exit
    private volatile String wfInstanceID = null; // unique run ID. set when we see the first event of type "process"

    String topLevelDataflowName = null;
    String topLevelDataflowID = null;

    Map<String, String> wfNestingMap = new ConcurrentHashMap<String, String>();

    // all input bindings are accumulated here so they can be "backpatched" (see backpatching() )
    List<VarBinding> allInputVarBindings = Collections.synchronizedList(new ArrayList<VarBinding>());

    // dedicated class for processing WorkflowData events which carry workflow output info 
    private WorkflowDataProcessor wfdp;
    private ProvenanceWriter pw = null;
    private ProvenanceQuery pq = null;

    public EventProcessor() {
    }

    /**
     * @param pw
     * @throws SQLException
     * @throws ClassNotFoundException
     * @throws IllegalAccessException
     * @throws InstantiationException
     * 
     */
    public EventProcessor(ProvenanceWriter pw, ProvenanceQuery pq, WorkflowDataProcessor wfdp)
            throws InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException {
        this.pw = pw;
        this.pq = pq;
        this.wfdp = wfdp;

        logger.setLevel(Level.INFO);
    }

    /**
     * this is the new version that makes use of the T2 deserializer to
     * populate the static portion of the DB<br/>
     * the static structure may already be in the DB -- this is detected as a
     * duplicate top-level workflow ID, in which case this processing is
     * skipped altogether
     * @param provenanceItem
     *            a WorkflowProvenanceItem whose Dataflow carries the workflow
     *            structure
     * @return the ID of the top level dataflow, or null if the structure has
     *         already been processed
     */
    public String processWorkflowStructure(ProvenanceItem provenanceItem) {

        // this flag is set to prevent processing of separate workflowProvenanceItems that describe nested workflows.
        // the processing of all nested workflows is done as part of the very first workflowProvenanceItem that we receive,
        // which is self-consistent. so we ignore all others
        if (workflowStructureDone) {
            return null;
        }

        setWfInstanceID(((WorkflowProvenanceItem) provenanceItem).getIdentifier());
        //      logger.debug("Workflow instance is: " + getWfInstanceID());
        Dataflow df = null;

        df = ((WorkflowProvenanceItem) provenanceItem).getDataflow();

        workflowStructureDone = true;

        return processWorkflowStructure(df);
    }

    public String processWorkflowStructure(Dataflow df) {

        topLevelDataflowName = df.getLocalName();
        topLevelDataflowID = df.getInternalIdentier();

        // check whether we already have this WF in the DB
        List<String> wfNames = null;
        try {
            wfNames = pq.getAllWFnames();
        } catch (SQLException e) {
            logger.warn("Problem processing workflow structure", e);
        }

        if (wfNames != null && wfNames.contains(topLevelDataflowID)) { // already in the DB
            //         logger.info("workflow structure with ID "+topLevelDataflowID+" is in the DB -- clearing static portion");

            // clearing the portion of the static DB that pertains to this specific WF.
            // it is going to be rewritten right away in the rest of this method
            // this is simpler to implement than selectively avoiding duplicate writes to the DB
            try {
                pw.clearDBStatic(topLevelDataflowID);
            } catch (SQLException e) {
                logger.warn("Can't clear static database for " + topLevelDataflowID, e);
            }

        } else {
            //         logger.info("new workflow structure with ID "+topLevelDataflowID);
        }

        // record the top level dataflow as a processor in the DB
        try {
            pw.addProcessor(topLevelDataflowName, DATAFLOW_PROCESSOR_TYPE, topLevelDataflowID, true); // true -> is top level
        } catch (SQLException e) {
            logger.warn("Can't add processor " + topLevelDataflowID, e);
        }

        //      logger.info("top level wf name: "+topLevelDataflowName);
        return processDataflowStructure(df, topLevelDataflowID, df.getLocalName()); // the top level dataflow is named by its own local name
    }

    /**
     * note: this method can be called as part of a recursion on sub-workflows
     * @param df 
     * @param dataflowID the UUID for the entire dataflow (may be a sub-dataflow)
     * @param externalName the external name of the dataflow: its local name if this is the top level, the containing processor's name if a sub-dataflow
     * @return the dataflowID that was passed in
     */
    @SuppressWarnings("unchecked")
    public String processDataflowStructure(Dataflow df, String dataflowID, String externalName) {

        String localWfInstanceID = getWfInstanceID();

        dataflowDepth++;

        try {

            List<Var> vars = new ArrayList<Var>();

            // check whether we already have this WF in the DB
            List<String> wfNames = null;
            try {
                wfNames = pq.getAllWFnames();
            } catch (SQLException e) {
                logger.warn("Problem processing dataflow structure for " + dataflowID, e);
            }

            if (wfNames != null && wfNames.contains(dataflowID)) { // already in the DB
                //            logger.info("workflow structure with ID "+dataflowID+" is in the DB -- clearing static portion");

                // clearing the portion of the static DB that pertains to this specific WF.
                // it is going to be rewritten right away in the rest of this method
                // this is simpler to implement than selectively avoiding duplicate writes to the DB
                pw.clearDBStatic(dataflowID);
            } else {
                //            logger.warn("new workflow structure with ID "+dataflowID);
            }

            // //////
            // add workflow ID -- this is NOT THE SAME AS the wfInstanceID
            // /////      

            // this could be a nested workflow -- in this case, override its wfInstanceID with that of its parent
            String parentDataflow = wfNestingMap.get(dataflowID);

            // serialize the dataflow once so it can be stored as a blob alongside its ID
            Element serializedDataflow = XMLSerializerRegistry.getInstance().getSerializer()
                    .serializeDataflow(df);
            String dataflowString = null;
            try {
                XMLOutputter outputter = new XMLOutputter();
                StringWriter stringWriter = new StringWriter();
                outputter.output(serializedDataflow, stringWriter);
                dataflowString = stringWriter.toString();
            } catch (java.io.IOException e) {
                logger.error("Could not serialise dataflow", e);
            }
            Blob blob = new SerialBlob(dataflowString.getBytes("UTF-8"));

            if (parentDataflow == null) {
                // this is a top level dataflow description
                pw.addWFId(dataflowID, null, externalName, blob); // set its dataflowID with no parent

                if (getWfInstanceID() == null)
                    setWfInstanceID(DUMMY_INSTANCE_ID);

            } else {
                // we are processing a nested workflow structure
                logger.debug("dataflow " + dataflowID + " with external name " + externalName + " is nested within "
                        + parentDataflow);

                pw.addWFId(dataflowID, parentDataflow, externalName, blob); // set its dataflowID along with its parent

                // override wfInstanceID to point to the top level run
                localWfInstanceID = pq.getRuns(parentDataflow, null).get(0).getInstanceID();
            }
            pw.addWFInstanceId(dataflowID, localWfInstanceID); // associate the dataflow with the current run

            // //////
            // add processors along with their variables
            // /////
            List<? extends Processor> processors = df.getProcessors();

            for (Processor p : processors) {

                //            logger.info("adding processor "+p.getLocalName());

                String pName = p.getLocalName();

                //CHECK get type of first activity and set this as the type of the processor itself
                List<? extends Activity<?>> activities = p.getActivityList();

                String pType = null;
                if (activities != null && !activities.isEmpty()) {
                    pType = activities.get(0).getClass().getCanonicalName();
                }
                pw.addProcessor(pName, pType, dataflowID, false); // false: not a top level processor

                // ///
                // add all input ports for this processor as input variables
                // ///
                List<? extends ProcessorInputPort> inputs = p.getInputPorts();

                for (ProcessorInputPort ip : inputs) {

                    Var inputVar = new Var();

                    inputVar.setPName(pName);
                    inputVar.setWfInstanceRef(dataflowID);
                    inputVar.setVName(ip.getName());
                    inputVar.setTypeNestingLevel(ip.getDepth());
                    inputVar.setInput(true);

                    //               logger.info("processDataflowStructure: adding input var "+pName+":"+ip.getName());

                    vars.add(inputVar);
                }

                // ///
                // add all output ports for this processor as output variables
                // ///
                List<? extends ProcessorOutputPort> outputs = p.getOutputPorts();

                for (ProcessorOutputPort op : outputs) {

                    Var outputVar = new Var();

                    outputVar.setPName(pName);
                    outputVar.setWfInstanceRef(dataflowID);
                    outputVar.setVName(op.getName());
                    outputVar.setTypeNestingLevel(op.getDepth());
                    outputVar.setInput(false);

                    vars.add(outputVar);
                }

                // check for nested structures: if the activity is DataflowActivity
                // then this processor is a nested workflow
                // make an entry into wfNesting map with its ID and recurse on the nested workflow 

                for (Activity a : activities) {

                    if (a.getClass().getCanonicalName().contains("DataflowActivity")) {

                        Dataflow nested = (Dataflow) a.getConfiguration();
                        logger.debug("RECURSION ON nested workflow: " + p.getLocalName() + " with id: "
                                + nested.getInternalIdentier() + " from " + externalName + " at depth "
                                + dataflowDepth);

                        wfNestingMap.put(nested.getInternalIdentier(), dataflowID); // child -> parent

                        //////////////
                        /// RECURSIVE CALL 
                        //////////////
                        processDataflowStructure(nested, nested.getInternalIdentier(), p.getLocalName());

                        // PM added 5/10
                        dataflowDepth--;

                        //List<? extends Processor> procs = nested.getProcessors();                  
                        //                  for (Processor nestedP:procs) {
                        //                  System.out.println("recursion on nested processor: "+nestedP.getLocalName());
                        //                  }

                    }
                }

            } // end for each processor

            // ////
            // add inputs to entire dataflow
            // ////

            String pName = INPUT_CONTAINER_PROCESSOR; // overridden -- see below

            // check whether we are processing a nested workflow. in this case
            // the input vars are not assigned to the INPUT processor but to the containing dataflow

            if (externalName != null) { // override the default if we are nested or an external name was provided
                pName = externalName;
            }

            List<? extends DataflowInputPort> inputPorts = df.getInputPorts();

            for (DataflowInputPort ip : inputPorts) {

                Var inputVar = new Var();

                inputVar.setPName(pName);
                inputVar.setWfInstanceRef(dataflowID);
                inputVar.setVName(ip.getName());
                inputVar.setTypeNestingLevel(ip.getDepth());
                inputVar.setInput(true); // CHECK PM modified 11/08 -- input vars are actually outputs of input processors...

                vars.add(inputVar);
            }

            // ////
            // add outputs of entire dataflow
            // ////
            pName = OUTPUT_CONTAINER_PROCESSOR; // overridden -- see below

            // check whether we are processing a nested workflow. in this case
            // the output vars are not assigned to the OUTPUT processor but to the containing dataflow

            if (externalName != null) { // we are nested
                pName = externalName;
            }

            List<? extends DataflowOutputPort> outputPorts = df.getOutputPorts();

            for (DataflowOutputPort op : outputPorts) {

                Var outputVar = new Var();

                outputVar.setPName(pName);
                outputVar.setWfInstanceRef(dataflowID);
                outputVar.setVName(op.getName());
                outputVar.setTypeNestingLevel(op.getDepth());
                outputVar.setInput(false); // CHECK PM modified 11/08 -- output vars are actually outputs of output processors... 
                vars.add(outputVar);
            }

            pw.addVariables(vars, dataflowID);

            // ////
            // add arc records using the dataflow links
            // retrieving the processor names requires navigating from links to
            // source/sink and from there to the processors
            // ////
            List<? extends Datalink> links = df.getLinks();

            for (Datalink l : links) {

                // TODO cover the case of arcs from an input and to an output to
                // the entire dataflow

                String sourcePname = null;
                String sinkPname = null;

                if (l.getSource() instanceof ProcessorOutputPort) {
                    sourcePname = ((ProcessorOutputPort) l.getSource()).getProcessor().getLocalName();
                } else {
                    //               System.out.println("found link from dataflow input");
                }

                if (l.getSink() instanceof ProcessorInputPort) {
                    sinkPname = ((ProcessorInputPort) l.getSink()).getProcessor().getLocalName();
                } else {
                    //               System.out.println("found link to dataflow output");
                }

                if (sourcePname != null && sinkPname != null) {
                    //               System.out.println("adding regular internal arc");

                    pw.addArc(l.getSource().getName(), sourcePname, l.getSink().getName(), sinkPname, dataflowID);

                } else if (sourcePname == null) {
                    // link is from dataflow input or subflow input
                    if (externalName != null) { // link from subflow input
                        sourcePname = externalName;
                    } else {
                        sourcePname = INPUT_CONTAINER_PROCESSOR;
                    }

                    //Ian added this logic since there were some null sinkPnameRefs with merge ports

                    if (sinkPname == null) {
                        // link is to dataflow output

                        if (externalName != null) { // link to subflow output
                            sinkPname = externalName;
                        } else {
                            sinkPname = OUTPUT_CONTAINER_PROCESSOR;
                        }
                    }

                    //               System.out.println("adding arc from dataflow input");

                    pw.addArc(l.getSource().getName(), sourcePname, l.getSink().getName(), sinkPname, dataflowID);

                } else if (sinkPname == null) {
                    // link is to dataflow output

                    if (externalName != null) { // link to subflow output
                        sinkPname = externalName;
                    } else {
                        sinkPname = OUTPUT_CONTAINER_PROCESSOR;
                    }

                    //Ian added this bit at the same time as the null sinkPnameRef logic above - hope it is correct

                    if (sourcePname == null) {
                        // link is from dataflow input or subflow input
                        if (externalName != null) { // link from subflow input
                            sourcePname = externalName;
                        } else {
                            sourcePname = INPUT_CONTAINER_PROCESSOR;
                        }
                    }

                    //               System.out.println("adding arc to dataflow output");

                    pw.addArc(l.getSource().getName(), sourcePname, l.getSink().getName(), sinkPname, dataflowID);
                }
            }
            //         logger.info("completed processing dataflow " + dataflowID);

        } catch (Exception e) {
            logger.error("Problem processing provenance for dataflow", e);
        }

        //      logger.debug("wfInstanceID at the end of processDataflowStructure: "+getWfInstanceID());

        return dataflowID;
    }

    private Element stripWfInstanceHeader(String content) {

        SAXBuilder b = new SAXBuilder();
        Document d;

        try {
            d = b.build(new StringReader(content));

            // get identifier from <workflowItem> element
            Element root = d.getRootElement();

            setWfInstanceID(root.getAttributeValue("identifier"));

            Namespace ns = Namespace.getNamespace("http://taverna.sf.net/2008/xml/t2flow");

            Element workflowEl = root.getChild("workflow", ns);

            return workflowEl;

        } catch (JDOMException e) {
            logger.warn("Problem stripping workflow instance header", e);
        } catch (IOException e) {
            logger.warn("Problem stripping workflow instance header", e);
        }

        return null;
    }

    /**
     * processes an elementary process execution event from T2. Collects info
     * from events as they happen and sends them to the writer for processing
     * when the iteration event is received. Uses the map of procBindings to
     * process event id and the map of child ids to parent ids to ensure that
     * the correct proc binding is used
     * @param currentWorkflowID 
     * 
     * @param d
     * @param context 
     */
    public void processProcessEvent(ProvenanceItem provenanceItem, String currentWorkflowID) {

        if (provenanceItem.getEventType().equals(SharedVocabulary.PROCESS_EVENT_TYPE)) {

            String parentId = provenanceItem.getParentId(); // this is the workflowID
            String identifier = provenanceItem.getIdentifier(); // use this as wfInstanceID if this is the top-level process

            parentChildMap.put(identifier, parentId);
            ProcBinding pb = new ProcBinding();
            pb.setExecIDRef(getWfInstanceID());
            pb.setWfNameRef(currentWorkflowID);
            procBindingMap.put(identifier, pb);

        } else if (provenanceItem.getEventType().equals(SharedVocabulary.PROCESSOR_EVENT_TYPE)) {

            String identifier = provenanceItem.getIdentifier();
            String parentId = provenanceItem.getParentId();
            String processID = provenanceItem.getProcessId(); // this is the external process ID

            // this has the composite form facade0:dataflowname:pname -- extract pname, the last component
            String[] processName = processID.split(":");
            procBindingMap.get(parentId).setPNameRef(processName[processName.length - 1]); // last component of the composite name

            parentChildMap.put(identifier, parentId);

        } else if (provenanceItem.getEventType().equals(SharedVocabulary.ACTIVITY_EVENT_TYPE)) {

            String identifier = provenanceItem.getIdentifier();
            String parentId = provenanceItem.getParentId();
            procBindingMap.get(parentChildMap.get(parentId)).setActName(identifier);
            parentChildMap.put(identifier, parentId);

        } else if (provenanceItem.getEventType().equals(SharedVocabulary.ITERATION_EVENT_TYPE)) {

            // traverse up to root to retrieve ProcBinding that was created when we saw the process event 
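            // e.g. parentChildMap now holds iteration -> activity -> processor -> process,
            // so two map lookups from the activity ID recover the process-level ID used as the ProcBinding key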
            String iterationID = provenanceItem.getIdentifier();
            String activityID = provenanceItem.getParentId();
            String processorID = parentChildMap.get(activityID);
            String processID = parentChildMap.get(processorID);
            parentChildMap.put(iterationID, activityID);
            ProcBinding procBinding = procBindingMap.get(processID);

            String itVector = extractIterationVector(
                    ProvenanceUtils.iterationToString(((IterationProvenanceItem) provenanceItem).getIteration()));
            procBinding.setIterationVector(itVector);
            InputDataProvenanceItem inputDataEl = ((IterationProvenanceItem) provenanceItem).getInputDataItem();
            OutputDataProvenanceItem outputDataEl = ((IterationProvenanceItem) provenanceItem).getOutputDataItem();
            processInput(inputDataEl, procBinding, currentWorkflowID);
            processOutput(outputDataEl, procBinding, currentWorkflowID);

            try {
                getPw().addProcessorBinding(procBinding);
            } catch (SQLException e) {
                logger.warn("provenance has duplicate processor binding -- skipping the insertion"); //, e);
            }
        } else if (provenanceItem.getEventType().equals(SharedVocabulary.END_WORKFLOW_EVENT_TYPE)) {

            // use this event to do housekeeping on the input/output varbindings 

            dataflowDepth--;

            // process the outputs accumulated by WorkflowDataProcessor
            getWfdp().processTrees(provenanceItem.getWorkflowId(), getWfInstanceID());

            // PM changed 5/10 -- CHECK
            String dataflowName = getPq().getWfFromDataflowID(provenanceItem.getParentId()).getExternalName();
            reconcileLocalOutputs(dataflowName, provenanceItem.getParentId()); // patchTopLevelOutputs      

            if (dataflowDepth == 0) {

                patchTopLevelInputs();

                workflowStructureDone = false; // CHECK reset for next run... 

                getPw().closeCurrentModel(); // only real impl is for RDF
            }

        } else if (provenanceItem.getEventType().equals(SharedVocabulary.WORKFLOW_DATA_EVENT_TYPE)) {
            // give this event to a WorkflowDataProcessor object for pre-processing
            //         try {
            // TODO may generate an exception when the data is an error CHECK
            getWfdp().addWorkflowDataItem(provenanceItem);
            //         } catch (NumberFormatException e) {
            //         logger.error(e);
            //         }
            //         logger.info("Received workflow data - not processing");
            //FIXME not sure  - needs to be stored somehow

        } else if (provenanceItem.getEventType().equals((SharedVocabulary.ERROR_EVENT_TYPE))) {
            //TODO process the error

        } else {
            // TODO broken, should we throw something here?
            return;
        }

    }

    /**
     * fills in the VarBindings for the global inputs -- this removes the need for explicit events
     * that account for these value bindings
     */
    public void patchTopLevelInputs() {

        // for each input I to topLevelDataflow:
        // pick first outgoing arc with sink P:X
        // copy value X to I -- this can be a collection, so copy everything

        // get all global input vars

        //      logger.info("\n\n BACKPATCHING GLOBAL INPUTS with dataflowDepth = "+dataflowDepth+"*******\n");

        List<Var> inputs = null;
        try {
            inputs = getPq().getInputVars(topLevelDataflowName, topLevelDataflowID, getWfInstanceID());

            for (Var input : inputs) {

                //            logger.info("global input: "+input.getVName());

                Map<String, String> queryConstraints = new HashMap<String, String>();

                queryConstraints.put("sourceVarNameRef", input.getVName());
                queryConstraints.put("sourcePNameRef", input.getPName());

                List<Arc> outgoingArcs = getPq().getArcs(queryConstraints);

                // any arc will do, use the first
                String targetPname = outgoingArcs.get(0).getSinkPnameRef();
                String targetVname = outgoingArcs.get(0).getSinkVarNameRef();

                //            logger.info("copying values from ["+targetPname+":"+targetVname+"] for instance ID: ["+wfInstanceID+"]");

                queryConstraints.clear();
                queryConstraints.put("varNameRef", targetVname);
                queryConstraints.put("V.pNameRef", targetPname);
                queryConstraints.put("VB.wfInstanceRef", getWfInstanceID());
                queryConstraints.put("V.wfInstanceRef", topLevelDataflowID);

                List<VarBinding> VBs = getPq().getVarBindings(queryConstraints);

                //            logger.info("found the following VBs:");
                for (VarBinding vb : VBs) {
                    //               logger.info(vb.getValue());

                    // insert VarBinding back into VB with the global input varname
                    vb.setPNameRef(input.getPName());
                    vb.setVarNameRef(input.getVName());
                    getPw().addVarBinding(vb);

                    //               logger.info("added");

                }

            }
        } catch (SQLException e) {
            logger.warn("Patch top level inputs problem for provenance", e);
        } catch (IndexOutOfBoundsException e) {
            logger.error("Could not patch top level", e);
        }

    }

    public void reconcileTopLevelOutputs() {
        reconcileLocalOutputs(topLevelDataflowName, topLevelDataflowID);
    }

    // PM added 23/4/09
    /**
     * reconciles the top level outputs with the results from their immediate predecessors in the graph.<br/>
     * Various cases have to be considered: predecessors may include records that are not in the output,
     * while the output may include nested list structures that are not in the predecessors. This method performs
     * a 2-way reconciliation that considers all possible cases.<br/>
     * At the end, outputs and their predecessors contain the same data.<p/>
     * NOTE: if we assume that data values (URIs) are <em>always</em> unique, this could be greatly simplified by just
     * comparing the two sets of value records by their URIs and reconciling them. But that is not the way it is done here.
     */
    public void reconcileLocalOutputs(String dataflowName, String dataflowID) {
        /*
        for each output O

          for each variable V in predecessors(O)

            fetch all VB records for O into list OValues
            fetch all VB records for V into list YValues

            compare OValues and YValues:
            it SHOULD be the case that OValues is a subset of YValues. Under this assumption:

            for each vb in YValues:
            - if there is a matching o in OValues then (vb may be missing collection information)
                copy o to vb
              else
                if vb has no collection info && there is a matching tree node tn in OTree (use the iteration index for the match) then
                  set vb to be in collection tn
                  copy vb to o

        finally copy all Collection records for O in OTree -- catch duplicate errors
         */

        Map<String, String> queryConstraints = new HashMap<String, String>();

        List<Var> outputs = null;
        try {

            outputs = pq.getOutputVars(dataflowName, dataflowID, null); // null InstanceID 

            // for each output O
            for (Var output : outputs) {

                // collect all VBs for O
                //            String oPName = output.getPName();
                //            String oVName = output.getVName();
                //            queryConstraints.put("varNameRef", oVName);
                //            queryConstraints.put("V.pNameRef", oPName);
                //            queryConstraints.put("VB.wfInstanceRef", wfInstanceID);
                //            queryConstraints.put("V.wfInstanceRef", topLevelDataflowID);

                //            List<VarBinding> OValues = pq.getVarBindings(queryConstraints);

                // find all records for the immediate predecessor Y of O
                queryConstraints.clear();
                queryConstraints.put("sinkVarNameRef", output.getVName());
                queryConstraints.put("sinkPNameRef", output.getPName());

                List<Arc> incomingArcs = pq.getArcs(queryConstraints);

                // there can be only one -- but check that there is one!
                if (incomingArcs.size() == 0)
                    continue;

                String sourcePname = incomingArcs.get(0).getSourcePnameRef();
                String sourceVname = incomingArcs.get(0).getSourceVarNameRef();

                queryConstraints.clear();
                queryConstraints.put("varNameRef", sourceVname);
                queryConstraints.put("V.pNameRef", sourcePname);
                queryConstraints.put("VB.wfInstanceRef", getWfInstanceID());
                queryConstraints.put("V.wfInstanceRef", dataflowID);

                List<VarBinding> YValues = pq.getVarBindings(queryConstraints);

                // for each YValue look for a match in OValues
                // (assume the YValues are a superset of the OValues)

                for (VarBinding yValue : YValues) {

                    //               System.out.println("reconcileTopLevelOutputs:: processing "+
                    //               yValue.getPNameRef()+"/"+yValue.getVarNameRef()+"/"+yValue.getValue()+
                    //               " with collid "+yValue.getCollIDRef());

                    // look for a matching record in VarBinding for output O
                    queryConstraints.clear();
                    queryConstraints.put("varNameRef", output.getVName());
                    queryConstraints.put("V.pNameRef", output.getPName());
                    queryConstraints.put("VB.wfInstanceRef", getWfInstanceID());
                    queryConstraints.put("V.wfInstanceRef", dataflowID);
                    queryConstraints.put("VB.iteration", yValue.getIteration());
                    if (yValue.getCollIDRef() != null) {
                        queryConstraints.put("VB.collIDRef", yValue.getCollIDRef());
                        queryConstraints.put("VB.positionInColl", Integer.toString(yValue.getPositionInColl()));
                    }
                    List<VarBinding> matchingOValues = pq.getVarBindings(queryConstraints);

                    //               System.out.println("querying for matching oValues: ");

                    // result at most size 1
                    if (matchingOValues.size() > 0) {

                        VarBinding oValue = matchingOValues.get(0);

                        //                  System.out.println("found "+oValue.getPNameRef()+"/"+oValue.getVarNameRef()+"/"+oValue.getValue()+
                        //                  " with collid "+oValue.getCollIDRef());

                        // copy collection info from oValue to yValue                  
                        yValue.setCollIDRef(oValue.getCollIDRef());
                        yValue.setPositionInColl(oValue.getPositionInColl());

                        pw.updateVarBinding(yValue);

                        //                  System.out.println("oValue copied to yValue");
                    } else {

                        //                  System.out.println("no match found");

                        // copy the yValue to O 
                        // insert VarBinding back into VB with the global output varname
                        yValue.setPNameRef(output.getPName());
                        yValue.setVarNameRef(output.getVName());
                        pw.addVarBinding(yValue);
                    }

                } // for each yValue in YValues

                // copy all Collection records for O to Y 

                // get all collections refs for O
                queryConstraints.clear();
                queryConstraints.put("wfInstanceRef", getWfInstanceID());
                queryConstraints.put("PNameRef", output.getPName());
                queryConstraints.put("varNameRef", output.getVName());

                List<NestedListNode> oCollections = pq.getNestedListNodes(queryConstraints);

                // insert back as collection refs for Y -- catch duplicates
                for (NestedListNode nln : oCollections) {
                    //               System.out.println("collection: "+nln.getCollId());

                    nln.setPNameRef(sourcePname);
                    nln.setVarNameRef(sourceVname);

                    getPw().replaceCollectionRecord(nln, sourcePname, sourceVname);
                }

            } // for each output var

        } catch (SQLException e) {
            logger.warn("Problem reconciling top level outputs", e);
        }

    }

    @SuppressWarnings("unchecked")
    private void processOutput(OutputDataProvenanceItem provenanceItem, ProcBinding procBinding,
            String currentWorkflowID) {

        Element dataItemAsXML = ProvenanceUtils.getDataItemAsXML(provenanceItem);
        List<Element> outputPorts = dataItemAsXML.getChildren("port");
        for (Element outputport : outputPorts) {

            String portName = outputport.getAttributeValue("name");

            // value type may vary
            List<Element> valueElements = outputport.getChildren();
            if (valueElements != null && valueElements.size() > 0) {

                Element valueEl = valueElements.get(0); // only really 1 child

                processVarBinding(valueEl, procBinding.getPNameRef(), portName, procBinding.getIterationVector(),
                        getWfInstanceID(), currentWorkflowID);
            }
        }

    }

    /**
     * this method reconciles values in varBindings across an arc. Firstly, if vb's value is within a collection,
     * <em>and</em> it is copied from a value generated during a previous iteration,
     * then this method propagates the list reference to that iteration value, which wouldn't have it.
     * Conversely, if vb is going to be input to an iteration, then it has lost its containing list node, and we
     * put it back in by looking at the corresponding predecessor
     * @param newBindings
     * @throws SQLException 
     */
    private void backpatchIterationResults(List<VarBinding> newBindings) throws SQLException {

        logger.debug("backpatchIterationResults: start");
        for (VarBinding vb : newBindings) {

            logger.debug("backpatchIterationResults: processing vb " + vb.getPNameRef() + "/" + vb.getVarNameRef()
                    + "=" + vb.getValue());

            if (vb.getCollIDRef() != null) { // this is a member of a collection
                logger.debug("...which is inside a collection ");
            }

            // look for its antecedent
            Map<String, String> queryConstraints = new HashMap<String, String>();

            queryConstraints.put("sinkVarNameRef", vb.getVarNameRef());
            queryConstraints.put("sinkPNameRef", vb.getPNameRef());
            queryConstraints.put("wfInstanceRef", pq.getWfNames(vb.getWfInstanceRef()).get(0)); // CHECK picking first element in list...

            List<Arc> incomingArcs = pq.getArcs(queryConstraints);

            // there can be only one -- but check that there is one!
            if (incomingArcs.size() == 0)
                return;

            String sourcePname = incomingArcs.get(0).getSourcePnameRef();
            String sourceVname = incomingArcs.get(0).getSourceVarNameRef();

            logger.debug("antecedent: " + sourcePname + ":" + sourceVname);

            // get the varbindings for this port and select the one with the same iteration vector as its successor
            queryConstraints.clear();
            queryConstraints.put("varNameRef", sourceVname);
            queryConstraints.put("V.pNameRef", sourcePname);
            queryConstraints.put("VB.value", vb.getValue());
            queryConstraints.put("VB.wfInstanceRef", vb.getWfInstanceRef());

            List<VarBinding> VBs = pq.getVarBindings(queryConstraints);

            if (VBs.size() == 0) {
                logger.debug("nothing to reconcile");
            }

            // reconcile
            for (VarBinding b : VBs) {

                logger.debug("backpatching " + sourceVname + " " + sourcePname);

                if (vb.getCollIDRef() != null && b.getCollIDRef() == null) {

                    logger.debug("successor " + vb.getVarNameRef() + " is in collection " + vb.getCollIDRef()
                            + " but pred " + b.getVarNameRef() + " is not");
                    logger.debug("putting " + b.getVarNameRef() + " in collection " + vb.getCollIDRef() + " at pos "
                            + vb.getPositionInColl());
                    b.setCollIDRef(vb.getCollIDRef());
                    b.setPositionInColl(vb.getPositionInColl());
                    getPw().updateVarBinding(b);

                } else if (vb.getCollIDRef() == null && b.getCollIDRef() != null) {

                    logger.debug("successor " + vb.getVarNameRef() + " is NOT in collection but pred "
                            + b.getVarNameRef() + " IS");
                    logger.debug("putting " + vb.getVarNameRef() + " in collection " + b.getCollIDRef() + " at pos "
                            + b.getPositionInColl());
                    vb.setCollIDRef(b.getCollIDRef());
                    vb.setPositionInColl(b.getPositionInColl());
                    getPw().updateVarBinding(vb);
                }
            }
        }
    }

    /**
     * create one new VarBinding record for each input port binding
     * @param currentWorkflowID 
     */
    @SuppressWarnings("unchecked")
    private void processInput(InputDataProvenanceItem provenanceItem, ProcBinding procBinding,
            String currentWorkflowID) {

        Element dataItemAsXML = ProvenanceUtils.getDataItemAsXML(provenanceItem);
        List<Element> inputPorts = dataItemAsXML.getChildren("port");
        int order = 0;
        for (Element inputport : inputPorts) {

            String portName = inputport.getAttributeValue("name");
            //         logger.info("processInput: processing VarBinding for "+procBinding.getPNameRef()+"  "+portName);

            try {
                // add process order sequence to Var for this portName

                Map<String, String> queryConstraints = new HashMap<String, String>();
                queryConstraints.put("wfInstanceRef", currentWorkflowID);
                queryConstraints.put("pnameRef", procBinding.getPNameRef());
                queryConstraints.put("varName", portName);
                queryConstraints.put("inputOrOutput", "1");

                List<Var> vars = getPq().getVars(queryConstraints);
                try {
                    Var v = vars.get(0);
                    v.setPortNameOrder(order++);
                    getPw().updateVar(v);
                } catch (IndexOutOfBoundsException e) {
                    logger.error("Could not process input " + portName, e);
                }
            } catch (SQLException e1) {
                logger.error("Could not process input " + portName, e1);
            }

            // value type may vary
            List<Element> valueElements = inputport.getChildren(); // hopefully in the right order...
            if (valueElements != null && valueElements.size() > 0) {

                Element valueEl = valueElements.get(0); // expect only 1 child
                //            processVarBinding(valueEl, processor, portName, iterationVector,
                //            dataflow);

                List<VarBinding> newBindings = processVarBinding(valueEl, procBinding.getPNameRef(), portName,
                        procBinding.getIterationVector(), getWfInstanceID(), currentWorkflowID);
                // this is a list whenever valueEl is of type list: in this case processVarBinding recursively
                // processes all values within the collection, and generates one VarBinding record for each of them

                allInputVarBindings.addAll(newBindings);

                //            logger.debug("newBindings now has "+newBindings.size()+" elements");

                //            // if the new binding involves list values, then check to see if they need to be propagated back to 
                //            // results of iterations
                try {
                    backpatchIterationResults(newBindings);
                } catch (SQLException e) {
                    logger.warn("Problem with back patching iteration results", e);

                }

            } else {
                if (valueElements != null)
                    logger.debug("port name " + portName + "  " + valueElements.size());
                else
                    logger.debug("valueElements is null for port name " + portName);
            }
        }

    }

    /**
     * captures the default case where the value element is not known to be inside a list:
     * delegates to the general case with default collection parameters
     * 
     * @param valueEl
     * @param processorId
     * @param portName
     * @param iterationId
     * @param wfInstanceRef
     * @param currentWorkflowID
     * @return the VarBinding records generated for this value
     */
    private List<VarBinding> processVarBinding(Element valueEl, String processorId, String portName,
            String iterationId, String wfInstanceRef, String currentWorkflowID) {

        // uses the defaults:
        // collIdRef = null
        // parentcollectionRef = null
        // positionInCollection = 1
        return processVarBinding(valueEl, processorId, portName, null, 1, null, iterationId, wfInstanceRef, null,
                currentWorkflowID);
    }

    /**
     * general case where value can be a list
     * @param valueEl
     * @param processorId
     * @param portName
     * @param collIdRef
     * @param positionInCollection
     * @param parentCollectionRef
     * @param iterationId
     * @param wfInstanceRef
     * @param currentWorkflowID 
     */
    @SuppressWarnings("unchecked")
    private List<VarBinding> processVarBinding(Element valueEl, String processorId, String portName,
            String collIdRef, int positionInCollection, String parentCollectionRef, String iterationId,
            String wfInstanceRef, String itVector, String currentWorkflowID) {

        List<VarBinding> newBindings = new ArrayList<VarBinding>();

        String valueType = valueEl.getName();
        //      logger.info("value element for " + processorId + ": "
        //      + valueType);

        String iterationVector = null;

        if (itVector == null)
            iterationVector = extractIterationVector(iterationId);
        else
            iterationVector = itVector;

        VarBinding vb = new VarBinding();

        vb.setWfNameRef(currentWorkflowID);
        vb.setWfInstanceRef(wfInstanceRef);
        vb.setPNameRef(processorId);
        vb.setValueType(valueType);
        vb.setVarNameRef(portName);
        vb.setCollIDRef(collIdRef);
        vb.setPositionInColl(positionInCollection);

        newBindings.add(vb);

        if (valueType.equals("literal")) {

            //         logger.warn("input of type literal");

            try {

                vb.setIterationVector(iterationVector);
                vb.setValue(valueEl.getAttributeValue("id"));

                logger.debug("new input VB with wfNameRef=" + currentWorkflowID + " processorId=" + processorId
                        + " valueType=" + valueType + " portName=" + portName + " collIdRef=" + collIdRef
                        + " position=" + positionInCollection + " itvector=" + iterationVector + " value="
                        + vb.getValue());

                //            logger.info("calling addVarBinding on "+vb.getPNameRef()+" : "+vb.getVarNameRef()); 
                getPw().addVarBinding(vb);

            } catch (SQLException e) {
                logger.warn("Process Var Binding problem with provenance", e);
            }

        } else if (valueType.equals("referenceSet")) {

            vb.setIterationVector(iterationVector);
            vb.setValue(valueEl.getAttributeValue("id"));
            vb.setRef(valueEl.getChildText("reference"));

            logger.debug("new input VB with wfNameRef=" + currentWorkflowID + " processorId=" + processorId
                    + " valueType=" + valueType + " portName=" + portName + " collIdRef=" + collIdRef + " position="
                    + positionInCollection + " itvector=" + iterationVector + " value=" + vb.getValue());

            try {
                //            logger.debug("calling addVarBinding on "+vb.getPNameRef()+" : "+vb.getVarNameRef()+" with it "+vb.getIteration()); 
                getPw().addVarBinding(vb);
            } catch (SQLException e) {
                logger.debug("Problem processing var binding -- performing update instead of insert"); //, e);
                // try to update the existing record instead using the current collection info

                getPw().updateVarBinding(vb);
                //            logger.warn("VarBinding update successful");

            }

        } else if (valueType.equals("list")) {

            logger.debug("input of type list");

            // add entries to the Collection and to the VarBinding tables
            // list id --> Collection.collId
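            // the serialized value element is assumed to look like this (illustrative,
            // inferred from the element names handled in this method):
            // <list id="...">
            //   <literal id="..."/> | <referenceSet id="..."><reference>...</reference></referenceSet> | <list>...</list>
            // </list>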

            String collId = valueEl.getAttributeValue("id");
            try {

                parentCollectionRef = getPw().addCollection(processorId, collId, parentCollectionRef,
                        iterationVector, portName, wfInstanceRef);

                // iterate over each list element
                List<Element> listElements = valueEl.getChildren();

                positionInCollection = 1; // also use this as a suffix to extend the iteration vector

                // extend iteration vector to account for additional levels within the list

                String originalIterationVector = iterationVector;

                // children can be any base type, including list itself -- so
                // use recursion
                for (Element el : listElements) {
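                    // extend the parent's vector with this child's 0-based position:
                    // e.g. "[1]" -> "[1,0]", "[1,1]", ... and an empty "[]" -> "[0]", "[1]", ... (illustrative)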

                    if (originalIterationVector.length() > 2) { // vector is not empty
                        iterationVector = originalIterationVector.substring(0, originalIterationVector.length() - 1)
                                + "," + Integer.toString(positionInCollection - 1) + "]";
                    } else {
                        iterationVector = "[" + Integer.toString(positionInCollection - 1) + "]";
                    }

                    List<VarBinding> bindings = processVarBinding(el, processorId, portName, collId,
                            positionInCollection, parentCollectionRef, iterationId, wfInstanceRef, iterationVector,
                            currentWorkflowID);

                    newBindings.addAll(bindings);

                    positionInCollection++;
                }

            } catch (SQLException e) {
                logger.warn("Problem processing var binding", e);
            }
        } else if (valueType.equals("error")) {
            try {
                vb.setIterationVector(iterationVector);
                vb.setValue(valueEl.getAttributeValue("id"));

                getPw().addVarBinding(vb);

            } catch (SQLException e) {
                logger.warn("Process Var Binding problem with provenance", e);
            }
        } else {
            logger.warn("unrecognized value type element for " + processorId + ": " + valueType);
        }

        return newBindings;
    }

    /**
     * OBSOLETE: used to return the iteration vector x,y,z,... extracted from [x,y,z,...]
     * <p/>
     * now returns the vector itself unchanged -- this is still experimental
     * 
     * @param iteration
     * @return the iteration vector, currently as-is
     */
    String extractIterationVector(String iteration) {

        return iteration;
        // return iteration.substring(1, iteration.length() - 1);
        // iteration is of the form "[n]" so we extract n
        // String iterationN = iteration.substring(1, iteration.length()-1);

        // if (iterationN.length() == 0) return 0;

        // return Integer.parseInt(iterationN);
    }

    /**
     * log raw event to file system
     * 
     * @param content
     * @param eventType
     * @throws IOException
     */
    public void saveEvent(ProvenanceItem provenanceItem, SharedVocabulary eventType) throws IOException {

        // HACK -- XMLEncoder fails on IterationEvents and there is no way to catch the exception...
        // so avoid this case
        if (eventType.equals(SharedVocabulary.ITERATION_EVENT_TYPE)) {
            return;
        }

        //      System.out.println("saveEvent: start");

        File f1 = null;

        f1 = new File(TEST_EVENTS_FOLDER);
        FileUtils.forceMkdir(f1);

        String fname = "event_" + eventCnt++ + "_" + eventType + ".xml";
        File f = new File(f1, fname);

        //      FileWriter fw = new FileWriter(f);

        XMLEncoder en = new XMLEncoder(new BufferedOutputStream(new FileOutputStream(f)));

        en.setExceptionListener(new ExceptionListener() {
            public void exceptionThrown(Exception e) {
                logger.warn("XML encoding ERROR", e);
            }
        });

        logger.debug("saving to " + f); // save event for later inspection
        logger.debug(provenanceItem);

        en.writeObject(provenanceItem);

        logger.debug("writer ok");
        en.close();
        logger.debug("closed");

        //      fw.write(content);
        //      fw.flush();
        //      fw.close();

        //      System.out.println("saved as file " + fname);

    }
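
    // For illustration (not part of the original logic): with eventCnt == 0 and
    // an event type whose string form is, say, "process", the call above writes
    // a file such as TEST_EVENTS_FOLDER/event_0_process.xml, one per captured event.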

    /**
     * for each arc of the form (_INPUT_/I, P/V): propagates the VarBinding
     * recorded for P/V back to the workflow input var _INPUT_/I <br/>
     * 
     * @param context currently unused
     * @throws SQLException
     */
    public void fillInputVarBindings(Object context) throws SQLException {

        // System.out.println("*** fillInputVarBindings: ***");

        // retrieve appropriate arcs
        Map<String, String> constraints = new HashMap<String, String>();
        constraints.put("sourcePnameRef", "_INPUT_");
        constraints.put("W.instanceID", getWfInstanceID());
        List<Arc> arcs = getPq().getArcs(constraints);

        // backpropagate VarBinding from the target var of the arc to the source
        for (Arc aArc : arcs) {

            // get the varBinding for the arc sinks
            Map<String, String> vbConstraints = new HashMap<String, String>();
            vbConstraints.put("VB.PNameRef", aArc.getSinkPnameRef());
            vbConstraints.put("VB.varNameRef", aArc.getSinkVarNameRef());
            vbConstraints.put("VB.wfInstanceRef", getWfInstanceID());

            List<VarBinding> vbList = getPq().getVarBindings(vbConstraints); // DB query

            for (VarBinding vb : vbList) {
                // add a new VarBinding for the input

                vb.setPNameRef(aArc.getSourcePnameRef());
                vb.setVarNameRef(aArc.getSourceVarNameRef());
                // all other attributes are the same --> CHECK!!

                getPw().addVarBinding(vb);
            }
        }
    }
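
    // Sketch of the backpropagation above, with hypothetical names: given an arc
    // (_INPUT_/in1, P/x) and a stored VarBinding for P/x, the binding is re-pointed
    // at _INPUT_/in1 and written back, so the workflow input acquires the bindings
    // of the processor port it feeds.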

    /**
     * for each arc of the form (P/V, _OUTPUT_/O): propagates the VarBinding
     * recorded for P/V to the workflow output var _OUTPUT_/O <br/>
     * 
     * @param context currently unused
     * @throws SQLException
     */
    public void fillOutputVarBindings(Object context) throws SQLException {

        // retrieve appropriate arcs
        Map<String, String> constraints = new HashMap<String, String>();
        constraints.put("sinkPnameRef", "_OUTPUT_");
        constraints.put("wfInstanceRef", getWfInstanceID());
        List<Arc> arcs = getPq().getArcs(constraints);

        // forward-propagate VarBinding from the source var of the arc to the output
        for (Arc aArc : arcs) {

            // get the varBinding for the arc source
            Map<String, String> vbConstraints = new HashMap<String, String>();
            vbConstraints.put("VB.PNameRef", aArc.getSourcePnameRef());
            vbConstraints.put("VB.varNameRef", aArc.getSourceVarNameRef());
            vbConstraints.put("VB.wfInstanceRef", getWfInstanceID());

            List<VarBinding> vbList = getPq().getVarBindings(vbConstraints); // DB query

            for (VarBinding vb : vbList) {
                // add a new VarBinding for the output
                getPw().addVarBinding(vb); // DB UPDATE
            }

        }
    }
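
    // Sketch, with hypothetical names: given an arc (P/y, _OUTPUT_/out1), every
    // VarBinding stored for P/y is written back as-is; note that, unlike
    // fillInputVarBindings, the binding's processor and var name references are
    // not re-pointed at the output port here.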

    /**
     * simple helper class holding a pair of strings -- here a processor name
     * and the workflow name reference it belongs to
     * @author paolo
     *
     */
    class Pair {
        String v1, v2;

        public Pair(String current, String wfNameRef) {
            v1 = current;
            v2 = wfNameRef;
        }

        /**
         * @return the v1
         */
        public String getV1() {
            return v1;
        }

        /**
         * @param v1 the v1 to set
         */
        public void setV1(String v1) {
            this.v1 = v1;
        }

        /**
         * @return the v2
         */
        public String getV2() {
            return v2;
        }

        /**
         * @param v2 the v2 to set
         */
        public void setV2(String v2) {
            this.v2 = v2;
        }

    }

    /**
     * topologically sorts the processors contained in the named dataflow;
     * nested dataflows are recursively expanded in place, so the result spans
     * the entire workflow structure
     * 
     * @param dataflowName
     * @param wfInstanceId
     * @return a list of (processor name, workflow name reference) pairs in
     *         topological order
     * @throws SQLException
     */
    public List<Pair> toposort(String dataflowName, String wfInstanceId) throws SQLException {

        String wfNameRef = pq.getWfNameForDataflow(dataflowName);

        // fetch processors along with the count of their predecessors
        Map<String, Integer> predecessorsCount = getPq().getPredecessorsCount(wfInstanceId);
        Map<String, List<String>> successorsOf = new HashMap<String, List<String>>();
        List<String> procList = pq.getContainedProcessors(dataflowName);

        //      logger.debug("toposort on "+dataflowName);

        //      logger.debug("contained procs: ");
        for (String s : procList) {

            List<String> successors = getPq().getSuccProcessors(s, wfNameRef, wfInstanceId);
            successorsOf.put(s, successors);

        }

        List<Pair> sorted = tsort(procList, dataflowName, predecessorsCount, successorsOf, wfNameRef, wfInstanceId);

        //      logger.debug("tsort:");
        //      for (String p : sorted) { logger.debug(p); }

        for (int i = 0; i < sorted.size(); i++) {

            String procName = sorted.get(i).getV1();

            if (pq.isDataflow(procName) && !procName.equals(dataflowName)) { // handle weirdness: a dataflow is contained within itself..
                // recurse on procName
                List<Pair> sortedSublist = toposort(procName, wfInstanceId);

                // replace procName with sortedSublist in sorted
                sorted.remove(i);
                sorted.addAll(i, sortedSublist);
            }
        }
        return sorted;
    }
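
    // Sketch of the in-place expansion above, with hypothetical names: if the
    // sorted list is [A, Nested, B] and Nested is itself a dataflow containing
    // X and Y, the recursive call replaces that entry, giving [A, X, Y, B].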

    /**
     * topological sort of the processors in procList, using a queue-based
     * (Kahn-style) algorithm
     * 
     * @param procList the processors to be sorted
     * @param dataflowName the containing dataflow, which is itself excluded from the sort
     * @param predecessorsCount maps each processor to the number of its predecessors
     * @param successorsOf maps each processor to its successors
     * @param wfNameRef the workflow name reference recorded in each returned Pair
     * @param wfInstanceId 
     * @return the processors as (processor name, workflow name reference) pairs,
     *         in topological order
     * @throws SQLException 
     */
    public List<Pair> tsort(List<String> procList, String dataflowName, Map<String, Integer> predecessorsCount,
            Map<String, List<String>> successorsOf, String wfNameRef, String wfInstanceId) throws SQLException {

        List<Pair> L = new ArrayList<Pair>(); // holds sorted elements
        List<String> Q = new ArrayList<String>(); // temp queue

        //      logger.debug("queue init with procList");
        // init queue with procList processors that have no predecessors
        for (String proc : procList) {

            //         logger.debug("dataflowName: "+dataflowName+" proc: "+proc);

            if (predecessorsCount.get(proc) == null
                    || predecessorsCount.get(proc) == 0 && !proc.equals(dataflowName)) {

                Q.add(proc);
            }
            //         logger.debug(proc + " added to queue");
            //         } else 
            //         logger.debug(proc+" not added to queue");
        }

        //      logger.debug("queue has "+Q.size()+" elements");
        while (!Q.isEmpty()) {

            String current = Q.remove(0);
            //         logger.debug("extracted "+current+" and added to L");
            L.add(new Pair(current, wfNameRef));

            //         for (String s:L) logger.debug(s);

            List<String> successors = successorsOf.get(current);

            //         logger.debug("\n****successors of "+current);

            if (successors == null)
                continue;

            // reduce the number of predecessors to each of the successors by one
            // NB we must traverse an additional arc through a nested workflow input if the successor is a dataflow!!
            for (String succ : successors) {

                // decrease the predecessor count of each successor processor
                Integer cnt = predecessorsCount.get(succ);
                predecessorsCount.put(succ, cnt - 1);

                if (predecessorsCount.get(succ) == 0 && !succ.equals(dataflowName)) {
                    Q.add(succ);
                }
            }
        } // end loop on Q
        return L;
    }
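
    // Worked example of the queue-based sort above, with hypothetical processors
    // A -> B, A -> C, B -> D, C -> D: predecessorsCount starts as {A=0, B=1, C=1,
    // D=2}; A is dequeued first and drops B and C to 0, so both join the queue;
    // dequeuing them drops D to 0, yielding the topological order A, B, C, D.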

    /**
     * computes the actual nesting level (ANL) for each var in the workflow:
     * the processors are topologically sorted first, then the ANL is
     * propagated from the inputs of each processor to its outputs using the
     * sum formula (see paper)
     * 
     * @param wfInstanceId
     * @return the processor names in topological order
     * @throws SQLException
     */
    public List<String> propagateANL(String wfInstanceId) throws SQLException {

        String top = pq.getTopLevelDataflowName(wfInstanceId);

        // //////////////////////
        // PHASE I: toposort the processors in the whole graph
        // //////////////////////
        List<Pair> sorted = toposort(top, wfInstanceId);

        List<String> sortedProcessors = new ArrayList<String>();

        for (Pair p : sorted) {
            sortedProcessors.add(p.getV1());
        }

        logger.debug("final sorted list of processors");
        for (Pair p : sorted) {
            logger.debug(p.getV1() + "  in wfnameRef " + p.getV2());
        }

        // //////////////////////
        // PHASE II: traverse and set anl on each port
        // //////////////////////

        //      logger.debug("***** STARTING ANL *****");

        //      // sorted processor names in L at this point
        //      // process them in order
        for (Pair pnameInContext : sorted) {

            //         logger.debug("setting ANL for "+pnameInContext.getV1()+" input vars");

            //         // process pname's inputs -- set ANL to be the DNL if not set in prior steps
            String pname = pnameInContext.getV1();
            String wfNameRef = pnameInContext.getV2();

            //         logger.debug("processor "+pname);

            List<Var> inputs = getPq().getInputVars(pname, wfNameRef, wfInstanceId); // null -> do not use instance (??) CHECK

            //         logger.debug(inputs.size()+" inputs for "+pnameInContext.getV1());

            int totalANL = 0;
            for (Var iv : inputs) {

                if (!iv.isANLset()) {
                    iv.setActualNestingLevel(iv.getTypeNestingLevel());
                    iv.setANLset(true);
                    getPw().updateVar(iv);
                }

                int delta_nl = iv.getActualNestingLevel() - iv.getTypeNestingLevel();

                // if delta_nl < 0 then Taverna wraps the value into a list --> use dnl(X) in this case
                if (delta_nl < 0)
                    delta_nl = 0;

                totalANL += delta_nl;

                // NB: propagation of this input's ANL through all links from the var
                // (covering the special case of top-level dataflow inputs that have
                // successors in the graph) is currently disabled
            }

            // process pname's outputs -- set ANL based on the sum formula (see paper)
            List<Var> outputs = getPq().getOutputVars(pname, wfNameRef, wfInstanceId);
            for (Var ov : outputs) {

                ov.setActualNestingLevel(ov.getTypeNestingLevel() + totalANL);

                logger.debug(
                        "anl for " + pname + ":" + ov.getVName() + " = " + (ov.getTypeNestingLevel() + totalANL));
                ov.setANLset(true);
                getPw().updateVar(ov);

                // propagate this through all the links from this var
                List<Var> successors = getPq().getSuccVars(pname, ov.getVName(), wfNameRef);

                for (Var v : successors) {

                    List<Var> toBeProcessed = new ArrayList<Var>();
                    toBeProcessed.add(v);

                    if (pq.isDataflow(v.getPName()) && v.isInput()) { // this is the input to a nested workflow

                        String tempWfNameRef = pq.getWfNameForDataflow(v.getPName());
                        List<Var> realSuccessors = getPq().getSuccVars(v.getPName(), v.getVName(), tempWfNameRef);

                        toBeProcessed.remove(0);
                        toBeProcessed.addAll(realSuccessors);

                    } else if (pq.isDataflow(v.getPName()) && !v.isInput()) { // this is the output of a nested workflow

                        // the successors are in the containing workflow, so no wfNameRef is passed
                        List<Var> realSuccessors = getPq().getSuccVars(v.getPName(), v.getVName(), null);

                        toBeProcessed.remove(0);
                        toBeProcessed.addAll(realSuccessors);
                    }

                    for (Var v1 : toBeProcessed) {
                        v1.setActualNestingLevel(ov.getActualNestingLevel());
                        logger.debug("anl for " + v1.getPName() + ":" + v1.getVName() + " = "
                                + ov.getActualNestingLevel());

                        v1.setANLset(true);
                        getPw().updateVar(v1);
                    }
                }
            }
        }
        return sortedProcessors;
    }
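
    // Worked example of the ANL computation above, with hypothetical values: an
    // input declared at depth 0 (dnl = 0) that receives a list (anl = 1) gives
    // delta_nl = 1, so each output declared at depth dnl gets anl = dnl + 1;
    // with two such list inputs (a cross product), totalANL = 2 and each output
    // gets anl = dnl + 2.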

    public void setPw(ProvenanceWriter pw) {
        this.pw = pw;
    }

    public ProvenanceWriter getPw() {
        return pw;
    }

    public void setPq(ProvenanceQuery pq) {
        this.pq = pq;
    }

    public ProvenanceQuery getPq() {
        return pq;
    }

    public void setWfInstanceID(String wfInstanceID) {
        this.wfInstanceID = wfInstanceID;
    }

    public String getWfInstanceID() {
        return wfInstanceID;
    }

    public void setWfdp(WorkflowDataProcessor wfdp) {
        this.wfdp = wfdp;
    }

    public WorkflowDataProcessor getWfdp() {
        return wfdp;
    }

}