Source code

Java tutorial


Here is the source code for


Copyright (C) SYSTAP, LLC 2006-2010.  All rights reserved.
 4501 Tower Road
 Greensboro, NC 27410
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * Created on Aug 18, 2010

package com.bigdata.bop.controller;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.http.conn.ClientConnectionManager;
import org.apache.log4j.Logger;
import org.openrdf.query.BindingSet;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.IVariableOrConstant;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.join.HashJoinAnnotations;
import com.bigdata.bop.join.JVMHashJoinUtility;
import com.bigdata.bop.join.JoinAnnotations;
import com.bigdata.bop.join.JoinTypeEnum;
import com.bigdata.htree.HTree;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.sparql.ast.service.BigdataServiceCall;
import com.bigdata.rdf.sparql.ast.service.ExternalServiceCall;
import com.bigdata.rdf.sparql.ast.service.RemoteServiceCall;
import com.bigdata.rdf.sparql.ast.service.ServiceCall;
import com.bigdata.rdf.sparql.ast.service.ServiceCallUtility;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.sparql.ast.service.ServiceRegistry;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;
import com.bigdata.striterator.ChunkedArrayIterator;
import com.bigdata.striterator.Chunkerator;
import com.bigdata.util.InnerCause;
import com.bigdata.util.concurrent.LatchedExecutor;

import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.SingleValueIterator;

 * Vectored pipeline join of the source solution(s) with solutions from a
 * SERVICE invocation. This operator may be used to invoke: (a) internal,
 * bigdata-aware services; (b) internal openrdf aware services; and (c) remote
 * services.
 * <p>
 * Source solutions are vectored for the same target service. Source solutions
 * which target different services are first grouped by the target service and
 * then vectored to each target service. Remote SERVICEs receive their vectored
 * inputs through a BINDINGS clause rather than a {@link IBindingSet}[]. The
 * service call(s) will be cancelled if the parent query is cancelled.
 * <p>
 * For each binding set presented, this operator executes the service joining
 * the solutions from the service against the source binding set. Since each
 * invocation of the service will (typically) produce the same solutions, this
 * operator should always be the first operator in a named subquery in order to
 * ensure that the service is invoked exactly once. The solutions written onto
 * the sink may then be joined with other access paths before they reach the end of
 * the named subquery and are materialized (by the parent) on an {@link HTree}.
 * <p>
 * Any solutions produced by the service are copied to the default sink.
 * @author <a href="">Bryan Thompson</a>
public class ServiceCallJoin extends PipelineOp {

    // Log4j logger for this operator; the nested task classes log through it
    // as well (see the warning emitted when a SILENT service call fails).
    private static final Logger log = Logger.getLogger(ServiceCallJoin.class);

    // Serialization version stamp (presumably PipelineOp is Serializable; that
    // is not visible from this file — confirm).
    private static final long serialVersionUID = 1L;

    // Annotation keys understood by ServiceCallJoin, in addition to whatever
    // is inherited from PipelineOp.Annotations.
    //
    // NOTE(review): in this copy of the file the descriptive lines below have
    // lost their comment delimiters and the closing brace of the interface is
    // not visible; both need to be restored before this compiles.
    public interface Annotations extends PipelineOp.Annotations {

         * Optional constraints to be applied to each solution.
         * @see JoinAnnotations#CONSTRAINTS
        // Only referenced from commented-out code in the visible part of this
        // file.
        String CONSTRAINTS = JoinAnnotations.CONSTRAINTS;

         * The {@link ServiceNode} modeling the SERVICE clause to be invoked.
         * <p>
         * Note: This presence of the {@link ServiceNode} as an attribute on the
         * {@link ServiceCallJoin} blends the bop (physical query plan) and the
         * AST (logical query plan). However, we basically need all of the data
         * from the {@link ServiceNode} in order to handle remote service end
         * points so it is much simpler to reuse the encapsulation here.
         * @see ServiceRegistry
        // Read as a required property and cast to ServiceNode by the ChunkTask
        // constructor.
        String SERVICE_NODE = ServiceCallJoin.class.getName() + ".serviceNode";

         * The namespace of the {@link AbstractTripleStore} instance (not the
         * namespace of the lexicon relation). This resource will be located and
         * made available to the {@link ServiceCall}.
        // Read as a required String property by the ChunkTask constructor.
        String NAMESPACE = ServiceCallJoin.class.getName() + ".namespace";

         * The timestamp of the {@link AbstractTripleStore} view to be located.
        // Read as a required Long property by the ChunkTask constructor.
        String TIMESTAMP = ServiceCallJoin.class.getName() + ".timestamp";

         * The join variables. This is used to establish a correlation between
         * the solutions vectored into the SERVICE call and the solutions
         * flowing out of the SERVICE call.
         * @see HashJoinAnnotations#JOIN_VARS
        // Not read directly by the visible code (only in commented-out lines);
        // presumably consumed by JVMHashJoinUtility, which is handed the
        // operator — confirm.
        String JOIN_VARS = HashJoinAnnotations.JOIN_VARS;


     * Deep copy constructor.
    // NOTE(review): only the signature of this copy constructor survives in
    // this copy of the file; its body and closing brace are missing and must
    // be restored from the upstream source rather than guessed.
    public ServiceCallJoin(final ServiceCallJoin op) {

     * Shallow copy constructor.
     * @param args
     * @param annotations
    // Builds the operator from its operands and an annotation map; both are
    // handed unchanged to the PipelineOp constructor.
    public ServiceCallJoin(final BOp[] args, final Map<String, Object> annotations) {

        super(args, annotations);




        // NOTE(review): the run of blank lines above looks like stripped
        // statements (possibly required-annotation checks similar to the
        // disabled one below) and the closing brace is missing — confirm
        // against the upstream source.
        //        getRequiredProperty(Annotations.PROJECTED_VARS);



    // Convenience overload: converts the NV name/value pairs to a map with
    // NV.asMap(...) and delegates to the (BOp[], Map) constructor.
    // NOTE(review): the closing brace is missing in this copy of the file.
    public ServiceCallJoin(final BOp[] args, NV... annotations) {

        this(args, NV.asMap(annotations));


    // Creates a ChunkTask for this operator and the given evaluation context
    // and wraps it in a FutureTask. The task is only constructed here; it is
    // not started by this method.
    // NOTE(review): the closing brace is missing in this copy of the file.
    public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {

        return new FutureTask<Void>(new ChunkTask(this, context));


     * Evaluates the {@link ServiceCall} for each source binding set. If the
     * outer operator is interrupted, then the {@link ServiceCall} is cancelled
     * (by closing its iterator). If a {@link ServiceCall} fails, then that
     * error is propagated back to the outer operator.
    private static class ChunkTask implements Callable<Void> {

        // The operator being evaluated; consulted for getMaxParallel() and
        // getChunkCapacity().
        private final ServiceCallJoin op;

        // Evaluation context: supplies the source iterator, the sink, the
        // executor service and the resource lookup used by the constructor.
        private final BOpContext<IBindingSet> context;

        //        private final IConstraint[] constraints;

        // The triple store view resolved from the NAMESPACE and TIMESTAMP
        // annotations.
        // NOTE(review): AbstractTripleStore does not appear in the visible
        // import list — confirm the import was not lost.
        private final AbstractTripleStore db;

        // Connection manager handed to ServiceRegistry when resolving a
        // service.
        private final ClientConnectionManager cm;

        // Value expression of the SERVICE reference; call() branches on
        // whether it is a constant.
        private final IVariableOrConstant<?> serviceRef;

        // The AST node for the SERVICE clause (from the SERVICE_NODE
        // annotation).
        private final ServiceNode serviceNode;

        //        final IGroupNode<IGroupMemberNode> groupNode;

        // Copied from serviceNode.isSilent(); when true, timeouts and service
        // errors are suppressed instead of failing the query.
        private final boolean silent;

        // Copied from serviceNode.getTimeout(); passed to FutureTask.get()
        // with TimeUnit.MILLISECONDS.
        private final long timeout;

        // Copied from serviceNode.getProjectedVars(); never null (asserted in
        // the constructor).
        private final Set<IVariable<?>> projectedVars;

        //        private final Set<IVariable<?>> joinVars;

        //        @SuppressWarnings("unchecked")
        // Captures everything the task needs up front: the operator, the
        // context, the ServiceNode and the values derived from it, and the
        // triple store view named by the NAMESPACE/TIMESTAMP annotations.
        //
        // Throws IllegalArgumentException if either argument is null and
        // AssertionError if the ServiceNode reports no projected variables.
        public ChunkTask(final ServiceCallJoin op, final BOpContext<IBindingSet> context) {

            if (op == null)
                throw new IllegalArgumentException();

            if (context == null)
                throw new IllegalArgumentException();

            this.op = op;

            this.context = context;

            //            this.constraints = op
            //                    .getProperty(Annotations.CONSTRAINTS, null/* defaultValue */);

            this.serviceNode = (ServiceNode) op.getRequiredProperty(Annotations.SERVICE_NODE);

            this.serviceRef = serviceNode.getServiceRef().getValueExpression();

            final String namespace = (String) op.getRequiredProperty(Annotations.NAMESPACE);

            final long timestamp = ((Long) op.getRequiredProperty(Annotations.TIMESTAMP)).longValue();

            this.db = (AbstractTripleStore) context.getResource(namespace, timestamp);

            // NOTE(review): the left-hand side of the next statement is missing
            // in this copy of the file; presumably it assigns the final field
            // `cm` — confirm against the upstream source.
   = context.getClientConnectionManager();

            //            this.valueFactory = db.getValueFactory();

            // Service errors are ignored when true.
            this.silent = serviceNode.isSilent();//op.getProperty(Annotations.SILENT, false);

            // The service request timeout.
            this.timeout = serviceNode.getTimeout();//op.getProperty(Annotations.TIMEOUT, Long.MAX_VALUE);

             * Note: We MUST use the projected variables for the SERVICE since
             * we can otherwise break the variable scope.
            this.projectedVars = serviceNode.getProjectedVars();

            if (projectedVars == null)
                throw new AssertionError();

            //            this.joinVars = (Set<IVariable<?>>) op
            //                    .getRequiredProperty(Annotations.JOIN_VARS);


         * Evaluate the {@link ServiceCall}.
        // Entry point of the task: branches on whether the SERVICE reference
        // is a constant and always returns null.
        //
        // NOTE(review): both branch bodies are empty in this copy of the file.
        // Presumably they dispatch to doServiceCallWithConstant() and
        // doServiceCallWithExpression() respectively — confirm against the
        // upstream source. The closing braces are missing as well.
        public Void call() throws Exception {

            if (serviceRef.isConstant()) {


            } else {



            return (Void) null;


         * The value expression for the SERVICE reference is a constant (fast
         * path).
         * @throws Exception
        // Fast path for a constant SERVICE reference: the service is resolved
        // once, and one ServiceCallTask is created for every chunk read from
        // the source iterator. Each task is waited on for at most `timeout`
        // milliseconds.
        private void doServiceCallWithConstant() throws Exception {

            final BigdataURI serviceURI = ServiceCallUtility.getConstantServiceURI(serviceRef);

            if (serviceURI == null)
                throw new AssertionError();

            // Lookup a class to "talk" to that Service URI.
            final ServiceCall<? extends Object> serviceCall = resolveService(serviceURI);

            try {

                final ICloseableIterator<IBindingSet[]> sitr = context.getSource();

                while (sitr.hasNext()) {

                    // NOTE(review): the right-hand side of this assignment is
                    // missing in this copy of the file (presumably the next
                    // chunk from `sitr`) — confirm.
                    final IBindingSet[] chunk =;

                    final ServiceCallChunk serviceCallChunk = new ServiceCallChunk(serviceURI, serviceCall, chunk);

                    final FutureTask<Void> ft = new FutureTask<Void>(new ServiceCallTask(serviceCallChunk));

                    // NOTE(review): nothing visible here submits `ft` for
                    // execution before it is awaited below; a statement
                    // appears to have been stripped — confirm.

                    try {

                        ft.get(timeout, TimeUnit.MILLISECONDS);

                    } catch (TimeoutException ex) {

                        // A timeout only fails the query when the SERVICE is
                        // not SILENT.
                        if (!silent)
                            throw ex;

                    } finally {

                        // Always cancel, whether the task finished, failed or
                        // timed out.
                        ft.cancel(true/* mayInterruptIfRunning */);



                // NOTE(review): the statements announced by the next two
                // comments, the body of the trailing finally block and all
                // closing braces are missing in this copy of the file.
                // Flush the sink.

                // Done.

            } finally {





         * The SERVICE reference value expression is not a constant.
         * <p>
         * We need to evaluate the value expression for each source solution and
         * group the solutions by the distinct as-bound serviceRef values. It is
         * an error if any given serviceRef expression does not evaluate to a
         * URI. Once grouped by the target service URI, we vector the solutions
         * to each service. If there are multiple distinct services, then they
         * are vectored with limited parallelism to reduce latency.
         * @throws Exception 
        // Slow path for a non-constant SERVICE reference: for every source
        // chunk the solutions are grouped by the service URI that serviceRef
        // evaluates to, one ServiceCallTask is created per distinct URI, and
        // each task is awaited for at most `timeout` milliseconds.
        //
        // NOTE(review): this method is heavily truncated in this copy of the
        // file (see the individual notes below); restore it from the upstream
        // source before relying on it.
        private void doServiceCallWithExpression() throws Exception {

            try {

                final ICloseableIterator<IBindingSet[]> sitr = context.getSource();

                while (sitr.hasNext()) {

                    // One entry per distinct target service seen in this chunk.
                    final Map<BigdataURI, ServiceCallChunk> serviceCallChunks = new HashMap<BigdataURI, ServiceCallChunk>();

                    // NOTE(review): the right-hand side of this assignment is
                    // missing (presumably the next chunk from `sitr`).
                    final IBindingSet[] chunk =;

                    for (int i = 0; i < chunk.length; i++) {

                        final IBindingSet bset = chunk[i];

                        final BigdataURI serviceURI = ServiceCallUtility.getServiceURI(serviceRef, bset);

                        ServiceCallChunk serviceCallChunk = serviceCallChunks.get(serviceURI);

                        if (serviceCallChunk == null) {

                            // Lookup a class to "talk" to that Service URI.
                            final ServiceCall<? extends Object> serviceCall = resolveService(serviceURI);

                            // NOTE(review): the next line is the tail of a
                            // longer statement whose head was lost (note the
                            // unbalanced closing parenthesis); presumably it
                            // also registered the new chunk in
                            // serviceCallChunks. Nothing visible adds `bset`
                            // to the chunk either — confirm.
                                    serviceCallChunk = new ServiceCallChunk(serviceURI, serviceCall));




                     * Submit vectored service calls to each target service in
                     * parallel.
                     * Note: Parallelism evaluation of multiple services can
                     * radically reduce the latency of this operation. Limited
                     * parallelism is used to avoid too many threads being tied
                     * up in those service requests.
                     * Note: [nparallel] as reported by getMaxParallel() is a
                     * hint to the QueryEngine to indicate how many instances of
                     * an operator may be executed in parallel. This is using
                     * the same hint to specify how many service requests each
                     * operator instance may execute in parallel. That means
                     * that the real parallelism of this operator is limited by
                     * [nparallel * nparallel].
                     * In order to manage threads growth for the
                     * ServiceCallJoin, the query plan generator SHOULD specify
                     * this as an "at-once" operator (or possible "blocked")
                     * operator. That way the QueryEngine will wait until all
                     * source solutions are on hand and then invoke the
                     * ServiceCallJoin exactly once.

                    final int nparallel = op.getMaxParallel();

                    // NOTE(review): the argument list is cut off after the
                    // first argument (presumably `nparallel` follows).
                    final LatchedExecutor executorService = new LatchedExecutor(context.getExecutorService(),

                    final List<FutureTask<Void>> tasks = new ArrayList<FutureTask<Void>>(serviceCallChunks.size());

                    try {

                        for (ServiceCallChunk serviceCallChunk : serviceCallChunks.values()) {

                            final FutureTask<Void> ft = new FutureTask<Void>(new ServiceCallTask(serviceCallChunk));

                            // NOTE(review): nothing visible adds `ft` to
                            // `tasks` or submits it to `executorService`;
                            // those statements appear to have been stripped.



                        for (FutureTask<Void> ft : tasks) {

                             * Each service request is faced with the same
                             * timeout.

                            try {

                                ft.get(timeout, TimeUnit.MILLISECONDS);

                            } catch (TimeoutException ex) {

                                // Cancel the timed-out request; the timeout
                                // only fails the query when not SILENT.
                                ft.cancel(true/* mayInterruptIfRunning */);

                                if (!silent)
                                    throw ex;



                    } finally {

                        // Ensure that all tasks are cancelled.
                        for (FutureTask<Void> ft : tasks) {

                            ft.cancel(true/* mayInterruptIfRunning */);



                } // next source solution chunk.

                // NOTE(review): the statements announced by the next two
                // comments and the body of the trailing finally block are
                // missing in this copy of the file.
                // Flush the sink.

                // Done.

            } finally {





         * Return a {@link ServiceCall} which may be used to talk to a service
         * at that URI.
         * @param serviceURI
         *            The service URI.
         * @return The {@link ServiceCall} and never <code>null</code>.
        // Asks the ServiceRegistry singleton for the ServiceCall that handles
        // the given service URI, passing along the triple store and the
        // connection manager held by this task.
        private ServiceCall<? extends Object> resolveService(final BigdataURI serviceURI) {

            // NOTE(review): the argument list is cut off after `serviceURI,`
            // in this copy of the file; the remaining argument(s) and the
            // closing parenthesis must be restored from the upstream source.
            final ServiceCall<?> serviceCall = ServiceRegistry.getInstance().toServiceCall(db, cm, serviceURI,

            return serviceCall;


         * Invoke a SERVICE.
        private class ServiceCallTask implements Callable<Void> {

             * The source binding set. This will be copied to the output if
             * there are no solutions for the subquery (optional join
             * semantics).
            // The source solutions for this task, taken from
            // ServiceCallChunk.getSourceSolutions() by the constructor.
            private final IBindingSet[] chunk;

            /** The service URI (used in the warning logged when a SILENT call fails). */
            private final BigdataURI serviceURI;

            /** The object used to talk to that service. */
            private final ServiceCall<?> serviceCall;

             * @param serviceCallChunk
             *            A chunk of solutions to be vectored to some target
             *            service.
            // Copies the service URI, the ServiceCall and the source solutions
            // out of the given chunk. Throws IllegalArgumentException when the
            // chunk is null.
            // NOTE(review): the closing brace is missing in this copy of the
            // file.
            public ServiceCallTask(final ServiceCallChunk serviceCallChunk) {

                if (serviceCallChunk == null)
                    throw new IllegalArgumentException();

                serviceURI = serviceCallChunk.serviceURI;

                serviceCall = serviceCallChunk.serviceCall;

                chunk = serviceCallChunk.getSourceSolutions();


            // Runs one vectored service call: loads the source solutions into
            // a JVMHashJoinUtility, invokes the service, and hash-joins the
            // service solutions against the source solutions, writing joined
            // solutions to a buffer over the context sink. Always returns
            // null.
            public Void call() throws Exception {

                final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>(
                        op.getChunkCapacity(), context.getSink());

                // NOTE(review): sink2 is obtained from the same getSink()
                // accessor as the default buffer above, although the comments
                // below describe it as the optional/alternative sink — confirm
                // whether a different accessor was intended.
                final IBlockingBuffer<IBindingSet[]> sink2 = context.getSink();

                // Thread-local buffer iff optional sink is in use.
                final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer2 = sink2 == null ? null
                        : new UnsyncLocalOutputBuffer<IBindingSet>(op.getChunkCapacity(), sink2);

                // Optional join semantics when SILENT, otherwise a normal join.
                final JVMHashJoinUtility state = new JVMHashJoinUtility(op,
                        silent ? JoinTypeEnum.Optional : JoinTypeEnum.Normal);

                // Pump the solutions into the hash map.
                state.acceptSolutions(new SingleValueIterator<IBindingSet[]>(chunk), null/* stats */);

                // The iterator draining the subquery
                ICloseableIterator<IBindingSet[]> serviceSolutionItr = null;
                try {

                     * Invoke the service.
                     * Note: Returns [null] IFF SILENT and SERVICE ERROR.

                    serviceSolutionItr = doServiceCall(serviceCall, chunk);

                    if (serviceSolutionItr != null) {

                         * Do a hash join of the source solutions with the
                         * solutions from the service, outputting any solutions
                         * which join.
                         * Note: 

                        state.hashJoin(serviceSolutionItr, null/* stats */, unsyncBuffer);


                } finally {

                    // ensure the service call iterator is closed.
                    // NOTE(review): the statement guarded by this `if`
                    // (presumably closing the iterator) is missing in this
                    // copy of the file.
                    if (serviceSolutionItr != null)


                 * Note: This only handles Normal and Optional. Normal is used
                 * unless the SERVICE is SILENT.
                 * The semantics of SILENT are that it returns an "empty"
                 * solution. An empty solution joins with anything (it is the
                 * identity solution). Since there may have been join variables,
                 * we need to use an OPTIONAL join to ensure that the original
                 * solutions are passed through.
                if (state.getJoinType().isOptional()) {

                    final IBuffer<IBindingSet> outputBuffer;
                    if (unsyncBuffer2 == null) {
                        // use the default sink.
                        outputBuffer = unsyncBuffer;
                    } else {
                        // use the alternative sink.
                        outputBuffer = unsyncBuffer2;

                    // NOTE(review): whatever consumed `outputBuffer` (and the
                    // body of the `if` below) is missing in this copy of the
                    // file, as are the buffer flush calls — confirm against
                    // the upstream source.

                    if (sink2 != null) {

                } // if(optional)


                // done.
                return null;


             * Invoke the SERVICE.
             * @param serviceCall
             * @param left
             * @return An iterator from which solutions may be drained -or-
             *         <code>null</code> if the SERVICE invocation failed and
             *         SILENT is <code>true</code>.
             * @throws Exception
             *             TODO RECHUNKING Push down the
             *             ICloseableIterator<IBindingSet[]> return type into
             *             the {@link ServiceCall} interface and the various
             *             ways in which we can execute a service call. Do this
             *             as part of vectoring solutions in and out of service
             *             calls?
            // Dispatches on the concrete ServiceCall type (bigdata-aware,
            // external openrdf, or remote), then re-chunks the resulting
            // solution iterator with a Chunkerator sized by
            // op.getChunkCapacity().
            //
            // Returns null when the call fails, SILENT is set and the failure
            // was not caused by an InterruptedException; any other failure is
            // rethrown wrapped in a RuntimeException. An unrecognised
            // ServiceCall type raises AssertionError, which is handled by the
            // same catch block.
            private ICloseableIterator<IBindingSet[]> doServiceCall(final ServiceCall<? extends Object> serviceCall,
                    final IBindingSet[] left) throws Exception {

                try {

                    final ICloseableIterator<IBindingSet> itr;

                    if (serviceCall instanceof BigdataServiceCall) {

                        itr = doBigdataServiceCall((BigdataServiceCall) serviceCall, left);

                    } else if (serviceCall instanceof ExternalServiceCall) {

                        itr = doExternalServiceCall((ExternalServiceCall) serviceCall, left);

                    } else if (serviceCall instanceof RemoteServiceCall) {

                        itr = doRemoteServiceCall((RemoteServiceCall) serviceCall, left);

                    } else {

                        throw new AssertionError();


                    final ICloseableIterator<IBindingSet[]> itr2 = new Chunkerator<IBindingSet>(itr,
                            op.getChunkCapacity(), IBindingSet.class);

                    return itr2;

                } catch (Throwable t) {

                    if (silent && !InnerCause.isInnerCause(t, InterruptedException.class)) {
                         * If the SILENT attribute was specified, then do not
                         * halt the query if there is an error.
                         * Note: The query must still be interruptable so we do
                         * not trap exceptions whose root cause is an interrupt.

                        // Only the Throwable's string form is logged, not its
                        // stack trace.
                        log.warn("Service call: serviceUri=" + serviceURI + " :" + t);

                        // Done.
                        return null;


                    throw new RuntimeException(t);



             * Evaluate a bigdata aware "service" call in the same JVM.
            // NOTE(review): only the signature of this method survives in this
            // copy of the file; the body and closing brace are missing and
            // must be restored from the upstream source rather than guessed.
            private ICloseableIterator<IBindingSet> doBigdataServiceCall(final BigdataServiceCall serviceCall,
                    final IBindingSet left[]) throws Exception {



             * Evaluate an openrdf "service" call in the same JVM.
            // Thin wrapper: an ExternalServiceCall is handled exactly like a
            // remote call, via doNonBigdataServiceCall().
            // NOTE(review): the closing brace is missing in this copy of the
            // file.
            private ICloseableIterator<IBindingSet> doExternalServiceCall(final ExternalServiceCall serviceCall,
                    final IBindingSet left[]) throws Exception {

                return doNonBigdataServiceCall(serviceCall, left);


             * Evaluate a remote SPARQL service call.
            // Thin wrapper: a RemoteServiceCall is handled exactly like an
            // external openrdf call, via doNonBigdataServiceCall().
            // NOTE(review): the closing brace is missing in this copy of the
            // file.
            private ICloseableIterator<IBindingSet> doRemoteServiceCall(final RemoteServiceCall serviceCall,
                    final IBindingSet left[]) throws Exception {

                return doNonBigdataServiceCall(serviceCall, left);


             * The "openrdf" internal and REMOTE SPARQL invocations look the
             * same at this abstraction. The differences are hidden in the
             * {@link ServiceCall} objects.
             * @param serviceCall
             *            The object which will make the service call.
             * @param left
             *            The source solutions.
             * @return The solutions.
            // Common implementation for service calls that exchange openrdf
            // BindingSets: converts the source solutions to openrdf form,
            // collects the service results into a list, resolves that batch
            // back to IBindingSets against the triple store, and returns them
            // through a ChunkedArrayIterator.
            private ICloseableIterator<IBindingSet> doNonBigdataServiceCall(
                    final ServiceCall<BindingSet> serviceCall, final IBindingSet left[]) throws Exception {

                final LexiconRelation lex = db.getLexiconRelation();

                // Convert IBindingSet[] to openrdf BindingSet[].
                final BindingSet[] left2 = ServiceCallUtility.convert(lex, projectedVars, left);

                 * Note: This operation is "at-once" over the service solutions.
                 * It could be turned into a "chunked" operator over those
                 * solutions. That would make sense if the service was capable
                 * of delivering a very large number of solutions.
                ICloseableIterator<BindingSet> results = null;
                final List<BindingSet> serviceResults = new LinkedList<BindingSet>();
                try {

                    // NOTE(review): the right-hand side of this assignment is
                    // missing in this copy of the file (presumably the
                    // invocation of `serviceCall` with `left2`) — confirm.
                    results =;

                    // NOTE(review): the loop body (presumably appending each
                    // result to `serviceResults`) is missing as well.
                    while (results.hasNext()) {



                } finally {

                    // NOTE(review): the statement guarded by this `if`
                    // (presumably closing `results`) is missing.
                    if (results != null)


                 * Batch resolve BigdataValues to IVs. This is necessary in
                 * order to have subsequent JOINs succeed when they join on
                 * variables which are bound to terms which are in the
                 * lexicon.

                final BindingSet[] serviceResultChunk = serviceResults
                        .toArray(new BindingSet[serviceResults.size()]);

                final IBindingSet[] bigdataSolutionChunk = ServiceCallUtility.resolve(db, serviceResultChunk);

                return new ChunkedArrayIterator<IBindingSet>(bigdataSolutionChunk);


        } // ServiceCallTask

    } // ChunkTask

     * A chunk of solutions for the same target service.
    private static class ServiceCallChunk {

        public final BigdataURI serviceURI;

        public final ServiceCall<?> serviceCall;

        private IBindingSet[] chunk;

        private final List<IBindingSet> sourceSolutions;

        public ServiceCallChunk(final BigdataURI serviceURI, final ServiceCall<?> serviceCall,
                final IBindingSet[] chunk) {

            if (serviceURI == null)
                throw new IllegalArgumentException();

            if (serviceCall == null)
                throw new IllegalArgumentException();

            if (chunk == null)
                throw new IllegalArgumentException();

            if (chunk.length == 0)
                throw new IllegalArgumentException();

            this.serviceURI = serviceURI;

            this.serviceCall = serviceCall;

            this.chunk = chunk;

            this.sourceSolutions = null;


        public ServiceCallChunk(final BigdataURI serviceURI, final ServiceCall<?> serviceCall) {

            if (serviceURI == null)
                throw new IllegalArgumentException();

            if (serviceCall == null)
                throw new IllegalArgumentException();

            this.serviceURI = serviceURI;

            this.serviceCall = serviceCall;

            this.chunk = null;

            this.sourceSolutions = new LinkedList<IBindingSet>();


        public void addSourceSolution(final IBindingSet bset) {

            if (sourceSolutions == null)
                throw new UnsupportedOperationException();



        public IBindingSet[] getSourceSolutions() {

            if (chunk != null) {

                return chunk;


            chunk = sourceSolutions.toArray(new IBindingSet[sourceSolutions.size()]);

            return chunk;


        public int hashCode() {

            return serviceURI.hashCode();


        public boolean equals(final Object o) {

            if (this == o)
                return true;

            final ServiceCallChunk c = (ServiceCallChunk) o;

            return this.serviceURI.equals(c.serviceURI);


