com.ikanow.aleph2.analytics.services.AnalyticsContext.java Source code

Introduction

Here is the source code for com.ikanow.aleph2.analytics.services.AnalyticsContext.java
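
Before the full listing, here is a minimal usage sketch of the two construction modes implemented below: the Guice-injected IN_TECHNOLOGY instance builds a context signature, and a fresh IN_MODULE instance rebuilds its services from that signature. The service_context, bucket and job variables are assumptions (supplied by the surrounding framework); the methods called are those defined in the listing.

    // Technology side (IN_TECHNOLOGY) - created via Guice with an IServiceContext
    final AnalyticsContext tech_context = new AnalyticsContext(service_context);
    tech_context.setBucket(bucket);   // bucket: a DataBucketBean, assumed already retrieved
    tech_context.resetJob(job);       // job: one of that bucket's AnalyticThreadJobBean entries
    final String signature =
            tech_context.getAnalyticsContextSignature(Optional.of(bucket), Optional.empty());
    final List<String> jars = tech_context.getAnalyticsContextLibraries(Optional.empty());

    // Module side (IN_MODULE), e.g. on a remote worker - initializeNewContext expects the
    // config portion of the signature (everything after the leading "<class name>:" prefix):
    final AnalyticsContext module_context = new AnalyticsContext();
    module_context.initializeNewContext(signature.substring(signature.indexOf(':') + 1));
    final IBucketLogger logger = module_context.getLogger(Optional.empty());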

Source

/*******************************************************************************
 * Copyright 2015, The IKANOW Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package com.ikanow.aleph2.analytics.services;

import java.io.File;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.nio.file.FileSystems;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import scala.Tuple2;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Inject;
import com.ikanow.aleph2.analytics.utils.ErrorUtils;
import com.ikanow.aleph2.core.shared.services.MultiDataService;
import com.ikanow.aleph2.core.shared.utils.DataServiceUtils;
import com.ikanow.aleph2.core.shared.utils.JarCacheUtils;
import com.ikanow.aleph2.core.shared.utils.LiveInjector;
import com.ikanow.aleph2.core.shared.utils.SharedErrorUtils;
import com.ikanow.aleph2.core.shared.utils.TimeSliceDirUtils;
import com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsAccessContext;
import com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext;
import com.ikanow.aleph2.data_model.interfaces.data_import.IEnrichmentModuleContext;
import com.ikanow.aleph2.data_model.interfaces.data_services.IManagementDbService;
import com.ikanow.aleph2.data_model.interfaces.data_services.ISearchIndexService;
import com.ikanow.aleph2.data_model.interfaces.data_services.IStorageService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IBucketLogger;
import com.ikanow.aleph2.data_model.interfaces.shared_services.ICrudService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataServiceProvider;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataServiceProvider.IGenericDataService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataWriteService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataWriteService.IBatchSubservice;
import com.ikanow.aleph2.data_model.interfaces.shared_services.ILoggingService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.ISecurityService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IServiceContext;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean.AnalyticThreadJobInputBean;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean.AnalyticThreadJobOutputBean;
import com.ikanow.aleph2.data_model.objects.data_import.AnnotationBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataSchemaBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean.MasterEnrichmentType;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketStatusBean;
import com.ikanow.aleph2.data_model.objects.shared.AssetStateDirectoryBean.StateDirectoryType;
import com.ikanow.aleph2.data_model.objects.shared.AssetStateDirectoryBean;
import com.ikanow.aleph2.data_model.objects.shared.AuthorizationBean;
import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean;
import com.ikanow.aleph2.data_model.objects.shared.GlobalPropertiesBean;
import com.ikanow.aleph2.data_model.objects.shared.SharedLibraryBean;
import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils;
import com.ikanow.aleph2.data_model.utils.BucketUtils;
import com.ikanow.aleph2.data_model.utils.CrudUtils;
import com.ikanow.aleph2.data_model.utils.JsonUtils;
import com.ikanow.aleph2.data_model.utils.Lambdas;
import com.ikanow.aleph2.data_model.utils.ModuleUtils;
import com.ikanow.aleph2.data_model.utils.Optionals;
import com.ikanow.aleph2.data_model.utils.Patterns;
import com.ikanow.aleph2.data_model.utils.PropertiesUtils;
import com.ikanow.aleph2.data_model.utils.SetOnce;
import com.ikanow.aleph2.data_model.utils.Tuples;
import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils.BeanTemplate;
import com.ikanow.aleph2.data_model.utils.CrudUtils.MultiQueryComponent;
import com.ikanow.aleph2.data_model.utils.CrudUtils.SingleQueryComponent;
import com.ikanow.aleph2.distributed_services.data_model.DistributedServicesPropertyBean;
import com.ikanow.aleph2.distributed_services.services.ICoreDistributedServices;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigRenderOptions;
import com.typesafe.config.ConfigValueFactory;

import fj.Unit;
import fj.data.Either;
import fj.data.Validation;

/** The implementation of the analytics context interface
 * @author Alex
 */
public class AnalyticsContext implements IAnalyticsContext, Serializable {
    private static final long serialVersionUID = 6320951383741954045L;

    protected static final Logger _logger = LogManager.getLogger();

    ////////////////////////////////////////////////////////////////

    // CONSTRUCTION

    public String _mutable_serializable_signature; // (for serialization)

    public static final String __MY_BUCKET_ID = "3fdb4bfa-2024-11e5-b5f7-727283247c7e";
    public static final String __MY_TECH_LIBRARY_ID = "3fdb4bfa-2024-11e5-b5f7-727283247c7f";
    public static final String __MY_MODULE_LIBRARY_ID = "3fdb4bfa-2024-11e5-b5f7-727283247cff";
    public static final String __MY_JOB_ID = "3fdb4bfa-2024-11e5-b5f7-7272832480f0";

    private static final EnumSet<MasterEnrichmentType> _streaming_types = EnumSet.of(MasterEnrichmentType.streaming,
            MasterEnrichmentType.streaming_and_batch);
    private static final EnumSet<MasterEnrichmentType> _batch_types = EnumSet.of(MasterEnrichmentType.batch,
            MasterEnrichmentType.streaming_and_batch);

    protected static class MutableState {
        final SetOnce<DataBucketBean> bucket = new SetOnce<>();
        final SetOnce<AnalyticThreadJobBean> job = new SetOnce<>();
        final SetOnce<SharedLibraryBean> technology_config = new SetOnce<>();
        final SetOnce<Map<String, SharedLibraryBean>> library_configs = new SetOnce<>();
        final SetOnce<ImmutableSet<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>>> service_manifest_override = new SetOnce<>();
        final SetOnce<String> signature_override = new SetOnce<>();
        final ConcurrentHashMap<String, Either<Either<IDataWriteService.IBatchSubservice<JsonNode>, IDataWriteService<JsonNode>>, String>> external_buckets = new ConcurrentHashMap<>();
        final HashMap<String, AnalyticsContext> sub_buckets = new HashMap<>();
        final Set<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>> extra_auto_context_libs = new HashSet<>();
        boolean has_unflushed_data = false; //(not intended to be fully thread safe, just better than nothing if we shut down mid-write)
        final Map<String, IBucketLogger> bucket_loggers = new HashMap<String, IBucketLogger>(); //(auto flushing)
    };

    protected transient final MutableState _mutable_state = new MutableState();

    public enum State {
        IN_TECHNOLOGY, IN_MODULE
    };

    protected transient final State _state_name;

    // (stick this injection in and then call injectMembers in IN_MODULE case)
    @Inject
    protected transient IServiceContext _service_context;
    protected transient IManagementDbService _core_management_db;
    protected transient ICoreDistributedServices _distributed_services;
    protected transient IStorageService _storage_service;
    protected transient ISecurityService _security_service;
    protected transient ILoggingService _logging_service;
    protected transient GlobalPropertiesBean _globals;

    protected transient final ObjectMapper _mapper = BeanTemplateUtils.configureMapper(Optional.empty());

    // For writing objects out
    protected transient SetOnce<MultiDataService> _multi_writer = new SetOnce<>();

    private static ConcurrentHashMap<String, AnalyticsContext> static_instances = new ConcurrentHashMap<>();

    /**Guice injector
     * @param service_context
     */
    @Inject
    public AnalyticsContext(final IServiceContext service_context) {
        _state_name = State.IN_TECHNOLOGY;
        _service_context = service_context;
        _core_management_db = service_context.getCoreManagementDbService(); // (actually returns the _core_ management db service)
        _distributed_services = service_context.getService(ICoreDistributedServices.class, Optional.empty()).get();

        _storage_service = service_context.getStorageService();
        _security_service = service_context.getSecurityService();
        _logging_service = service_context.getService(ILoggingService.class, Optional.empty()).get();
        _globals = service_context.getGlobalProperties();
    }

    /** In-module constructor
     */
    public AnalyticsContext() {
        _state_name = State.IN_MODULE;

        // Can't do anything until initializeNewContext is called
    }

    /** Serialization constructor
     * @param ois
     * @throws ClassNotFoundException
     * @throws IOException
     * @throws SecurityException 
     * @throws NoSuchFieldException 
     * @throws IllegalAccessException 
     * @throws IllegalArgumentException 
     */
    private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException, NoSuchFieldException,
            SecurityException, IllegalArgumentException, IllegalAccessException {
        // default deserialization
        ois.defaultReadObject();

        // fill in the final fields
        {
            final Field f = this.getClass().getDeclaredField("_mutable_state");
            f.setAccessible(true);
            f.set(this, new MutableState());
        }
        {
            final Field f = this.getClass().getDeclaredField("_mapper");
            f.setAccessible(true);
            f.set(this, BeanTemplateUtils.configureMapper(Optional.empty()));
        }
        {
            final Field f = this.getClass().getDeclaredField("_multi_writer");
            f.setAccessible(true);
            f.set(this, new SetOnce<MultiDataService>());
        }
        {
            final Field f = this.getClass().getDeclaredField("_state_name");
            f.setAccessible(true);
            f.set(this, State.IN_MODULE);
        }

        // fill in the object fields
        initializeNewContext(_mutable_serializable_signature);
    }

    /** (FOR INTERNAL DATA MANAGER USE ONLY) Sets the bucket for this context instance
     * @param this_bucket - the bucket to associate
     * @return whether the bucket has been updated (ie fails if it has already been set)
     */
    public boolean setBucket(final DataBucketBean this_bucket) {
        // (also check if we're in "document mode" (ie can overwrite existing docs))

        return _mutable_state.bucket.set(this_bucket);
    }

    /** (FOR INTERNAL DATA MANAGER USE ONLY) Sets the technology library bean for this context instance
     * @param lib_config - the library bean to be associated
     * @return whether the library bean has been updated (ie fails if it has already been set)
     */
    public boolean setTechnologyConfig(final SharedLibraryBean lib_config) {
        return _mutable_state.technology_config.set(lib_config);
    }

    /** (FOR INTERNAL DATA MANAGER USE ONLY) Sets the optional module library beans for this context instance
     * @param lib_configs - the library beans to be associated
     */
    @SuppressWarnings("deprecation")
    public void resetLibraryConfigs(final Map<String, SharedLibraryBean> lib_configs) {
        _mutable_state.library_configs.forceSet(lib_configs);
    }

    /** (FOR INTERNAL DATA MANAGER USE ONLY) Sets the job for this context
     * @param job - the job for this context
     */
    @SuppressWarnings("deprecation")
    public void resetJob(final AnalyticThreadJobBean job) {
        _mutable_state.job.forceSet(job);
    }

    /** Gets the job if one has been set, otherwise returns an empty Optional
     * @return the job, if set
     */
    public Optional<AnalyticThreadJobBean> getJob() {
        return Optional.of(_mutable_state.job).filter(SetOnce::isSet).map(SetOnce::get);
    }

    /** A very simple container for library beans
     * @author Alex
     */
    public static class LibraryContainerBean {
        LibraryContainerBean() {
        }

        LibraryContainerBean(Collection<SharedLibraryBean> libs) {
            this.libs = new ArrayList<>(libs);
        }

        List<SharedLibraryBean> libs;
    }

    /** FOR DEBUGGING AND TESTING ONLY, inserts a copy of the current context into the saved "in module" versions
     */
    public void overrideSavedContext() {
        static_instances.put(_mutable_state.signature_override.get(), this);
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#initializeNewContext(java.lang.String)
     */
    @Override
    public void initializeNewContext(final String signature) {
        _mutable_serializable_signature = signature;

        // Register myself a shutdown hook:
        Runtime.getRuntime().addShutdownHook(new Thread(Lambdas.wrap_runnable_u(() -> {
            if (_mutable_state.has_unflushed_data) {
                this.flushBatchOutput(Optional.empty(), _mutable_state.job.get()).get(60, TimeUnit.SECONDS);
            }
        })));

        try {
            // Inject dependencies
            final Config parsed_config = ConfigFactory.parseString(signature);
            final AnalyticsContext to_clone = static_instances.get(signature);

            if (null != to_clone) { //copy the fields            
                _service_context = to_clone._service_context;
                _core_management_db = to_clone._core_management_db;
                _security_service = to_clone._security_service;
                _logging_service = to_clone._logging_service;
                _distributed_services = to_clone._distributed_services;
                _storage_service = to_clone._storage_service;
                _globals = to_clone._globals;
                // (apart from bucket, which is handled below, rest of mutable state is not needed)
            } else {
                ModuleUtils.initializeApplication(Collections.emptyList(), Optional.of(parsed_config),
                        Either.right(this));

                _core_management_db = _service_context.getCoreManagementDbService(); // (actually returns the _core_ management db service)
                _distributed_services = _service_context
                        .getService(ICoreDistributedServices.class, Optional.empty()).get();
                _storage_service = _service_context.getStorageService();
                _security_service = _service_context.getSecurityService();
                _logging_service = _service_context.getService(ILoggingService.class, Optional.empty()).get();
                _globals = _service_context.getGlobalProperties();
            }
            // Get bucket 

            final BeanTemplate<DataBucketBean> retrieve_bucket = BeanTemplateUtils
                    .from(parsed_config.getString(__MY_BUCKET_ID), DataBucketBean.class);
            this.setBucket(retrieve_bucket.get()); //(also checks on dedup setting)
            final BeanTemplate<SharedLibraryBean> retrieve_library = BeanTemplateUtils
                    .from(parsed_config.getString(__MY_TECH_LIBRARY_ID), SharedLibraryBean.class);
            _mutable_state.technology_config.set(retrieve_library.get());
            if (parsed_config.hasPath(__MY_MODULE_LIBRARY_ID)) {
                final BeanTemplate<LibraryContainerBean> retrieve_module = BeanTemplateUtils
                        .from(parsed_config.getString(__MY_MODULE_LIBRARY_ID), LibraryContainerBean.class);
                _mutable_state.library_configs.set(Optional.ofNullable(retrieve_module.get().libs)
                        .orElse(Collections.emptyList()).stream()
                        // (split each lib bean into 2 tuples, ie indexed by _id and path_name)
                        .flatMap(mod -> Arrays.asList(Tuples._2T(mod._id(), mod), Tuples._2T(mod.path_name(), mod))
                                .stream())
                        .collect(Collectors.toMap(t2 -> t2._1(), t2 -> t2._2(), (t1, t2) -> t1 // (can't happen, ignore if it does)
                                , () -> new LinkedHashMap<String, SharedLibraryBean>())));
            }
            if (parsed_config.hasPath(__MY_JOB_ID)) {
                final String job_name = parsed_config.getString(__MY_JOB_ID);

                Optionals.of(() -> retrieve_bucket.get().analytic_thread().jobs()).orElse(Collections.emptyList())
                        .stream().filter(job -> job_name.equals(job.name())).findFirst()
                        .ifPresent(job -> _mutable_state.job.trySet(job));

                getJob().ifPresent(job -> setupOutputs(_mutable_state.bucket.get(), job));
            }
            static_instances.put(signature, this);
        } catch (Exception e) {
            //DEBUG
            //System.out.println(ErrorUtils.getLongForm("{0}", e));         

            throw new RuntimeException(e);
        }
    }

    /** Returns a config object containing:
     *  - set up for any of the services described
     *  - all the rest of the configuration
     *  - the bucket bean ID
     *  SIDE EFFECT - SETS UP THE SERVICES SET 
     * @param services
     * @return
     */
    protected Config setupServices(final Optional<DataBucketBean> maybe_bucket,
            final Optional<Set<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>>> services) {
        // 
        // - set up for any of the services described
        // - all the rest of the configuration
        // - the bucket bean ID

        final Config full_config = ModuleUtils.getStaticConfig()
                .withoutPath(DistributedServicesPropertyBean.APPLICATION_NAME)
                .withoutPath("MongoDbManagementDbService.v1_enabled") // (special workaround for V1 sync service)
        ;

        final ImmutableSet<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>> complete_services_set = ImmutableSet
                .<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>>builder()
                .addAll(services.orElse(Collections.emptySet()))
                .add(Tuples._2T(ICoreDistributedServices.class, Optional.empty()))
                .add(Tuples._2T(IManagementDbService.class, Optional.empty()))
                .add(Tuples._2T(ISearchIndexService.class, Optional.empty())) //(need this because logging doesn't correctly report its service set, should fix at some point)
                .add(Tuples._2T(IStorageService.class, Optional.empty()))
                .add(Tuples._2T(ISecurityService.class, Optional.empty()))
                .add(Tuples._2T(ILoggingService.class, Optional.empty()))
                .add(Tuples._2T(IManagementDbService.class, IManagementDbService.CORE_MANAGEMENT_DB))
                .addAll(_mutable_state.extra_auto_context_libs)
                .addAll(maybe_bucket
                        .map(bucket -> DataServiceUtils.listUnderlyingServiceProviders(bucket.data_schema()))
                        .orElse(Collections.emptyList()))
                .build();

        if (_mutable_state.service_manifest_override.isSet()) {
            if (!complete_services_set.equals(_mutable_state.service_manifest_override.get())) {
                throw new RuntimeException(ErrorUtils.SERVICE_RESTRICTIONS);
            }
        } else {
            _mutable_state.service_manifest_override.set(complete_services_set);
        }
        return full_config;
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getAnalyticsContextSignature(java.util.Optional, java.util.Optional)
     */
    @Override
    public String getAnalyticsContextSignature(final Optional<DataBucketBean> bucket,
            final Optional<Set<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>>> services) {
        if (_state_name == State.IN_TECHNOLOGY) {
            final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());

            final Config full_config = setupServices(bucket, services);

            if (_mutable_state.signature_override.isSet()) { // only run once... (this is important because can do ping/pong buffer type stuff below)
                //(note only doing this here so I can quickly check that I'm not being called multiple times with different services - ie in this case I error...)
                return this.getClass().getName() + ":" + _mutable_state.signature_override.get();
            }

            final Optional<Config> service_config = PropertiesUtils.getSubConfig(full_config, "service");
            final Config config_no_services = full_config.withoutPath("service");

            // Ugh need to add: core deps, core + underlying management db to this list

            // Service interface/implementation:
            final Config service_defn_subset = _mutable_state.service_manifest_override.get().stream() // DON'T MAKE PARALLEL SEE BELOW
                    .map(clazz_name -> {
                        final String config_path = clazz_name._2()
                                .orElse(clazz_name._1().getSimpleName().substring(1));
                        return Lambdas
                                .wrap_u(__ -> service_config.get().hasPath(config_path)
                                        ? Tuples._2T(config_path, service_config.get().getConfig(config_path))
                                        : null)
                                //(could add extra transforms here if we wanted)
                                .apply(Unit.unit());
                    }).filter(cfg -> null != cfg).reduce(ConfigFactory.empty(),
                            (acc, k_v) -> acc.withValue(k_v._1(), k_v._2().root()), (acc1, acc2) -> acc1 // (This will never be called as long as the above stream is not parallel)
            );

            // Service configuration:
            final Config service_cfgn_subset = _mutable_state.service_manifest_override.get().stream() // DON'T MAKE PARALLEL SEE BELOW
                    .reduce(config_no_services, // (leave other configurations, we just transform service specific configuration)
                            (acc, clazz_name) -> {
                                final Optional<? extends IUnderlyingService> underlying_service = _service_context
                                        .getService(clazz_name._1(), clazz_name._2());
                                return underlying_service
                                        .map(ds -> ds.createRemoteConfig(Optional.of(my_bucket), acc)).orElse(acc);
                            }, (acc1, acc2) -> acc1 // (This will never be called as long as the above stream is not parallel)
            );

            final Config config_subset_services = service_cfgn_subset.withValue("service",
                    service_defn_subset.root());

            final Config last_call = Lambdas
                    .<Config, Config>wrap_u(
                            config_pipeline -> _mutable_state.library_configs.isSet()
                                    ? config_pipeline.withValue(
                                            __MY_MODULE_LIBRARY_ID,
                                            ConfigValueFactory.fromAnyRef(BeanTemplateUtils
                                                    .toJson(new LibraryContainerBean(_mutable_state.library_configs
                                                            .get().entrySet().stream()
                                                            .filter(kv -> kv.getValue().path_name()
                                                                    .equals(kv.getKey()))
                                                            .map(kv -> kv.getValue()).collect(Collectors.toList())))
                                                    .toString()))
                                    : config_pipeline)
                    .andThen(Lambdas.wrap_u(config_pipeline -> getJob().isPresent() ? config_pipeline
                            .withValue(__MY_JOB_ID, ConfigValueFactory.fromAnyRef(getJob().get().name()) //(exists by above "if")
                    ) : config_pipeline)).apply(config_subset_services)
                    .withValue(__MY_BUCKET_ID,
                            ConfigValueFactory.fromAnyRef(BeanTemplateUtils.toJson(my_bucket).toString()))
                    .withValue(__MY_TECH_LIBRARY_ID, ConfigValueFactory.fromAnyRef(
                            BeanTemplateUtils.toJson(_mutable_state.technology_config.get()).toString()));

            final String ret1 = last_call.root().render(ConfigRenderOptions.concise());
            _mutable_state.signature_override.set(ret1);
            _mutable_serializable_signature = ret1;
            final String ret = this.getClass().getName() + ":" + ret1;

            // Finally this is a good central (central per-job, NOT per-bucket or per-bucket-and-tech (*)) place to sort out deleting ping pong buffers
            // (*) though there's some logic that attempts to simulate running once-per-bucket-and-tech
            // which should work for the moment - though that functionality will still be disabled because it _doesn't_ work for tidying up at the end
            getJob().ifPresent(job -> {

                centralPerJobOutputSetup(my_bucket, job);
                centralPerBucketOutputSetup(my_bucket);
            });
            return ret;
        } else {
            throw new RuntimeException(ErrorUtils.TECHNOLOGY_NOT_MODULE);
        }
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService#getUnderlyingArtefacts()
     */
    @Override
    public Collection<Object> getUnderlyingArtefacts() {
        if (_state_name == State.IN_TECHNOLOGY) {
            if (!_mutable_state.service_manifest_override.isSet()) {
                throw new RuntimeException(ErrorUtils.SERVICE_RESTRICTIONS);
            }
            return Stream.concat(Stream.of(this, _service_context, new SharedErrorUtils()) //(last one gives us the core_shared_lib)
                    ,
                    _mutable_state.service_manifest_override.get().stream()
                            .map(t2 -> _service_context.getService(t2._1(), t2._2()))
                            .filter(service -> service.isPresent())
                            .flatMap(service -> service.get().getUnderlyingArtefacts().stream()))
                    .collect(Collectors.toSet());
        } else {
            throw new RuntimeException(ErrorUtils.TECHNOLOGY_NOT_MODULE);
        }
    }

    ////////////////////////////////////////////////////////////////

    // OVERRIDES

    @SuppressWarnings("unchecked")
    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService#getUnderlyingPlatformDriver(java.lang.Class, java.util.Optional)
     */
    @Override
    public <T> Optional<T> getUnderlyingPlatformDriver(final Class<T> driver_class,
            final Optional<String> driver_options) {

        if (IEnrichmentModuleContext.class.isAssignableFrom(driver_class)) {
            return driver_options.map(batch_str -> {
                return (Optional<T>) Optional.of(
                        new BatchEnrichmentContext(new BatchEnrichmentContext(this), Integer.parseInt(batch_str)));
            }).orElseGet(() -> (Optional<T>) Optional.of(new BatchEnrichmentContext(this)));
        } else
            return Optional.empty();
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getServiceContext()
     */
    @Override
    public IServiceContext getServiceContext() {
        return _service_context;
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getOutputPath(java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean)
     */
    @Override
    public Optional<Tuple2<String, Optional<String>>> getOutputPath(final Optional<DataBucketBean> bucket,
            final AnalyticThreadJobBean job) {
        // TODO (ALEPH-12)
        throw new RuntimeException(ErrorUtils.get(ErrorUtils.NOT_YET_IMPLEMENTED, "getOutputPath"));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getOutputTopic(java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean)
     */
    @Override
    public Optional<String> getOutputTopic(final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job) {
        final DataBucketBean this_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());
        final AnalyticThreadJobOutputBean output = Optional.ofNullable(job.output())
                .orElseGet(() -> BeanTemplateUtils.build(AnalyticThreadJobOutputBean.class).done().get());
        final boolean is_transient = Optional.ofNullable(output.is_transient()).orElse(false);

        if (_streaming_types
                .contains(Optional.ofNullable(output.transient_type()).orElse(MasterEnrichmentType.none))) {
            final String topic = is_transient
                    ? _distributed_services.generateTopicName(this_bucket.full_name(), Optional.of(job.name()))
                    : _distributed_services.generateTopicName(
                            Optional.ofNullable(output.sub_bucket_path()).orElse(this_bucket.full_name()),
                            ICoreDistributedServices.QUEUE_END_NAME);
            return Optional.of(topic);
        } else
            return Optional.empty();
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getInputTopics(java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean.AnalyticThreadJobInputBean)
     */
    @Override
    public List<String> getInputTopics(final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job,
            final AnalyticThreadJobInputBean job_input) {
        final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());

        final AuthorizationBean auth_bean = new AuthorizationBean(my_bucket.owner_id());
        final ICrudService<DataBucketBean> secured_bucket_crud = _core_management_db.readOnlyVersion()
                .getDataBucketStore().secured(_service_context, auth_bean);

        return Optional.of(job_input).filter(i -> "stream".equalsIgnoreCase(i.data_service()))
                .map(Lambdas.wrap_u(i -> {
                    //Topic naming: 5 cases: 
                    // 1) i.resource_name_or_id is a bucket path, ie starts with "/", and then:
                    // 1.1) if it ends ":name" then it points to a specific point in the bucket processing
                    // 1.2) if it ends ":" or ":$start" then it points to the start of that bucket's processing (ie the output of its harvester) .. which corresponds to the queue name with no sub-channel
                    // 1.3) otherwise it points to the end of the bucket's processing (ie immediately before its output) ... which corresponds to the queue name with the sub-channel "$end"
                    // 2) i.resource_name_or_id does not start with a "/", in which case:
                    // 2.1) if it's a non-empty string, then it's the name of one of the internal jobs (can interpret that as this.full_name + name)
                    // 2.2) if it's "" or null then it's pointing to the output of its own bucket's harvester
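                    // For example (hypothetical resource names -> [bucket path, sub-channel]):
                    //   "/other/bucket:job1"   -> ["/other/bucket", "job1"]   (1.1)
                    //   "/other/bucket:"       -> ["/other/bucket", ""]       (1.2)
                    //   "/other/bucket:$start" -> ["/other/bucket", ""]       (1.2)
                    //   "/other/bucket"        -> ["/other/bucket", "$end"]   (1.3)
                    //   "job1"                 -> [<this bucket>, "job1"]     (2.1)
                    //   ""                     -> [<this bucket>, ""]         (2.2)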

                    final String[] bucket_subchannel = Lambdas.<String, String[]>wrap_u(s -> {
                        if (s.startsWith("/")) { //1.*
                            if (s.endsWith(":")) {
                                return new String[] { s.substring(0, s.length() - 1), "" }; // (1.2a)
                            } else {
                                final String[] b_sc = s.split(":");
                                if (1 == b_sc.length) {
                                    return new String[] { b_sc[0], "$end" }; // (1.3)
                                } else if ("$start".equals(b_sc[1])) {
                                    return new String[] { b_sc[0], "" }; // (1.2b)                           
                                } else {
                                    return b_sc; //(1.1)
                                }
                            }
                        } else { //2.*
                            return new String[] { my_bucket.full_name(), s };
                        }
                    }).apply(i.resource_name_or_id());

                    // Check this bucket exists and I have read access to it
                    if (!my_bucket.full_name().equals(bucket_subchannel[0])) {
                        boolean found_bucket = secured_bucket_crud.getObjectBySpec(
                                CrudUtils.allOf(DataBucketBean.class).when(DataBucketBean::full_name,
                                        bucket_subchannel[0]),
                                Collections.emptyList(), // (don't want any part of the bucket, just whether it exists or not)
                                true).get().isPresent();
                        if (!found_bucket) {
                            throw new RuntimeException(ErrorUtils.get(ErrorUtils.BUCKET_NOT_FOUND_OR_NOT_READABLE,
                                    bucket_subchannel[0]));
                        }
                    }

                    final String topic = _distributed_services.generateTopicName(bucket_subchannel[0],
                            Optional.of(bucket_subchannel[1]).filter(s -> !s.isEmpty()));
                    _distributed_services.createTopic(topic, Optional.empty());
                    return topic;
                })).map(i -> Arrays.asList(i)).orElse(Collections.emptyList());
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getInputPaths(java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean.AnalyticThreadJobInputBean)
     */
    @Override
    public List<String> getInputPaths(final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job,
            final AnalyticThreadJobInputBean job_input) {

        final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());

        final AuthorizationBean auth_bean = new AuthorizationBean(my_bucket.owner_id());
        final ICrudService<DataBucketBean> secured_bucket_crud = _core_management_db.readOnlyVersion()
                .getDataBucketStore().secured(_service_context, auth_bean);

        return Optional.of(job_input).filter(i -> null != i.data_service())
                .filter(i -> "batch".equalsIgnoreCase(i.data_service())
                        || DataSchemaBean.StorageSchemaBean.name.equalsIgnoreCase(i.data_service()))
                .map(Lambdas.wrap_u(i -> {
                    if ("batch".equalsIgnoreCase(i.data_service())) {
                        final String[] bucket_subchannel = Lambdas.<String, String[]>wrap_u(s -> {

                            // 1) If the resource starts with "/" then it must point to an intermediate (transient) batch result of an external bucket
                            // 2) Otherwise the resource is treated as the name of one of this bucket's own transient jobs ("" or absent means this bucket's own import directory)
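                            //    e.g. (hypothetical resource names -> [bucket path, job name]):
                            //      "/other/bucket:job1" -> ["/other/bucket", "job1"] (transient output of that bucket's job)
                            //      "job1"               -> [<this bucket>, "job1"]   (transient output of my own job)
                            //      "" or absent         -> [<this bucket>, ""]       (my own import directory)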

                            if (s.startsWith("/")) { //1.*
                                if (s.endsWith(":")) {
                                    return new String[] { s.substring(0, s.length() - 1), "" }; // (1.2a)
                                } else {
                                    final String[] b_sc = s.split(":");
                                    if (1 == b_sc.length) {
                                        return new String[] { my_bucket.full_name(), "" };
                                    } else {
                                        return b_sc; //(1.1)
                                    }
                                }
                            } else { //2.*
                                return new String[] { my_bucket.full_name(), s };
                            }
                        }).apply(Optional.ofNullable(i.resource_name_or_id()).orElse(""));

                        final Optional<DataBucketBean> bucket_to_check = Lambdas.get(Lambdas.wrap_u(() -> {
                            if (bucket_subchannel[0].equals(my_bucket.full_name())) {
                                return Optional.of(my_bucket);
                            } else {
                                return secured_bucket_crud.getObjectBySpec(CrudUtils.allOf(DataBucketBean.class)
                                        .when(DataBucketBean::full_name, bucket_subchannel[0])).get();
                            }
                        }));
                        return Lambdas.get(() -> {
                            if (!bucket_subchannel[0].equals(my_bucket.full_name())
                                    || !bucket_subchannel[1].isEmpty()) {
                                bucket_to_check.map(input_bucket -> input_bucket.analytic_thread())
                                        .flatMap(
                                                a_thread -> Optional.ofNullable(a_thread.jobs()))
                                        .flatMap(jobs -> jobs.stream()
                                                .filter(j -> bucket_subchannel[1].equals(j.name()))
                                                .filter(j -> _batch_types
                                                        .contains(Optionals.of(() -> j.output().transient_type())
                                                                .orElse(MasterEnrichmentType.none)))
                                                .filter(j -> Optionals.of(() -> j.output().is_transient())
                                                        .orElse(false))
                                                .findFirst())
                                        .orElseThrow(() -> new RuntimeException(ErrorUtils.get(
                                                ErrorUtils.INPUT_PATH_NOT_A_TRANSIENT_BATCH, my_bucket.full_name(),
                                                job.name(), bucket_subchannel[0], bucket_subchannel[1])));

                                return Arrays.asList(_storage_service.getBucketRootPath() + bucket_subchannel[0]
                                        + IStorageService.TRANSIENT_DATA_SUFFIX_SECONDARY + bucket_subchannel[1]
                                        + IStorageService.PRIMARY_BUFFER_SUFFIX + "**/*");
                            } else { // This is my input directory
                                return Arrays.asList(_storage_service.getBucketRootPath() + my_bucket.full_name()
                                        + IStorageService.TO_IMPORT_DATA_SUFFIX + "*");
                            }
                        });
                    } else { // storage service ... 3 options :raw, :json, :processed (defaults to :processed)
                        if (Optional.of(true).equals(
                                Optional.ofNullable(i.config()).map(cfg -> cfg.high_granularity_filter()))) {
                            throw new RuntimeException(ErrorUtils.get(
                                    ErrorUtils.HIGH_GRANULARITY_FILTER_NOT_SUPPORTED, my_bucket.full_name(),
                                    job.name(), Optional.ofNullable(i.name()).orElse("(no name)")));
                        }

                        final String bucket_name = i.resource_name_or_id().split(":")[0];

                        // Check we have authentication for this bucket:

                        final boolean found_bucket = secured_bucket_crud
                                .getObjectBySpec(
                                        CrudUtils.allOf(DataBucketBean.class).when(DataBucketBean::full_name,
                                                bucket_name),
                                        Collections.emptyList(), // (don't want any part of the bucket, just whether it exists or not)
                                        true)
                                .get().isPresent();

                        if (!found_bucket) {
                            throw new RuntimeException(
                                    ErrorUtils.get(ErrorUtils.BUCKET_NOT_FOUND_OR_NOT_READABLE, bucket_name));
                        }
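                        // Sub-service selection below, e.g. (hypothetical resource names):
                        //   "/other/bucket:raw"  -> "raw/current/"
                        //   "/other/bucket:json" -> "json/current/"
                        //   "/other/bucket"      -> "processed/current/" (the default)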
                        final String sub_service = Patterns.match(i.resource_name_or_id()).<String>andReturn()
                                .when(s -> s.endsWith(":raw"), __ -> "raw/current/") // (input paths are always from primary)
                                .when(s -> s.endsWith(":json"), __ -> "json/current/")
                                .otherwise(__ -> "processed/current/");

                        final String base_path = _storage_service.getBucketRootPath() + bucket_name
                                + IStorageService.STORED_DATA_SUFFIX + sub_service;
                        return Optional.ofNullable(i.config())
                                .filter(cfg -> (null != cfg.time_min()) || (null != cfg.time_max())).map(cfg -> {
                                    try {
                                        final FileContext fc = _storage_service
                                                .getUnderlyingPlatformDriver(FileContext.class, Optional.empty())
                                                .get();

                                        //DEBUG
                                        //_logger.warn("Found1: " + Arrays.stream(fc.util().listStatus(new Path(base_path))).map(f -> f.getPath().toString()).collect(Collectors.joining(";")));                                                            
                                        //_logger.warn("Found2: " + TimeSliceDirUtils.annotateTimedDirectories(tmp_paths).map(t -> t.toString()).collect(Collectors.joining(";")));
                                        //_logger.warn("Found3: " + TimeSliceDirUtils.getQueryTimeRange(cfg, new Date()));

                                        final Stream<String> paths = Arrays
                                                .stream(fc.util().listStatus(new Path(base_path)))
                                                .filter(f -> f.isDirectory())
                                                .map(f -> f.getPath().toUri().getPath()) // (remove the hdfs:// bit, which seems to be breaking with HA)
                                        ;

                                        return TimeSliceDirUtils
                                                .filterTimedDirectories(
                                                        TimeSliceDirUtils.annotateTimedDirectories(paths),
                                                        TimeSliceDirUtils.getQueryTimeRange(cfg, new Date()))
                                                .map(s -> s + "/*").collect(Collectors.toList());
                                    } catch (Exception e) {
                                        return null;
                                    } // will fall through to...
                                }).orElseGet(() -> {
                                    // No time based filtering possible
                                    final String suffix = "**/*";
                                    return Arrays.asList(base_path + suffix);
                                });
                    }
                })).orElse(Collections.emptyList());

    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#checkForListeners(java.util.Optional, java.util.Optional)
     */
    @Override
    public boolean checkForListeners(final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job) {

        final DataBucketBean this_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());
        final AnalyticThreadJobOutputBean output = Optional.ofNullable(job.output())
                .orElseGet(() -> BeanTemplateUtils.build(AnalyticThreadJobOutputBean.class).done().get());

        final String topic_name = Optional.ofNullable(output.is_transient()).orElse(false)
                ? _distributed_services.generateTopicName(this_bucket.full_name(), Optional.of(job.name()))
                : _distributed_services.generateTopicName(
                        Optional.ofNullable(output.sub_bucket_path()).orElse(this_bucket.full_name()),
                        ICoreDistributedServices.QUEUE_END_NAME);

        return _distributed_services.doesTopicExist(topic_name);
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getAnalyticsContextLibraries(java.util.Optional)
     */
    @Override
    public List<String> getAnalyticsContextLibraries(
            final Optional<Set<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>>> services) {

        // Consists of:
        // 1) This library 
        // 2) Libraries that are always needed:      
        //    - core distributed services (implicit)
        //    - management db (core + underlying + any underlying drivers)
        //    - data model 
        // 3) Any libraries associated with the services      

        if (_state_name == State.IN_TECHNOLOGY) {
            if (!_mutable_state.service_manifest_override.isSet()) {
                setupServices(_mutable_state.bucket.optional(), services);
            }

            // Already registered 
            final Set<String> all_service_class_files = this.getUnderlyingArtefacts().stream()
                    .map(service -> LiveInjector.findPathJar(service.getClass(), "")).collect(Collectors.toSet());

            // Combine them together
            final List<String> ret_val = ImmutableSet.<String>builder().addAll(all_service_class_files).build()
                    .stream().filter(f -> (null != f) && !f.equals("")).collect(Collectors.toList());

            if (ret_val.isEmpty()) {
                _logger.warn(
                        "WARNING: no library files found, probably because this is running from an IDE - instead taking all JARs from: "
                                + (_globals.local_root_dir() + "/lib/"));
            }

            return !ret_val.isEmpty() ? ret_val :
            // Special case: no aleph2 libs found, this is almost certainly because this is being run from eclipse...
                    Lambdas.get(() -> {
                        try {
                            return FileUtils
                                    .listFiles(new File(_globals.local_root_dir() + "/lib/"),
                                            new String[] { "jar" }, false)
                                    .stream().map(File::toString).collect(Collectors.toList());
                        } catch (Exception e) {
                            throw new RuntimeException("In eclipse/IDE mode, directory not found: "
                                    + (_globals.local_root_dir() + "/lib/"));
                        }
                    });
        } else {
            throw new RuntimeException(ErrorUtils.TECHNOLOGY_NOT_MODULE);
        }
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getAnalyticsLibraries(java.util.Optional)
     */
    @Override
    public CompletableFuture<Map<String, String>> getAnalyticsLibraries(final Optional<DataBucketBean> bucket,
            final Collection<AnalyticThreadJobBean> jobs) {
        if (_state_name == State.IN_TECHNOLOGY) {

            final String name_or_id = jobs.stream().findFirst().get().analytic_technology_name_or_id();

            final SingleQueryComponent<SharedLibraryBean> tech_query = CrudUtils.anyOf(SharedLibraryBean.class)
                    .when(SharedLibraryBean::_id, name_or_id).when(SharedLibraryBean::path_name, name_or_id);

            final List<SingleQueryComponent<SharedLibraryBean>> other_libs = Stream
                    .concat(Optional.ofNullable(jobs.stream().findFirst().get().module_name_or_id()).map(Stream::of)
                            .orElseGet(Stream::empty),
                            Optionals.ofNullable(jobs).stream()
                                    .flatMap(job -> Optionals.ofNullable(job.library_names_or_ids()).stream()))
                    .map(name -> {
                        return CrudUtils.anyOf(SharedLibraryBean.class).when(SharedLibraryBean::_id, name)
                                .when(SharedLibraryBean::path_name, name);
                    }).collect(Collector.of(LinkedList::new, LinkedList::add, (left, right) -> {
                        left.addAll(right);
                        return left;
                    }));

            @SuppressWarnings("unchecked")
            final MultiQueryComponent<SharedLibraryBean> spec = CrudUtils.<SharedLibraryBean>anyOf(tech_query,
                    other_libs.toArray(new SingleQueryComponent[other_libs.size()]));

            // Get the names or ids, get the shared libraries, get the cached ids (must be present)

            return this._core_management_db.readOnlyVersion().getSharedLibraryStore()
                    .getObjectsBySpec(spec, Arrays.asList(JsonUtils._ID, "path_name"), true).thenApply(cursor -> {
                        return StreamSupport.stream(cursor.spliterator(), false)
                                .collect(Collectors.<SharedLibraryBean, String, String>toMap(lib -> lib.path_name(),
                                        lib -> _globals.local_cached_jar_dir() + "/"
                                                + JarCacheUtils.buildCachedJarName(lib)));
                    });
        } else {
            throw new RuntimeException(ErrorUtils.TECHNOLOGY_NOT_MODULE);
        }
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getGlobalAnalyticTechnologyObjectStore(java.lang.Class, java.util.Optional)
     */
    @Override
    public <S> ICrudService<S> getGlobalAnalyticTechnologyObjectStore(final Class<S> clazz,
            final Optional<String> collection) {
        return this.getBucketObjectStore(clazz, Optional.empty(), collection,
                Optional.of(AssetStateDirectoryBean.StateDirectoryType.library));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getLibraryObjectStore(java.lang.Class, java.util.Optional)
     */
    @Override
    public <S> Optional<ICrudService<S>> getLibraryObjectStore(final Class<S> clazz, final String name_or_id,
            final Optional<String> collection) {
        return Optional.ofNullable(this.getLibraryConfigs().get(name_or_id))
                .map(module_lib -> _core_management_db.getPerLibraryState(clazz, module_lib, collection));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getBucketObjectStore(java.lang.Class, java.util.Optional, java.util.Optional, java.util.Optional)
     */
    @Override
    public <S> ICrudService<S> getBucketObjectStore(final Class<S> clazz, final Optional<DataBucketBean> bucket,
            final Optional<String> collection, final Optional<StateDirectoryType> type) {
        final Optional<DataBucketBean> this_bucket = bucket.map(x -> Optional.of(x)).orElseGet(
                () -> _mutable_state.bucket.isSet() ? Optional.of(_mutable_state.bucket.get()) : Optional.empty());

        return Patterns.match(type).<ICrudService<S>>andReturn()
                .when(t -> t.isPresent() && AssetStateDirectoryBean.StateDirectoryType.enrichment == t.get(),
                        __ -> _core_management_db.getBucketEnrichmentState(clazz, this_bucket.get(), collection))
                .when(t -> t.isPresent() && AssetStateDirectoryBean.StateDirectoryType.harvest == t.get(),
                        __ -> _core_management_db.getBucketHarvestState(clazz, this_bucket.get(), collection))
                // assume this is the technology context, most likely usage
                .when(t -> t.isPresent() && AssetStateDirectoryBean.StateDirectoryType.library == t.get(),
                        __ -> _core_management_db.getPerLibraryState(clazz, this.getTechnologyConfig(), collection))
                // default: analytics or not specified: analytics
                .otherwise(__ -> _core_management_db.getBucketAnalyticThreadState(clazz, this_bucket.get(),
                        collection));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getBucket()
     */
    @Override
    public Optional<DataBucketBean> getBucket() {
        return _mutable_state.bucket.isSet() ? Optional.of(_mutable_state.bucket.get()) : Optional.empty();
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getTechnologyConfig()
     */
    @Override
    public SharedLibraryBean getTechnologyConfig() {
        return _mutable_state.technology_config.get();
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getLibraryConfigs()
     */
    @Override
    public Map<String, SharedLibraryBean> getLibraryConfigs() {
        return _mutable_state.library_configs.isSet() ? _mutable_state.library_configs.get()
                : Collections.emptyMap();
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getBucketStatus(java.util.Optional)
     */
    @Override
    public CompletableFuture<DataBucketStatusBean> getBucketStatus(final Optional<DataBucketBean> bucket) {
        return this._core_management_db.readOnlyVersion().getDataBucketStatusStore()
                .getObjectById(bucket.orElseGet(() -> _mutable_state.bucket.get())._id())
                .thenApply(opt_status -> opt_status.get());
        // (ie will exception if not present)
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getLogger(java.util.Optional)
     */
    @Override
    public IBucketLogger getLogger(Optional<DataBucketBean> bucket) {
        final DataBucketBean b = bucket.orElseGet(() -> _mutable_state.bucket.get());
        return _mutable_state.bucket_loggers.computeIfAbsent(b.full_name(), (k) -> _logging_service.getLogger(b));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#emergencyDisableBucket(java.util.Optional)
     */
    @Override
    public void emergencyDisableBucket(final Optional<DataBucketBean> bucket) {
        throw new RuntimeException(ErrorUtils.get(ErrorUtils.NOT_YET_IMPLEMENTED, "emergencyDisableBucket"));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#emergencyQuarantineBucket(java.util.Optional, java.lang.String)
     */
    @Override
    public void emergencyQuarantineBucket(final Optional<DataBucketBean> bucket, final String quarantine_duration) {
        throw new RuntimeException(ErrorUtils.get(ErrorUtils.NOT_YET_IMPLEMENTED, "emergencyQuarantineBucket"));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#sendObjectToStreamingPipeline(java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, fj.data.Either, java.util.Optional)
     */
    @Override
    public Validation<BasicMessageBean, JsonNode> sendObjectToStreamingPipeline(
            final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job,
            final Either<JsonNode, Map<String, Object>> object, final Optional<AnnotationBean> annotations) {
        if (annotations.isPresent()) {
            throw new RuntimeException(ErrorUtils.get(ErrorUtils.NOT_YET_IMPLEMENTED, "annotations"));
        }
        final JsonNode obj_json = object.either(__ -> __,
                map -> (JsonNode) _mapper.convertValue(map, JsonNode.class));

        return this.getOutputTopic(bucket, job).<Validation<BasicMessageBean, JsonNode>>map(topic -> {
            if (_distributed_services.doesTopicExist(topic)) {
                // (ie someone is listening in on our output data, so duplicate it for their benefit)
                _distributed_services.produce(topic, obj_json.toString());
                return Validation.success(obj_json);
            } else { // (no listeners: return fail so the caller knows nothing was sent, but with success=true since this is informational rather than an error)
                return Validation.fail(ErrorUtils.buildSuccessMessage(this.getClass().getSimpleName(),
                        "sendObjectToStreamingPipeline", "Bucket:job {0}:{1} topic {2} has no listeners",
                        bucket.map(b -> b.full_name()).orElse("(unknown)"), job.name(), topic));
            }
        }).orElseGet(() -> {
            return Validation.fail(ErrorUtils.buildErrorMessage(this.getClass().getSimpleName(),
                    "sendObjectToStreamingPipeline", "Bucket:job {0}:{1} has no output topic",
                    bucket.map(b -> b.full_name()).orElse("(unknown)"), job.name()));
        });
    }
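
     // Illustrative usage sketch (hypothetical job/variable names): push a record onto the bucket's output
     // topic for any downstream streaming listeners; a fail Validation here normally just means nobody is
     // listening (or no topic is configured), so it is typically logged rather than treated as an error.
     //
     //   final Map<String, Object> record = ImmutableMap.of("field", "value");
     //   final Validation<BasicMessageBean, JsonNode> result = analytics_context
     //       .sendObjectToStreamingPipeline(Optional.empty(), my_job, Either.right(record), Optional.empty());
     //   if (result.isFail()) _logger.debug(result.fail().message());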

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getServiceInput(java.lang.Class, java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean.AnalyticThreadJobInputBean)
     */
    @Override
    public <T extends IAnalyticsAccessContext<?>> Optional<T> getServiceInput(final Class<T> clazz,
            final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job,
            final AnalyticThreadJobInputBean job_input) {
        if (_mutable_state.service_manifest_override.isSet()) { // Can't call getServiceInput after getContextSignature/getContextLibraries
            throw new RuntimeException(ErrorUtils.get(ErrorUtils.SERVICE_RESTRICTIONS));
        }

        //TODO: should support "" as myself (clone or something?) - also check if null works in getInputPaths/Queues
        //TODO: what happens if I external emit to myself (again supporting "") .. not sure if the output services will be set up?

        // First off, check we have permissions:
        final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());

        final AuthorizationBean auth_bean = new AuthorizationBean(my_bucket.owner_id());
        final ICrudService<DataBucketBean> secured_bucket_crud = _core_management_db.readOnlyVersion()
                .getDataBucketStore().secured(_service_context, auth_bean);

        final String resource_or_name = Optional.ofNullable(job_input.resource_name_or_id()).orElse("/unknown");
        final String data_service = Optional.ofNullable(job_input.data_service()).orElse("unknown.unknown");

        final boolean found_bucket = Lambdas.wrap_u(() -> {
            return resource_or_name.startsWith(BucketUtils.EXTERNAL_BUCKET_PREFIX) ? true // (in this case we delegate the checking to the data service)
                    : secured_bucket_crud.getObjectBySpec(
                            CrudUtils.allOf(DataBucketBean.class).when(DataBucketBean::full_name,
                                    job_input.resource_name_or_id()),
                            Collections.emptyList(), // (don't want any part of the bucket, just whether it exists or not)
                            true).get().isPresent();
        }).get();

        if (!found_bucket) {
            throw new RuntimeException(
                    ErrorUtils.get(ErrorUtils.BUCKET_NOT_FOUND_OR_NOT_READABLE, job_input.resource_name_or_id()));
        }

        // Now try to get the technology driver

        // the job config is in the format owner_id:bucket_name:{input_config}
        final Optional<String> job_config = Optional.of(my_bucket.owner_id() + ":" + my_bucket.full_name() + ":"
                + BeanTemplateUtils.toJson(job_input).toString());

        final String[] other_service = data_service.split("[.]");

        final Optional<T> ret_val = getUnderlyingService(_service_context, other_service[0],
                Optionals.of(() -> other_service[1]))
                        .flatMap(service -> service.getUnderlyingPlatformDriver(clazz, job_config));

        // Add to list of extra services to add automatically
        if (ret_val.isPresent()) { // only if we managed to load the analytics access context
            DataServiceUtils.getUnderlyingServiceInterface(other_service[0])
                    .ifPresent(ds -> _mutable_state.extra_auto_context_libs
                            .add(Tuples._2T(ds, Optionals.of(() -> other_service[1]).filter(s -> !s.isEmpty()))));
        }

        return ret_val;
    }
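
     // Illustrative usage sketch (MyInputAccessContext is a hypothetical IAnalyticsAccessContext
     // implementation): resolve one of the job's declared inputs into a technology-specific access context,
     // eg to obtain the paths/topics to read from. Note this must be called before
     // getContextSignature/getContextLibraries, per the check at the top of the method.
     //
     //   final Optional<MyInputAccessContext> input_driver = analytics_context
     //       .getServiceInput(MyInputAccessContext.class, Optional.empty(), my_job, my_job.inputs().get(0));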

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#getServiceOutput(java.lang.Class, java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, java.lang.String)
     */
    @Override
    public <T extends IAnalyticsAccessContext<?>> Optional<T> getServiceOutput(final Class<T> clazz,
            final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job, final String data_service) {
        if (_mutable_state.service_manifest_override.isSet()) { // Can't call getServiceOutput after getContextSignature/getContextLibraries
            throw new RuntimeException(ErrorUtils.get(ErrorUtils.SERVICE_RESTRICTIONS));
        }
        final Optional<String> job_config = Optional.of(BeanTemplateUtils.toJson(job).toString());
        return getUnderlyingService(_service_context, data_service, Optional.empty())
                .flatMap(ds -> ds.getUnderlyingPlatformDriver(clazz, job_config));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#emitObject(java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, fj.data.Either, java.util.Optional)
     */
    @Override
    public Validation<BasicMessageBean, JsonNode> emitObject(final Optional<DataBucketBean> bucket,
            final AnalyticThreadJobBean job, final Either<JsonNode, Map<String, Object>> object,
            final Optional<AnnotationBean> annotations) {
        final DataBucketBean this_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());

        if (annotations.isPresent()) {
            throw new RuntimeException(ErrorUtils.get(ErrorUtils.NOT_YET_IMPLEMENTED, "annotations"));
        }
        final JsonNode obj_json = object.either(__ -> __,
                map -> (JsonNode) _mapper.convertValue(map, JsonNode.class));

        if (!this_bucket.full_name().equals(_mutable_state.bucket.get().full_name())) {
            return externalEmit(this_bucket, job, obj_json);
        }
        _mutable_state.has_unflushed_data = this._multi_writer.get().batchWrite(obj_json);

        final String topic = _distributed_services.generateTopicName(this_bucket.full_name(),
                ICoreDistributedServices.QUEUE_END_NAME);
        if (_distributed_services.doesTopicExist(topic)) {
            // (ie someone is listening in on our output data, so duplicate it for their benefit)
            _mutable_state.has_unflushed_data = true;
            _distributed_services.produce(topic, obj_json.toString());
        }
        //(else nothing to do)

        return Validation.success(obj_json);
    }
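
     // Illustrative usage sketch (hypothetical variable names): the standard per-record output call. It
     // writes via the batch multi-writer to the bucket's configured data services, mirrors to the output
     // topic only if someone is listening, and delegates to externalEmit if the target bucket is not the
     // current one. Batch writes are buffered, so pair this with flushBatchOutput at the end of the batch.
     //
     //   analytics_context.emitObject(Optional.empty(), my_job, Either.right(my_record_map), Optional.empty());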

    /////////////////////////////////////////////////////////////////////////////////////////////////

    // External emit logic

    /** Handles sending of objects to other parts of the system
     * @param external_bucket
     * @param job
     * @param obj_json
     * @return
     */
    protected Validation<BasicMessageBean, JsonNode> externalEmit(final DataBucketBean external_bucket,
            final AnalyticThreadJobBean job, final JsonNode obj_json) {
        // Do we already know about this object:
        final IAnalyticsContext sub_context = _mutable_state.sub_buckets.get(external_bucket.full_name());
        if (null != sub_context) { //easy case, sub-buckets are constructed dynamically
            return sub_context.emitObject(sub_context.getBucket(), job, Either.left(obj_json), Optional.empty());
        } else { // this is more complicated         
            // First off - if this is a test bucket then we're not going to write anything, but we will do all the authentication   
            final boolean is_test_bucket = BucketUtils.isTestBucket(_mutable_state.bucket.get());

            final Either<Either<IBatchSubservice<JsonNode>, IDataWriteService<JsonNode>>, String> element = _mutable_state.external_buckets
                    .computeIfAbsent(external_bucket.full_name(), s -> {
                        // Check this is supported:
                        final boolean matches_glob = Optionals
                                .of(() -> _mutable_state.bucket.get().external_emit_paths())
                                .orElse(Collections.emptyList()).stream()
                                .map(p -> FileSystems.getDefault().getPathMatcher("glob:" + p))
                                .anyMatch(matcher -> matcher.matches(FileSystems.getDefault().getPath(s)));

                        return matches_glob ? getNewExternalEndpoint(s) : Either.right(null);
                    });

            return element.<Validation<BasicMessageBean, JsonNode>>either(e -> e.either(batch -> {
                if (!is_test_bucket) {
                    _mutable_state.has_unflushed_data = true;
                    batch.storeObject(obj_json);
                }
                return Validation.success(obj_json);
            }, slow -> {
                if (!is_test_bucket) {
                    _mutable_state.has_unflushed_data = true;
                    slow.storeObject(obj_json);
                }
                return Validation.success(obj_json);
            }), topic -> {
                if (null == topic) { // this hack means not present
                    return Validation.fail(ErrorUtils.buildErrorMessage(this.getClass().getSimpleName(),
                            "externalEmit", "Bucket {0} not found or not authorized", external_bucket.full_name()));
                } else if (_distributed_services.doesTopicExist(topic)) {
                    // (ie someone is listening in on our output data, so duplicate it for their benefit)
                    if (!is_test_bucket) {
                        _mutable_state.has_unflushed_data = true;
                        _distributed_services.produce(topic, obj_json.toString());
                    }
                    return Validation.success(obj_json);
                } else {
                    return Validation.fail(ErrorUtils.buildSuccessMessage(this.getClass().getSimpleName(),
                            "sendObjectToStreamingPipeline", "Bucket:job {0}:{1} topic {2} has no listeners",
                            external_bucket.full_name(), job.name(), topic));
                }
            });
        }
    }
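
     // Note on emitting to other buckets (a sketch of the checks above): the emitting bucket must whitelist
     // the destination with an "external_emit_paths" glob, and its owner must have read/write permission on
     // the destination (checked in getNewExternalEndpoint below); test buckets pass the same checks but do
     // not actually write. Eg an illustrative bucket fragment allowing emits anywhere under /aleph2_external/:
     //
     //   "external_emit_paths": [ "/aleph2_external/**" ]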

    /** Validates and retrieves (and caches) a requested output
     * @param bucket_name
     * @return
     */
    protected Either<Either<IBatchSubservice<JsonNode>, IDataWriteService<JsonNode>>, String> getNewExternalEndpoint(
            final String bucket_name) {

        final Optional<DataBucketBean> validated_external_bucket = _core_management_db.readOnlyVersion()
                .getDataBucketStore()
                .getObjectBySpec(CrudUtils.allOf(DataBucketBean.class).when(DataBucketBean::full_name, bucket_name))
                .<Optional<DataBucketBean>>thenApply(maybe_bucket -> {
                    return maybe_bucket.<DataBucketBean>map(bucket -> {
                        if (_service_context.getSecurityService().isUserPermitted(
                                _mutable_state.bucket.get().owner_id(), bucket,
                                Optional.of(ISecurityService.ACTION_READ_WRITE))) {
                            return bucket;
                        } else
                            return null; //(returns an Optional.empty())
                    });
                }).join();

        return validated_external_bucket
                .<Either<Either<IBatchSubservice<JsonNode>, IDataWriteService<JsonNode>>, String>>map(bucket -> {

                    // easy case:
                    if (null != bucket.master_enrichment_type()) {
                        return getExternalEndpoint(bucket, bucket.master_enrichment_type());
                    } else if (null == bucket.analytic_thread()) { // if no enrichment specified and no analytic thread, then assume we're just using it as a file queue (ie batch)
                        return getExternalEndpoint(bucket, MasterEnrichmentType.batch);
                    } else { // Analytic bucket, this is more complicated

                        final MasterEnrichmentType streaming_type_guess = Optionals
                                .of(() -> bucket.analytic_thread().jobs()).orElse(Collections.emptyList()).stream()
                                .filter(j -> {
                                    return Optionals.ofNullable(j.inputs()).stream().anyMatch(i -> {
                                        final String resource_name_or_id = Optional
                                                .ofNullable(i.resource_name_or_id()).orElse("");
                                        final boolean matches = (resource_name_or_id.isEmpty()
                                                || resource_name_or_id.equals(bucket.full_name()));
                                        return matches;
                                    });
                                }).map(j -> j.analytic_type()).findFirst().orElse(MasterEnrichmentType.batch); // if no enrichment specified and no analytic thread sat on the input, then assume we're just using it as a file queue (ie batch)

                        return getExternalEndpoint(bucket, streaming_type_guess);
                    }

                }).orElse(Either.right(null));
    }

    /** Utility that, given the streaming/batch type, returns the corresponding data service endpoint
     * @param bucket
     * @param type
     * @return
     */
    protected Either<Either<IBatchSubservice<JsonNode>, IDataWriteService<JsonNode>>, String> getExternalEndpoint(
            final DataBucketBean bucket, MasterEnrichmentType type) {
        if (type.equals(MasterEnrichmentType.none)) {
            return null; // (a null mapper result collapses to Either.right(null), ie "not present", via the Optional.map(...).orElse(...) in getNewExternalEndpoint)
        } else if (type.equals(MasterEnrichmentType.streaming)
                || type.equals(MasterEnrichmentType.streaming_and_batch)) {
            return Either.right(_distributed_services.generateTopicName(bucket.full_name(),
                    ICoreDistributedServices.QUEUE_START_NAME));
        } else { // batch
            final Optional<IDataWriteService<JsonNode>> crud_storage_service = _storage_service.getDataService()
                    .flatMap(s -> s.getWritableDataService(JsonNode.class, bucket,
                            Optional.of(IStorageService.StorageStage.transient_input.toString()),
                            Optional.empty()));

            final Optional<IDataWriteService.IBatchSubservice<JsonNode>> batch_storage_service = crud_storage_service
                    .flatMap(IDataWriteService::getBatchWriteSubservice);

            return batch_storage_service
                    .<Either<Either<IBatchSubservice<JsonNode>, IDataWriteService<JsonNode>>, String>>map(
                            batch -> Either.left(Either.left(batch)))
                    .orElseGet(() -> {
                        return crud_storage_service
                                .<Either<Either<IBatchSubservice<JsonNode>, IDataWriteService<JsonNode>>, String>>map(
                                        slow -> Either.left(Either.right(slow)))
                                .orElse(null);
                    });
        }
    }
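
     // (In the batch case above, external emits land in the destination bucket's "transient_input" storage
     //  area, ie they behave like files dropped into its input queue - preferring the batch sub-service when
     //  the storage service provides one and falling back to single-object writes otherwise.)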

    /////////////////////////////////////////////////////////////////////////////////////////////////

    // LOTS AND LOTS OF LOGIC TO MANAGE THE OUTPUTS

    //TODO (ALEPH-12): need to add sub-bucket support everywhere here

    ////////////////////////////////////////////////////

    // PUBLIC INTERFACE

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext#flushBatchOutput(java.util.Optional, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean)
     */
    @Override
    public CompletableFuture<?> flushBatchOutput(Optional<DataBucketBean> bucket, AnalyticThreadJobBean job) {
        _mutable_state.has_unflushed_data = false; // (just means that the shutdown hook will do nothing)

        // (this can safely be run for multiple jobs since it only applies to the outputter we're using in this process anyway, plus multiple
        // flushes don't have any functional side effects)

        final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());

        _logger.info(ErrorUtils.get("Flushing output for bucket:job {0}:{1}", my_bucket.full_name(), job.name()));

        //first flush loggers
        final Stream<CompletableFuture<?>> flush_loggers = _mutable_state.bucket_loggers.values().stream()
                .map(l -> l.flush());

        // Flush external and sub-buckets:
        final Stream<CompletableFuture<?>> flush_external = _mutable_state.external_buckets.values().stream()
                .map(e -> {
                    return e.<CompletableFuture<?>>either(ee -> ee.<CompletableFuture<?>>either(batch -> {
                        return batch.flushOutput(); // flush external output
                    }, slow -> {
                        //(nothing to do)
                        return (CompletableFuture<?>) CompletableFuture.completedFuture(Unit.unit());

                    }), topic -> {
                        //(nothing to do)
                        return (CompletableFuture<?>) CompletableFuture.completedFuture(Unit.unit());
                    });
                });
        final Stream<CompletableFuture<?>> flush_sub = _mutable_state.sub_buckets.values().stream()
                .map(sub_context -> sub_context.flushBatchOutput(bucket, job));

        final Stream<CompletableFuture<?>> flush_writer = Stream
                .of(_multi_writer.optional().<CompletableFuture<?>>map(writer -> writer.flushBatchOutput())
                        .orElseGet(() -> (CompletableFuture<?>) CompletableFuture.completedFuture(Unit.unit())));

        // Important: this is the line that actually executes all the flushes, so need to ensure each of the above is added here:
        return CompletableFuture.allOf(Stream.of(flush_loggers, flush_external, flush_sub, flush_writer)
                .flatMap(__ -> __).toArray(CompletableFuture[]::new));
    }
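
     // Illustrative usage sketch (hypothetical variable names): flush once per batch/micro-batch and wait on
     // (or compose over) the returned future before reporting the batch complete, otherwise records still
     // buffered in the batch sub-services may be lost if the process exits first.
     //
     //   analytics_context.flushBatchOutput(Optional.empty(), my_job).join();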

    /** For ping/pong output, switches the ping/pong buffers over
     *  CALLED FROM DIM ONLY - NOT FOR USER CONSUMPTION
     *  CALLED FROM TECHNOLOGY _NOT_ MODULE
     * @param bucket
     */
    public void completeBucketOutput(final DataBucketBean bucket) {
        Optionals.of(() -> bucket.analytic_thread().jobs()).orElse(Collections.emptyList()).stream()
                .filter(j -> !Optionals.of(() -> j.output().is_transient()).orElse(false)) //(these are handled separately)
                .filter(j -> needPingPongBuffer(bucket, Optional.of(j))).findFirst() // (currently all jobs have to point to the same output)
                .ifPresent(j -> { // need the ping pong buffer, so switch them   
                    _logger.info(ErrorUtils.get("Completing non-transient output of bucket {0} (using job {1})",
                            bucket.full_name(), j.name()));
                    switchJobOutputBuffer(bucket, j);
                });
    }

    /** For transient jobs, completes the job's output
     * @param bucket
     */
    public void completeJobOutput(final DataBucketBean bucket, final AnalyticThreadJobBean job) {
        if (Optionals.of(() -> job.output().is_transient()).orElse(false)) {
            _logger.info(ErrorUtils.get("Completing transient output of bucket:job {0}:{1}", bucket.full_name(),
                    job.name()));
            switchJobOutputBuffer(bucket, job);
        }
        //(else nothing to do)
    }

    ////////////////////////////////////////////////////

    // INTERNAL UTILS - LOW-ISH LEVEL

    /** Setup the outputs for the given bucket
     */
    protected void setupOutputs(final DataBucketBean bucket, final AnalyticThreadJobBean job) {
        final boolean need_ping_pong_buffer = needPingPongBuffer(bucket, Optional.of(job));

        final boolean output_is_transient = Optionals.of(() -> job.output().is_transient()).orElse(false);

        if (output_is_transient) { // transient output: write to the job's transient storage area only (no search index output)
            _multi_writer.set(MultiDataService.getTransientMultiWriter(bucket, _service_context,
                    IStorageService.StorageStage.transient_output.toString() + ":" + job.name(),
                    Optional.of(s -> getSecondaryBuffer(bucket, Optional.of(job), need_ping_pong_buffer, s))));
        } else { // normal case
            _multi_writer.set(MultiDataService.getMultiWriter(bucket, _service_context, Optional.empty(),
                    Optional.of(s -> getSecondaryBuffer(bucket, Optional.of(job), need_ping_pong_buffer, s))));
        }
    }

    /** Handles ping/pong buffer set up for transient jobs' outputs, called once per job on startup
     * @param state_bucket
     * @param job
     */
    public void centralPerJobOutputSetup(final DataBucketBean state_bucket, final AnalyticThreadJobBean job) {
        final boolean need_ping_pong_buffer = needPingPongBuffer(state_bucket, Optional.of(job));
        // _either_ the entire bucket needs ping/pong, or it's a transient job that needs it

        _logger.info(ErrorUtils.get("Central per bucket:job setup for {0}:{1} need to do anything = {2}",
                state_bucket.full_name(), job.name(), need_ping_pong_buffer));

        if (need_ping_pong_buffer) {
            setupOutputs(state_bucket, job);

            // If it's transient then delete the transient buffer
            // (for non transient jobs - ie that share the bucket's output, then we do it centrally below)
            if (Optionals.of(() -> job.output().is_transient()).orElse(false)) {
                _multi_writer.get().getCrudWriters().stream().forEach(crud -> crud.deleteDatastore().join());
            }
        }
    }

    /** Handles ping/pong buffer set up for the entire bucket's output, called once per job on startup (but should only do something
     *  for the first job to run)
     * @param state_bucket
     */
    public void centralPerBucketOutputSetup(final DataBucketBean state_bucket) {

        // This is a more complex case .. for now we'll just delete the data on the _first_ job with no dependencies we encounter

        Optionals.of(() -> state_bucket.analytic_thread().jobs()).orElse(Collections.emptyList()).stream()
                .filter(j -> Optionals.ofNullable(j.dependencies()).isEmpty()) // can't have any dependencies
                .filter(j -> Optional.ofNullable(j.enabled()).orElse(true)) // enabled
                .findFirst().ifPresent(first_job -> {
                    final boolean need_ping_pong_buffer = needPingPongBuffer(state_bucket, Optional.empty());
                    _logger.info(ErrorUtils.get(
                            "Central per bucket setup for {0} (used job {1})): need to do anything = {2}",
                            state_bucket.full_name(), first_job.name(), need_ping_pong_buffer));

                    // (only do anything if the bucket globally has a ping/pong buffer)
                    if (need_ping_pong_buffer) {
                        _multi_writer.get().getCrudWriters().forEach(s -> s.deleteDatastore().join());
                    }
                });
    }

    ////////////////////////////////////////////////////

    // INTERNAL UTILS - LOW LEVEL

    /** Util to check if we're using a ping pong buffer to change over data "atomically"
     *  Either for a transient job or the entire bucket
     *  Note that for the global outputs there can be only one setting, so we read from the bucket
     * @param job - if this is not present or isn't a transient job then this is global
     * @return
     */
    protected static boolean needPingPongBuffer(final DataBucketBean bucket,
            final Optional<AnalyticThreadJobBean> job) {
        final boolean need_ping_pong_buffer = job
                .filter(j -> Optionals.of(() -> j.output().is_transient()).orElse(false)) // only transients
                .map(j -> !Optionals.of(() -> j.output().preserve_existing_data()).orElse(false) && _batch_types
                        .contains(Optional.ofNullable(j.analytic_type()).orElse(MasterEnrichmentType.none)))
                .orElseGet(() -> { // This is the bucket case  ... if there are any batch jobs that don't preserve existing data
                    return Optionals.of(() -> bucket.analytic_thread().jobs()).orElse(Collections.emptyList())
                            .stream().filter(j -> !Optionals.of(() -> j.output().is_transient()).orElse(false)) // not transient
                            .filter(j -> Optional.ofNullable(j.enabled()).orElse(true)) // enabled
                            .filter(j -> _batch_types.contains(
                                    Optional.ofNullable(j.analytic_type()).orElse(MasterEnrichmentType.none))) // batch
                            .filter(j -> !Optionals.of(() -> j.output().preserve_existing_data()).orElse(false)) // not preserving data
                            .findFirst().isPresent();
                });

        return need_ping_pong_buffer;
    }
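
     // Decision summary (a restatement of the logic above):
     //  - job present and transient: buffer needed iff its analytic_type is one of _batch_types AND
     //    output.preserve_existing_data is not set
     //  - otherwise (whole-bucket case): buffer needed iff ANY enabled, non-transient job has a batch
     //    analytic_type and does not preserve existing data
     // ie streaming-only outputs, and outputs that preserve existing data, never use the ping/pong switch.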

    /** Internal utility for completing job output
     *  NOTE IF THE JOB IS NOT TRANSIENT THEN IT SWITCHES THE ENTIRE BUFFER ACROSS
     *  (WHICH WON'T WORK IF THERE ARE >1 JOBS)
     * @param bucket
     * @param job
     */
    private void switchJobOutputBuffer(final DataBucketBean bucket, final AnalyticThreadJobBean job) {
        final Optional<String> job_name = Optional.of(job)
                .filter(j -> Optionals.of(() -> j.output().is_transient()).orElse(false)).map(j -> j.name());
        final Consumer<IGenericDataService> switchPrimary = s -> {
            getSecondaryBuffer(bucket, Optional.of(job), true, s).ifPresent(curr_secondary -> {
                final CompletableFuture<BasicMessageBean> result = s.switchCrudServiceToPrimaryBuffer(bucket,
                        Optional.of(curr_secondary),
                        IGenericDataService.SECONDARY_PING.equals(curr_secondary)
                                ? Optional.of(IGenericDataService.SECONDARY_PONG) //(primary must have been ping)
                                : Optional.of(IGenericDataService.SECONDARY_PING),
                        job_name);

                // Log on error:
                result.thenAccept(res -> {
                    if (!res.success()) {
                        _logger.warn(ErrorUtils.get("Bucket:job {0}:{1} service {2}: {3}", bucket.full_name(),
                                job.name(), s.getClass().getSimpleName(), res.message()));
                    }
                });
            });
        };
        if (!_multi_writer.isSet()) {
            setupOutputs(bucket, job);
        }
        _multi_writer.optional().ifPresent(w -> w.getDataServices()
                .forEach(s -> s.getDataService().ifPresent(ss -> switchPrimary.accept(ss))));
    }

    /** Gets the secondary buffer (deletes any existing data, and switches to "ping" on an uninitialized index)
     *  NOTE: CAN HAVE SIDE EFFECTS IF UNINITIALIZED
     * @param bucket
     * @param job - if present _and_ points to transient output, then returns the buffers for that transient output, else for the entire bucket
     * @param need_ping_pong_buffer - based on the job.output
     * @param data_service
     * @return
     */
    protected Optional<String> getSecondaryBuffer(final DataBucketBean bucket,
            final Optional<AnalyticThreadJobBean> job, final boolean need_ping_pong_buffer,
            final IGenericDataService data_service) {
        if (need_ping_pong_buffer) {
            final Optional<String> job_name = job
                    .filter(j -> Optionals.of(() -> j.output().is_transient()).orElse(false)).map(j -> j.name());
            final Optional<String> write_buffer = data_service.getPrimaryBufferName(bucket, job_name)
                    .map(Optional::of).orElseGet(() -> { // Two cases:

                        final Set<String> secondaries = data_service.getSecondaryBuffers(bucket, job_name);
                        final int ping_pong_count = (secondaries.contains(IGenericDataService.SECONDARY_PING) ? 1
                                : 0) + (secondaries.contains(IGenericDataService.SECONDARY_PONG) ? 1 : 0);

                        if (1 == ping_pong_count) { // 1) one of ping/pong exists but not the other ... this is the file case where we can't tell what the primary actually is
                            if (secondaries.contains(IGenericDataService.SECONDARY_PONG)) { //(eg pong is secondary so ping must be primary)
                                return Optional.of(IGenericDataService.SECONDARY_PING);
                            } else
                                return Optional.of(IGenericDataService.SECONDARY_PONG);
                        } else { // 2) all other cases: this is the ES case, where we just use an alias to switch ..
                            // So here there are side effects
                            if (_state_name == State.IN_MODULE) { // this should not happen (unless the data service doesn't support secondary buffers)
                                _logger.warn(ErrorUtils.get(
                                        "Startup case: no primary buffer for bucket:job {0}:{1} service {2}, number of secondary buffers = {3} (ping/pong={4}, secondaries={5})",
                                        bucket.full_name(), job_name.orElse("(none)"),
                                        data_service.getClass().getSimpleName(), ping_pong_count,
                                        need_ping_pong_buffer,
                                        secondaries.stream().collect(Collectors.joining(";"))));
                            } else {
                                _logger.info(ErrorUtils.get(
                                        "Startup case: no primary buffer for bucket:job {0}:{1} service {2}, number of secondary buffers = {3} (ping/pong={4})",
                                        bucket.full_name(), job_name.orElse("(none)"),
                                        data_service.getClass().getSimpleName(), ping_pong_count,
                                        need_ping_pong_buffer));
                            }

                            // ... but we don't currently have a primary so need to build that
                            if (0 == ping_pong_count) { // first time through, create the buffers:
                                data_service.getWritableDataService(JsonNode.class, bucket, Optional.empty(),
                                        Optional.of(IGenericDataService.SECONDARY_PONG));
                                data_service.getWritableDataService(JsonNode.class, bucket, Optional.empty(),
                                        Optional.of(IGenericDataService.SECONDARY_PING));
                            }
                            final Optional<String> curr_primary = Optional.of(IGenericDataService.SECONDARY_PING);
                            final CompletableFuture<BasicMessageBean> future_res = data_service
                                    .switchCrudServiceToPrimaryBuffer(bucket, curr_primary, Optional.empty(),
                                            job_name);
                            future_res.thenAccept(res -> {
                                if (!res.success()) {
                                    _logger.warn(ErrorUtils.get(
                                            "Error switching between ping/pong buffers (service {0}: ",
                                            data_service.getClass().getSimpleName()) + res.message());
                                }
                            });
                            return curr_primary;
                        }
                    }).map(curr_pri -> { // then just pick the buffer that isn't the primary
                        if (IGenericDataService.SECONDARY_PING.equals(curr_pri)) {
                            return IGenericDataService.SECONDARY_PONG;
                        } else
                            return IGenericDataService.SECONDARY_PING;
                    });

            return write_buffer;
        } else
            return Optional.empty();
    }
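
     // Worked example (restating the "ES case" above): if neither "ping" nor "pong" exists yet, both buffers
     // are created, "ping" is promoted to primary, and the method returns "pong" as the write buffer - ie new
     // data is written to the secondary and only becomes visible once completeBucketOutput/completeJobOutput
     // later switches the primary over.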

    /** Utility to grab a generic data service (top level) - underlying version
     * @param context
     * @param service_type
     * @param service_name
     * @return
     */
    protected static Optional<? extends IUnderlyingService> getUnderlyingService(final IServiceContext context,
            final String service_type, final Optional<String> service_name) {
        return DataServiceUtils.getUnderlyingServiceInterface(service_type)
                .flatMap(ds -> context.getService(ds, service_name.filter(s -> !s.isEmpty())));
    }

    /** Utility to grab a generic data service (top level) - data service version
     * @param context
     * @param service_type
     * @param service_name
     * @return
     */
    protected static Optional<? extends IDataServiceProvider> getDataService(final IServiceContext context,
            final String service_type, final Optional<String> service_name) {
        return getUnderlyingService(context, service_type, service_name).map(s -> (IDataServiceProvider) s);
    }

}