// Java tutorial
/******************************************************************************* * Copyright 2015, The IKANOW Open Source Project. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package com.ikanow.aleph2.data_import.services; import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.LinkedList; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; import java.util.stream.Collector; import java.util.stream.Stream; import java.util.stream.StreamSupport; import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import scala.Tuple2; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet.Builder; import com.google.inject.Inject; import com.google.inject.Injector; import com.ikanow.aleph2.core.shared.services.MultiDataService; import com.ikanow.aleph2.core.shared.utils.JarCacheUtils; import com.ikanow.aleph2.core.shared.utils.LiveInjector; import 
com.ikanow.aleph2.core.shared.utils.SharedErrorUtils; import com.ikanow.aleph2.data_import.utils.ErrorUtils; import com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext; import com.ikanow.aleph2.data_model.interfaces.data_services.IColumnarService; import com.ikanow.aleph2.data_model.interfaces.data_services.IDocumentService; import com.ikanow.aleph2.data_model.interfaces.data_services.IManagementDbService; import com.ikanow.aleph2.data_model.interfaces.data_services.ISearchIndexService; import com.ikanow.aleph2.data_model.interfaces.data_services.IStorageService; import com.ikanow.aleph2.data_model.interfaces.data_services.ITemporalService; import com.ikanow.aleph2.data_model.interfaces.shared_services.IBucketLogger; import com.ikanow.aleph2.data_model.interfaces.shared_services.ICrudService; import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataWriteService; import com.ikanow.aleph2.data_model.interfaces.shared_services.ILoggingService; import com.ikanow.aleph2.data_model.interfaces.shared_services.ISecurityService; import com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService; import com.ikanow.aleph2.data_model.interfaces.shared_services.IServiceContext; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean.MasterEnrichmentType; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketStatusBean; import com.ikanow.aleph2.data_model.objects.shared.AssetStateDirectoryBean; import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean; import com.ikanow.aleph2.data_model.objects.shared.GlobalPropertiesBean; import com.ikanow.aleph2.data_model.objects.shared.SharedLibraryBean; import com.ikanow.aleph2.data_model.utils.CrudUtils; import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils.BeanTemplate; import com.ikanow.aleph2.data_model.utils.CrudUtils.MultiQueryComponent; import 
com.ikanow.aleph2.data_model.utils.CrudUtils.SingleQueryComponent; import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils; import com.ikanow.aleph2.data_model.utils.JsonUtils; import com.ikanow.aleph2.data_model.utils.Lambdas; import com.ikanow.aleph2.data_model.utils.ModuleUtils; import com.ikanow.aleph2.data_model.utils.Optionals; import com.ikanow.aleph2.data_model.utils.Patterns; import com.ikanow.aleph2.data_model.utils.PropertiesUtils; import com.ikanow.aleph2.data_model.utils.SetOnce; import com.ikanow.aleph2.data_model.utils.Tuples; import com.ikanow.aleph2.distributed_services.data_model.DistributedServicesPropertyBean; import com.ikanow.aleph2.distributed_services.services.ICoreDistributedServices; import com.ikanow.aleph2.distributed_services.utils.KafkaUtils; import com.sun.xml.internal.rngom.binary.Pattern; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import com.typesafe.config.ConfigRenderOptions; import com.typesafe.config.ConfigValueFactory; import fj.Unit; import fj.data.Either; @SuppressWarnings("unused") public class HarvestContext implements IHarvestContext { protected static final Logger _logger = LogManager.getLogger(); public static final String __MY_BUCKET_ID = "030e2b82-0285-11e5-a322-1697f925ec7b"; public static final String __MY_TECH_LIBRARY_ID = "030e2b82-0285-11e5-a322-1697f925ec7c"; public static final String __MY_MODULE_LIBRARY_ID = "030e2b82-0285-11e5-a322-1697f925ec7d"; public enum State { IN_TECHNOLOGY, IN_MODULE }; protected final State _state_name; protected static class MutableState { final SetOnce<DataBucketBean> bucket = new SetOnce<>(); final SetOnce<SharedLibraryBean> technology_config = new SetOnce<>(); final SetOnce<Map<String, SharedLibraryBean>> library_configs = new SetOnce<>(); final SetOnce<ImmutableSet<Tuple2<Class<? 
extends IUnderlyingService>, Optional<String>>>> service_manifest_override = new SetOnce<>(); final SetOnce<Boolean> initialized_direct_output = new SetOnce<>(); private Map<String, IBucketLogger> bucket_loggers = new HashMap<String, IBucketLogger>(); }; protected final MutableState _mutable_state = new MutableState(); // (stick this injection in and then call injectMembers in IN_MODULE case) @Inject protected IServiceContext _service_context; protected IManagementDbService _core_management_db; protected ICoreDistributedServices _distributed_services; protected IStorageService _storage_service; protected ILoggingService _logging_service; protected GlobalPropertiesBean _globals; protected Optional<IDataWriteService<String>> _crud_intermed_storage_service = Optional.empty(); protected Optional<IDataWriteService.IBatchSubservice<String>> _batch_intermed_storage_service = Optional .empty(); // For writing objects out protected SetOnce<MultiDataService> _multi_writer = new SetOnce<>(); protected final ObjectMapper _mapper = BeanTemplateUtils.configureMapper(Optional.empty()); private static ConcurrentHashMap<String, HarvestContext> static_instances = new ConcurrentHashMap<>(); /**Guice injector * @param service_context */ @Inject public HarvestContext(final IServiceContext service_context) { _state_name = State.IN_TECHNOLOGY; _service_context = service_context; _core_management_db = service_context.getCoreManagementDbService(); // (actually returns the _core_ management db service) _distributed_services = service_context.getService(ICoreDistributedServices.class, Optional.empty()).get(); _storage_service = service_context.getStorageService(); _logging_service = service_context.getService(ILoggingService.class, Optional.empty()).get(); //(currently don't need to initialize any other data services - unlike in analytics context which has various set up requirements) _globals = service_context.getGlobalProperties(); } /** In-module constructor */ public HarvestContext() { 
_state_name = State.IN_MODULE; // Can't do anything until initializeNewContext is called } /** (FOR INTERNAL DATA MANAGER USE ONLY) Sets the bucket for this harvest context instance * @param this_bucket - the bucket to associated * @returns whether the bucket has been updated (ie fails if it's already been set) */ public boolean setBucket(DataBucketBean this_bucket) { return _mutable_state.bucket.set(this_bucket); } /** (FOR INTERNAL DATA MANAGER USE ONLY) Sets the library bean for this harvest context instance * @param this_bucket - the library bean to be associated * @returns whether the library bean has been updated (ie fails if it's already been set) */ public boolean setTechnologyConfig(SharedLibraryBean lib_config) { return _mutable_state.technology_config.set(lib_config); } /** (FOR INTERNAL DATA MANAGER USE ONLY) Sets the optional module library bean for this context instance * @param this_bucket - the library bean to be associated * @returns whether the library bean has been updated (ie fails if it's already been set) */ public boolean setLibraryConfigs(final Map<String, SharedLibraryBean> lib_configs) { return _mutable_state.library_configs.set(lib_configs); } /** A very simple container for library beans * @author Alex */ public static class LibraryContainerBean { LibraryContainerBean() { } LibraryContainerBean(Collection<SharedLibraryBean> libs) { this.libs = new ArrayList<>(libs); } List<SharedLibraryBean> libs; } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#initializeNewContext(java.lang.String) */ @Override public void initializeNewContext(final String signature) { try { // Inject dependencies final Config parsed_config = ConfigFactory.parseString(signature); final HarvestContext to_clone = static_instances.get(signature); if (null != to_clone) { //copy the fields _service_context = to_clone._service_context; _core_management_db = to_clone._core_management_db; _logging_service = 
to_clone._logging_service; _distributed_services = to_clone._distributed_services; _storage_service = to_clone._storage_service; _globals = to_clone._globals; // (apart from bucket, which is handled below, rest of mutable state is not needed) } else { ModuleUtils.initializeApplication(Collections.emptyList(), Optional.of(parsed_config), Either.right(this)); _core_management_db = _service_context.getCoreManagementDbService(); // (actually returns the _core_ management db service) _distributed_services = _service_context .getService(ICoreDistributedServices.class, Optional.empty()).get(); _logging_service = _service_context.getService(ILoggingService.class, Optional.empty()).get(); _storage_service = _service_context.getStorageService(); _globals = _service_context.getGlobalProperties(); } // Get bucket final BeanTemplate<DataBucketBean> retrieve_bucket = BeanTemplateUtils .from(parsed_config.getString(__MY_BUCKET_ID), DataBucketBean.class); this.setBucket(retrieve_bucket.get()); //(also checks on dedup setting) final BeanTemplate<SharedLibraryBean> retrieve_library = BeanTemplateUtils .from(parsed_config.getString(__MY_TECH_LIBRARY_ID), SharedLibraryBean.class); _mutable_state.technology_config.set(retrieve_library.get()); if (parsed_config.hasPath(__MY_MODULE_LIBRARY_ID)) { final BeanTemplate<LibraryContainerBean> retrieve_module = BeanTemplateUtils .from(parsed_config.getString(__MY_MODULE_LIBRARY_ID), LibraryContainerBean.class); _mutable_state.library_configs.set(Optional.ofNullable(retrieve_module.get().libs) .orElse(Collections.emptyList()).stream() // (split each lib bean into 2 tuples, ie indexed by _id and path_name) .flatMap(mod -> Arrays.asList(Tuples._2T(mod._id(), mod), Tuples._2T(mod.path_name(), mod)) .stream()) .collect(Collectors.toMap(t2 -> t2._1(), t2 -> t2._2(), (t1, t2) -> t1 // (can't happen, ignore if it does) , () -> new LinkedHashMap<String, SharedLibraryBean>()))); } // Always want intermediate output service: 
_batch_intermed_storage_service = (_crud_intermed_storage_service = _storage_service.getDataService() .flatMap(s -> s.getWritableDataService(String.class, retrieve_bucket.get(), Optional.of(IStorageService.StorageStage.json.toString()), Optional.empty()))) .flatMap(IDataWriteService::getBatchWriteSubservice); // Only create final output services for buckets that have no streaming enrichment: // (otherwise can still create lazily if emitObject is called) if (MasterEnrichmentType.none == Optional.ofNullable(retrieve_bucket.get().master_enrichment_type()) .orElse(MasterEnrichmentType.none)) { initializeOptionalOutput(Optional.empty()); } static_instances.put(signature, this); } catch (Exception e) { //DEBUG //System.out.println(ErrorUtils.getLongForm("{0}", e)); throw new RuntimeException(e); } } /** Sets up the writers for optional output (not normally needed - only if enrichment is disabled) * @param bucket */ protected void initializeOptionalOutput(final Optional<DataBucketBean> bucket) { if (_mutable_state.initialized_direct_output.isSet()) { return; } final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get()); synchronized (this) { _mutable_state.initialized_direct_output.trySet(true); _multi_writer.set(MultiDataService.getMultiWriter(my_bucket, _service_context)); } } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getService(java.lang.Class, java.util.Optional) */ @Override public IServiceContext getServiceContext() { return _service_context; } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#sendObjectToStreamingPipeline(java.util.Optional, com.fasterxml.jackson.databind.JsonNode) */ @Override public void sendObjectToStreamingPipeline(Optional<DataBucketBean> bucket, Either<JsonNode, Map<String, Object>> object) { final DataBucketBean this_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get()); final boolean streaming_pipeline_disabled = 
(MasterEnrichmentType.none == Optional .ofNullable(this_bucket.master_enrichment_type()).orElse(MasterEnrichmentType.none)); final String topic = _distributed_services.generateTopicName(this_bucket.full_name(), Optional.empty()); final boolean emit_to_pipeline = Lambdas.get(() -> { if (streaming_pipeline_disabled) { this.emitObject(bucket, object); return (_distributed_services.doesTopicExist(topic)); } else return true; }); if (emit_to_pipeline) { final String obj_str = object.either(JsonNode::toString, map -> _mapper.convertValue(map, JsonNode.class).toString()); if (_batch_intermed_storage_service.isPresent()) { _batch_intermed_storage_service.get().storeObject(obj_str); } else if (_crud_intermed_storage_service.isPresent()) { // (super slow) _crud_intermed_storage_service.get().storeObject(obj_str); } _distributed_services.produce(topic, obj_str); } } /** Whether the bucket needs direct output to file * @param bucket * @return */ public static boolean hasDirectStorageOutput(final DataBucketBean bucket) { return (MasterEnrichmentType.none == Optional.ofNullable(bucket.master_enrichment_type()) .orElse(MasterEnrichmentType.none)) && Optionals.of(() -> bucket.data_schema().storage_schema().processed()) .map(p -> Optional.ofNullable(p.enabled()).orElse(true)).orElse(false); } /** Whether the bucket needs direct output to the search index * @param bucket * @return */ public static boolean hasSearchIndexOutput(final DataBucketBean bucket) { return (MasterEnrichmentType.none == Optional.ofNullable(bucket.master_enrichment_type()) .orElse(MasterEnrichmentType.none)) && Optionals.of(() -> bucket.data_schema().search_index_schema()) .map(p -> Optional.ofNullable(p.enabled()).orElse(true)).orElse(false); } /** Whether the bucket needs direct output to the search index * @param bucket * @return */ public static boolean hasDocumentOutput(final DataBucketBean bucket) { return (MasterEnrichmentType.none == Optional.ofNullable(bucket.master_enrichment_type()) 
.orElse(MasterEnrichmentType.none)) && Optionals.of(() -> bucket.data_schema().document_schema()) .map(p -> Optional.ofNullable(p.enabled()).orElse(true)).orElse(false); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getHarvestContextLibraries(java.util.Optional) */ @Override public List<String> getHarvestContextLibraries( final Optional<Set<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>>> services) { // Consists of: // 1) This library // 2) Libraries that are always needed: // - core distributed services (implicit) // - management db (core + underlying + any underlying drivers) // - data model // 3) Any libraries associated with the services if (_state_name == State.IN_TECHNOLOGY) { if (!_mutable_state.service_manifest_override.isSet()) { getHarvestContextSignature(_mutable_state.bucket.optional(), services); } // Already registered final Set<String> all_service_class_files = this.getUnderlyingArtefacts().stream() .map(service -> LiveInjector.findPathJar(service.getClass(), "")).collect(Collectors.toSet()); // Combine them together final List<String> ret_val = ImmutableSet.<String>builder().addAll(all_service_class_files).build() .stream().filter(f -> (null != f) && !f.equals("")).collect(Collectors.toList()); if (ret_val.isEmpty()) { _logger.warn( "WARNING: no library files found, probably because this is running from an IDE - instead taking all JARs from: " + (_globals.local_root_dir() + "/lib/")); } return !ret_val.isEmpty() ? ret_val : // Special case: no aleph2 libs found, this is almost certainly because this is being run from eclipse... 
Lambdas.get(() -> { try { return FileUtils .listFiles(new File(_globals.local_root_dir() + "/lib/"), new String[] { "jar" }, false) .stream().map(File::toString).collect(Collectors.toList()); } catch (Exception e) { throw new RuntimeException("In eclipse/IDE mode, directory not found: " + (_globals.local_root_dir() + "/lib/")); } }); } else { throw new RuntimeException(ErrorUtils.TECHNOLOGY_NOT_MODULE); } } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getHarvestContextSignature(java.util.Optional) */ @Override public String getHarvestContextSignature(final Optional<DataBucketBean> bucket, final Optional<Set<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>>> services) { if (_state_name == State.IN_TECHNOLOGY) { // Returns a config object containing: // - set up for any of the services described // - all the rest of the configuration // - the bucket bean ID final Config full_config = ModuleUtils.getStaticConfig() .withoutPath(DistributedServicesPropertyBean.APPLICATION_NAME) .withoutPath("MongoDbManagementDbService.v1_enabled") // (special workaround for V1 sync service) ; final Optional<Config> service_config = PropertiesUtils.getSubConfig(full_config, "service"); final Optional<DataBucketBean> maybe_bucket = bucket.map(Optional::of) .orElseGet(() -> _mutable_state.bucket.optional()); final ImmutableSet<Tuple2<Class<? extends IUnderlyingService>, Optional<String>>> complete_services_set = Optional .of(ImmutableSet.<Tuple2<Class<? 
extends IUnderlyingService>, Optional<String>>>builder() .addAll(services.orElse(Collections.emptySet())) .add(Tuples._2T(ICoreDistributedServices.class, Optional.empty())) .add(Tuples._2T(IManagementDbService.class, Optional.empty())) .add(Tuples._2T(IStorageService.class, Optional.empty())) .add(Tuples._2T(ISecurityService.class, Optional.empty())) .add(Tuples._2T(ILoggingService.class, Optional.empty())) //doesn't pull in ES via getUnderlyingArtefacts, relies on the one here .add(Tuples._2T(IManagementDbService.class, IManagementDbService.CORE_MANAGEMENT_DB))) // Optional services: //TODO (ALEPH-19): 1) should port this across to the more comprehensive/centralized CSL, 2) Do I need a "support direct output" flag, and not do this if not set? // seems like a waste to stick these JARs on the classpath when the Harvester is normally only writing to real-time/file-based queue? // (see AnalyticsContext/DataServiceUtils for more details on point #1) .map(sb -> (maybe_bucket.map(b -> hasSearchIndexOutput(b)).orElse(false)) ? sb.add(Tuples._2T(ISearchIndexService.class, Optional.empty())) .add(Tuples._2T(ITemporalService.class, Optional.empty())).add( Tuples._2T(IColumnarService.class, Optional.empty())) : sb) .map(sb -> (maybe_bucket.map(b -> hasDocumentOutput(b)).orElse(false)) ? 
sb.add(Tuples._2T(IDocumentService.class, Optional.empty())) : sb) .map(sb -> sb.build()).get(); final Config config_no_services = full_config.withoutPath("service"); if (_mutable_state.service_manifest_override.isSet()) { if (!complete_services_set.equals(_mutable_state.service_manifest_override.get())) { throw new RuntimeException(ErrorUtils.SERVICE_RESTRICTIONS); } } else { _mutable_state.service_manifest_override.set(complete_services_set); } // Ugh need to add: core deps, core + underlying management db to this list final Config service_defn_subset = complete_services_set.stream() // DON'T MAKE PARALLEL SEE BELOW .map(clazz_name -> { final String config_path = clazz_name._2() .orElse(clazz_name._1().getSimpleName().substring(1)); return Lambdas .wrap_u(__ -> service_config.get().hasPath(config_path) ? Tuples._2T(config_path, service_config.get().getConfig(config_path)) : null) //(could add extra transforms here if we wanted) .apply(Unit.unit()); }).filter(cfg -> null != cfg).reduce(ConfigFactory.empty(), (acc, k_v) -> acc.withValue(k_v._1(), k_v._2().root()), (acc1, acc2) -> acc1 // (This will never be called as long as the above stream is not parallel) ); // Service configuration: final Config service_cfgn_subset = _mutable_state.service_manifest_override.get().stream() // DON'T MAKE PARALLEL SEE BELOW .reduce(config_no_services, // (leave other configurations, we just transform service specific configuration) (acc, clazz_name) -> { final Optional<? extends IUnderlyingService> underlying_service = _service_context .getService(clazz_name._1(), clazz_name._2()); return underlying_service.map(ds -> ds.createRemoteConfig(bucket, acc)).orElse(acc); }, (acc1, acc2) -> acc1 // (This will never be called as long as the above stream is not parallel) ); final Config config_subset_services = service_cfgn_subset.withValue("service", service_defn_subset.root()); final Config last_call = Lambdas .get(() -> _mutable_state.library_configs.isSet() ? 
config_subset_services .withValue(__MY_MODULE_LIBRARY_ID, ConfigValueFactory .fromAnyRef(BeanTemplateUtils .toJson(new LibraryContainerBean( _mutable_state.library_configs.get().entrySet() .stream() .filter(kv -> kv.getValue().path_name() .equals(kv.getKey())) .map(kv -> kv.getValue()) .collect(Collectors.toList()))) .toString())) : config_subset_services) .withValue(__MY_BUCKET_ID, ConfigValueFactory.fromAnyRef( maybe_bucket.map(b -> BeanTemplateUtils.toJson(b).toString()).orElse("{}"))) .withValue(__MY_TECH_LIBRARY_ID, ConfigValueFactory.fromAnyRef(_mutable_state.technology_config .optional().map(l -> BeanTemplateUtils.toJson(l).toString()).orElse("{}"))); return this.getClass().getName() + ":" + last_call.root().render(ConfigRenderOptions.concise()); } else { throw new RuntimeException(ErrorUtils.TECHNOLOGY_NOT_MODULE); } } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getGlobalHarvestTechnologyObjectStore() */ @Override public <S> ICrudService<S> getGlobalHarvestTechnologyObjectStore(final Class<S> clazz, final Optional<String> collection) { return this.getBucketObjectStore(clazz, Optional.empty(), collection, Optional.of(AssetStateDirectoryBean.StateDirectoryType.library)); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IHarvestContext#getLibraryObjectStore(java.lang.Class, java.util.Optional) */ @Override public <S> Optional<ICrudService<S>> getLibraryObjectStore(final Class<S> clazz, final String name_or_id, final Optional<String> collection) { return Optional.ofNullable(this.getLibraryConfigs().get(name_or_id)) .map(module_lib -> _core_management_db.getPerLibraryState(clazz, module_lib, collection)); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getHarvestLibraries(java.util.Optional) */ @Override public CompletableFuture<Map<String, String>> getHarvestLibraries(final Optional<DataBucketBean> bucket) { if (_state_name == 
State.IN_TECHNOLOGY) { final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get()); final SingleQueryComponent<SharedLibraryBean> tech_query = CrudUtils.anyOf(SharedLibraryBean.class) .when(SharedLibraryBean::_id, my_bucket.harvest_technology_name_or_id()) .when(SharedLibraryBean::path_name, my_bucket.harvest_technology_name_or_id()); final List<SingleQueryComponent<SharedLibraryBean>> other_libs = Optionals .ofNullable(my_bucket.harvest_configs()).stream() .flatMap(hcfg -> Optionals.ofNullable(hcfg.library_names_or_ids()).stream()).map(name -> { return CrudUtils.anyOf(SharedLibraryBean.class).when(SharedLibraryBean::_id, name) .when(SharedLibraryBean::path_name, name); }).collect(Collector.of(LinkedList::new, LinkedList::add, (left, right) -> { left.addAll(right); return left; })); @SuppressWarnings("unchecked") final MultiQueryComponent<SharedLibraryBean> spec = CrudUtils.<SharedLibraryBean>anyOf(tech_query, other_libs.toArray(new SingleQueryComponent[other_libs.size()])); // Get the names or ids, get the shared libraries, get the cached ids (must be present) return this._core_management_db.readOnlyVersion().getSharedLibraryStore() .getObjectsBySpec(spec, Arrays.asList(JsonUtils._ID, "path_name"), true).thenApply(cursor -> { return StreamSupport.stream(cursor.spliterator(), false) .collect(Collectors.<SharedLibraryBean, String, String>toMap(lib -> lib.path_name(), lib -> _globals.local_cached_jar_dir() + "/" + JarCacheUtils.buildCachedJarName(lib))); }); } else { throw new RuntimeException(ErrorUtils.TECHNOLOGY_NOT_MODULE); } } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getBucketObjectStore(java.lang.Class, java.util.Optional, java.util.Optional, boolean) */ @Override public <S> ICrudService<S> getBucketObjectStore(final Class<S> clazz, final Optional<DataBucketBean> bucket, final Optional<String> collection, final Optional<AssetStateDirectoryBean.StateDirectoryType> type) { final 
Optional<DataBucketBean> this_bucket = bucket.map(x -> Optional.of(x)).orElseGet( () -> _mutable_state.bucket.isSet() ? Optional.of(_mutable_state.bucket.get()) : Optional.empty()); return Patterns.match(type).<ICrudService<S>>andReturn() .when(t -> t.isPresent() && AssetStateDirectoryBean.StateDirectoryType.analytic_thread == t.get(), __ -> _core_management_db.getBucketAnalyticThreadState(clazz, this_bucket.get(), collection)) .when(t -> t.isPresent() && AssetStateDirectoryBean.StateDirectoryType.enrichment == t.get(), __ -> _core_management_db.getBucketEnrichmentState(clazz, this_bucket.get(), collection)) // assume this is the technology context, most likely usage .when(t -> t.isPresent() && AssetStateDirectoryBean.StateDirectoryType.library == t.get(), __ -> _core_management_db.getPerLibraryState(clazz, this.getTechnologyLibraryConfig(), collection)) // default: harvest or not specified: harvest .otherwise(__ -> _core_management_db.getBucketHarvestState(clazz, this_bucket.get(), collection)); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getBucket() */ @Override public Optional<DataBucketBean> getBucket() { return _mutable_state.bucket.isSet() ? 
Optional.of(_mutable_state.bucket.get()) : Optional.empty(); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getBucketStatus(java.util.Optional) */ @Override public CompletableFuture<DataBucketStatusBean> getBucketStatus(final Optional<DataBucketBean> bucket) { return this._core_management_db.readOnlyVersion().getDataBucketStatusStore() .getObjectById(bucket.orElseGet(() -> _mutable_state.bucket.get())._id()) .thenApply(opt_status -> opt_status.get()); // (ie will exception if not present) } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#logStatusForBucketOwner(java.util.Optional, com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean, boolean) */ // @Override // public void logStatusForBucketOwner( // Optional<DataBucketBean> bucket, // BasicMessageBean message, boolean roll_up_duplicates) // { // //TODO (ALEPH-19): Fill this in later // throw new RuntimeException(ErrorUtils.NOT_YET_IMPLEMENTED); // } // // /* (non-Javadoc) // * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#logStatusForBucketOwner(java.util.Optional, com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean) // */ // @Override // public void logStatusForBucketOwner( // Optional<DataBucketBean> bucket, // BasicMessageBean message) { // logStatusForBucketOwner(bucket, message, true); // } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getLogger(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean) */ @Override public IBucketLogger getLogger(final Optional<DataBucketBean> bucket) { final DataBucketBean b = bucket.orElseGet(() -> _mutable_state.bucket.get()); return _mutable_state.bucket_loggers.computeIfAbsent(b.full_name(), (k) -> _logging_service.getLogger(b)); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getTempOutputLocation(java.util.Optional) */ @Override public 
String getTempOutputLocation(Optional<DataBucketBean> bucket) { return _globals.distributed_root_dir() + "/" + bucket.orElseGet(() -> _mutable_state.bucket.get()).full_name() + "/managed_bucket/import/temp/"; } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getFinalOutputLocation(java.util.Optional) */ @Override public String getFinalOutputLocation(Optional<DataBucketBean> bucket) { return _globals.distributed_root_dir() + "/" + bucket.orElseGet(() -> _mutable_state.bucket.get()).full_name() + "/managed_bucket/import/ready/"; } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#emergencyDisableBucket(java.util.Optional) */ @Override public void emergencyDisableBucket(Optional<DataBucketBean> bucket) { //TODO (ALEPH-19): Fill this in later (need distributed Akka working) throw new RuntimeException(ErrorUtils.NOT_YET_IMPLEMENTED); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#emergencyQuarantineBucket(java.util.Optional, java.lang.String) */ @Override public void emergencyQuarantineBucket(Optional<DataBucketBean> bucket, String quarantine_duration) { //TODO (ALEPH-19): Fill this in later (need distributed Akka working) throw new RuntimeException(ErrorUtils.NOT_YET_IMPLEMENTED); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#getLibraryConfig() */ @Override public SharedLibraryBean getTechnologyLibraryConfig() { return _mutable_state.technology_config.get(); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IHarvestContext#getModuleConfig() */ @Override public Map<String, SharedLibraryBean> getLibraryConfigs() { return _mutable_state.library_configs.isSet() ? 
_mutable_state.library_configs.get() : Collections.emptyMap(); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService#getUnderlyingArtefacts() */ @Override public Collection<Object> getUnderlyingArtefacts() { if (_state_name == State.IN_TECHNOLOGY) { if (!_mutable_state.service_manifest_override.isSet()) { throw new RuntimeException(ErrorUtils.SERVICE_RESTRICTIONS); } return Stream.concat(Stream.of(this, _service_context, new SharedErrorUtils()) //(last one gives us the core_shared_lib) , _mutable_state.service_manifest_override.get().stream() .map(t2 -> _service_context.getService(t2._1(), t2._2())) .filter(service -> service.isPresent()) .flatMap(service -> service.get().getUnderlyingArtefacts().stream())) .collect(Collectors.toSet()); } else { throw new RuntimeException(ErrorUtils.TECHNOLOGY_NOT_MODULE); } } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService#getUnderlyingPlatformDriver(java.lang.Class, java.util.Optional) */ @Override public <T> Optional<T> getUnderlyingPlatformDriver(final Class<T> driver_class, final Optional<String> driver_options) { return Optional.empty(); } @Override public void emitObject(Optional<DataBucketBean> bucket, Either<JsonNode, Map<String, Object>> object) { initializeOptionalOutput(bucket); final JsonNode obj_json = object.either(__ -> __, map -> (JsonNode) _mapper.convertValue(map, JsonNode.class)); _multi_writer.get().batchWrite(obj_json); } /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext#flushBatchOutput(java.util.Optional) */ @Override public CompletableFuture<?> flushBatchOutput(Optional<DataBucketBean> bucket) { // Flush data and logger final Stream<CompletableFuture<?>> flush_writer = Stream.of(_multi_writer.get().flushBatchOutput()); final Stream<CompletableFuture<?>> flush_logger = _mutable_state.bucket_loggers.values().stream() .map(l -> l.flush()); return 
CompletableFuture.allOf(Stream.concat(flush_writer, flush_logger).toArray(CompletableFuture[]::new)); } }