Java example - MultiDataService: a single write interface over all of an Aleph2 bucket's data services
/*******************************************************************************
 * Copyright 2016, The IKANOW Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package com.ikanow.aleph2.core.shared.services;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import java.util.stream.Collectors;

import scala.Tuple2;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.ikanow.aleph2.core.shared.utils.DataServiceUtils;
import com.ikanow.aleph2.data_model.interfaces.data_services.IStorageService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataServiceProvider;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataServiceProvider.IGenericDataService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IDataWriteService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IServiceContext;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataSchemaBean;
import com.ikanow.aleph2.data_model.utils.Optionals;
import com.ikanow.aleph2.data_model.utils.Tuples;

/** This wraps a set of data services so that a set of simple commands can be applied sensibly to all the services within
 *  (including cases where multiple services have the same implementation).
 * @author Alex
 */
public class MultiDataService {

    final protected Multimap<IDataServiceProvider, String> _services;
    final protected Multimap<IDataWriteService.IBatchSubservice<JsonNode>, String> _batches = LinkedHashMultimap.create();
    final protected Multimap<IDataWriteService<JsonNode>, String> _crud_onlys = LinkedHashMultimap.create();
    final protected Multimap<IDataWriteService<JsonNode>, String> _all_cruds = LinkedHashMultimap.create();

    final protected boolean _doc_write_mode;

    // (will use nulls vs optional/setonce for efficiency)
    protected IDataWriteService<JsonNode> _crud_index_service;
    protected IDataWriteService.IBatchSubservice<JsonNode> _batch_index_service;
    protected IDataWriteService<JsonNode> _crud_doc_service;
    protected IDataWriteService.IBatchSubservice<JsonNode> _batch_doc_service;
    protected IDataWriteService<JsonNode> _crud_data_warehouse_service;
    protected IDataWriteService.IBatchSubservice<JsonNode> _batch_data_warehouse_service;
    protected IDataWriteService<JsonNode> _crud_graph_service;
    protected IDataWriteService.IBatchSubservice<JsonNode> _batch_graph_service;
    protected IDataWriteService<JsonNode> _crud_columnar_service;
    protected IDataWriteService.IBatchSubservice<JsonNode> _batch_columnar_service;
    protected IDataWriteService<JsonNode> _crud_temporal_service;
    protected IDataWriteService.IBatchSubservice<JsonNode> _batch_temporal_service;
    protected IDataWriteService<JsonNode> _crud_storage_service;
    protected IDataWriteService.IBatchSubservice<JsonNode> _batch_storage_service;

    /** Wrapper for standard user c'tor - full version
     * @param bucket
     * @param context
     * @param maybe_get_storage_type - maps data service to an options string
     * @param maybe_get_buffer_name - maps data service to a secondary buffer name
     * @return
     */
    public static MultiDataService getMultiWriter(final DataBucketBean bucket, final IServiceContext context,
            final Optional<Function<IGenericDataService, Optional<String>>> maybe_get_storage_type,
            final Optional<Function<IGenericDataService, Optional<String>>> maybe_get_buffer_name) {
        return new MultiDataService(bucket, context, maybe_get_storage_type, maybe_get_buffer_name);
    }

    /** Wrapper for standard user c'tor - minimal version
     * @param bucket
     * @param context
     * @return
     */
    public static MultiDataService getMultiWriter(final DataBucketBean bucket, final IServiceContext context) {
        return new MultiDataService(bucket, context, Optional.empty(), Optional.empty());
    }

    /** Wrapper for transient user c'tor
     * @param bucket
     * @param context
     * @param transient_storage_name - the name of the transient storage area to write into
     * @param maybe_get_buffer_name - maps data service to a secondary buffer name
     * @return
     */
    public static MultiDataService getTransientMultiWriter(final DataBucketBean bucket, final IServiceContext context,
            final String transient_storage_name,
            final Optional<Function<IGenericDataService, Optional<String>>> maybe_get_buffer_name) {
        return new MultiDataService(bucket, context, transient_storage_name, maybe_get_buffer_name);
    }

    /** User c'tor - transient case, which is much simpler
     * @param bucket
     * @param context
     * @param transient_storage_name
     * @param maybe_get_buffer_name
     */
    protected MultiDataService(final DataBucketBean bucket, final IServiceContext context,
            final String transient_storage_name,
            final Optional<Function<IGenericDataService, Optional<String>>> maybe_get_buffer_name) {
        final String transient_storage_service = DataSchemaBean.StorageSchemaBean.name;
        _services = LinkedHashMultimap.create();
        _services.put(context.getStorageService(), transient_storage_service);
        _doc_write_mode = false;

        Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2 =
                getWriters(bucket, context.getStorageService(),
                        Optional.of(__ -> Optional.of(transient_storage_name)), maybe_get_buffer_name);
        _crud_storage_service = t2._1();
        _batch_storage_service = t2._2();
        storeWriters(t2, Arrays.asList(transient_storage_service));
    }

    /** User c'tor - standard case
     * @param bucket
     * @param context
     * @param maybe_get_storage_type
     * @param maybe_get_buffer_name
     */
    protected MultiDataService(final DataBucketBean bucket, final IServiceContext context,
            final Optional<Function<IGenericDataService, Optional<String>>> maybe_get_storage_type,
            final Optional<Function<IGenericDataService, Optional<String>>> maybe_get_buffer_name) {
        // Insert or overwrite mode:
        _doc_write_mode = getWriteMode(bucket);

        _services = DataServiceUtils.selectDataServices(bucket.data_schema(), context);

        _services.asMap().entrySet().stream().forEach(kv -> {
            final Set<String> vals = kv.getValue().stream().collect(Collectors.toSet());
            // (the order doesn't really matter here, so just to "look" sensible:)
            if (vals.contains(DataSchemaBean.SearchIndexSchemaBean.name)) {
                Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2 =
                        getWriters(bucket, kv.getKey(), maybe_get_storage_type, maybe_get_buffer_name);
                _crud_index_service = t2._1();
                _batch_index_service = t2._2();
                storeWriters(t2, vals);
            }
            else if (vals.contains(DataSchemaBean.DocumentSchemaBean.name)) {
                Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2 =
                        getWriters(bucket, kv.getKey(), maybe_get_storage_type, maybe_get_buffer_name);
                _crud_doc_service = t2._1();
                _batch_doc_service = t2._2();
                storeWriters(t2, vals);
            }
            else if (vals.contains(DataSchemaBean.DataWarehouseSchemaBean.name)) {
                Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2 =
                        getWriters(bucket, kv.getKey(), maybe_get_storage_type, maybe_get_buffer_name);
                _crud_data_warehouse_service = t2._1();
                _batch_data_warehouse_service = t2._2();
                storeWriters(t2, vals);
            }
            else if (vals.contains(DataSchemaBean.GraphSchemaBean.name)) {
                Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2 =
                        getWriters(bucket, kv.getKey(), maybe_get_storage_type, maybe_get_buffer_name);
                _crud_graph_service = t2._1();
                _batch_graph_service = t2._2();
                storeWriters(t2, vals);
            }
            else if (vals.contains(DataSchemaBean.ColumnarSchemaBean.name)) {
                Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2 =
                        getWriters(bucket, kv.getKey(), maybe_get_storage_type, maybe_get_buffer_name);
                _crud_columnar_service = t2._1();
                _batch_columnar_service = t2._2();
                storeWriters(t2, vals);
            }
            else if (vals.contains(DataSchemaBean.TemporalSchemaBean.name)) {
                Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2 =
                        getWriters(bucket, kv.getKey(), maybe_get_storage_type, maybe_get_buffer_name);
                _crud_temporal_service = t2._1();
                _batch_temporal_service = t2._2();
                storeWriters(t2, vals);
            }
            if (vals.contains(DataSchemaBean.StorageSchemaBean.name)) {
                // (note storage is a bit different - default to the "processed" stage if no storage type is specified)
                Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2 =
                        getWriters(bucket, kv.getKey(),
                                maybe_get_storage_type.map(Optional::of).orElseGet(() ->
                                        Optional.of(__ -> Optional.of(IStorageService.StorageStage.processed.toString()))),
                                maybe_get_buffer_name);
                _crud_storage_service = t2._1();
                _batch_storage_service = t2._2();
                storeWriters(t2, vals);
            }
        });
    }

    /** Utility for write mode - returns true if the bucket's document schema is enabled and has any
     *  deduplication settings, ie writers should overwrite existing objects rather than just insert.
     * @param bucket
     * @return
     */
    protected static boolean getWriteMode(DataBucketBean bucket) {
        return Optionals.of(() -> bucket.data_schema().document_schema())
                .filter(ds -> Optional.ofNullable(ds.enabled()).orElse(true))
                .filter(ds -> (null != ds.deduplication_policy())
                        || !Optionals.ofNullable(ds.deduplication_fields()).isEmpty()
                        || !Optionals.ofNullable(ds.deduplication_contexts()).isEmpty()) // (ie dedup fields set)
                .isPresent();
    }

    /** Utility for keeping a unique count of all the batch and crud write services
     * @param t2
     * @param vals
     */
    private void storeWriters(
            final Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> t2,
            Collection<String> vals) {
        if (null != t2._2()) _batches.putAll(t2._2(), vals);
        else if (null != t2._1()) _crud_onlys.putAll(t2._1(), vals);

        if (null != t2._1()) _all_cruds.putAll(t2._1(), vals);
    }

    /** Returns a list of batch data writers
     * @return
     */
    public Collection<IDataWriteService.IBatchSubservice<JsonNode>> getBatchWriters() {
        return Collections.unmodifiableCollection(_batches.keySet());
    }

    /** Returns a list of CRUD data writers that don't have a batch writer
     * @return
     */
    public Collection<IDataWriteService<JsonNode>> getCrudOnlyWriters() {
        return Collections.unmodifiableCollection(_crud_onlys.keySet());
    }
    /** Returns a list of all the CRUD data writers (whether or not they also have a batch writer)
     * @return
     */
    public Collection<IDataWriteService<JsonNode>> getCrudWriters() {
        return Collections.unmodifiableCollection(_all_cruds.keySet());
    }

    /** Returns a list of data providers
     * @return
     */
    public Collection<IDataServiceProvider> getDataServices() {
        return Collections.unmodifiableCollection(_services.keySet());
    }

    /** Returns a completable future that completes when all the batch writers have flushed their output
     * @return
     */
    public CompletableFuture<?> flushBatchOutput() {
        return CompletableFuture.allOf(
                getBatchWriters().stream().map(batch -> batch.flushOutput()).toArray(CompletableFuture[]::new));
    }

    /** Output a JSON object, decomposed as per the schema
     * @param obj_json
     * @return true if the object was written to at least one data service
     */
    public boolean batchWrite(final JsonNode obj_json) {
        boolean mutable_written = false;

        if (_batch_index_service != null) {
            mutable_written = true;
            _batch_index_service.storeObject(obj_json, _doc_write_mode);
        }
        else if (_crud_index_service != null) { // (super slow)
            mutable_written = true;
            _crud_index_service.storeObject(obj_json, _doc_write_mode);
        }
        if (_batch_doc_service != null) {
            mutable_written = true;
            _batch_doc_service.storeObject(obj_json, _doc_write_mode);
        }
        else if (_crud_doc_service != null) { // (super slow)
            mutable_written = true;
            _crud_doc_service.storeObject(obj_json, _doc_write_mode);
        }
        if (_batch_data_warehouse_service != null) {
            mutable_written = true;
            _batch_data_warehouse_service.storeObject(obj_json, _doc_write_mode);
        }
        else if (_crud_data_warehouse_service != null) { // (super slow)
            mutable_written = true;
            _crud_data_warehouse_service.storeObject(obj_json, _doc_write_mode);
        }
        if (_batch_graph_service != null) {
            mutable_written = true;
            _batch_graph_service.storeObject(obj_json, _doc_write_mode);
        }
        else if (_crud_graph_service != null) { // (super slow)
            mutable_written = true;
            _crud_graph_service.storeObject(obj_json, _doc_write_mode);
        }
        if (_batch_columnar_service != null) {
            mutable_written = true;
            _batch_columnar_service.storeObject(obj_json, _doc_write_mode);
        }
        else if (_crud_columnar_service != null) { // (super slow)
            mutable_written = true;
            _crud_columnar_service.storeObject(obj_json, _doc_write_mode);
        }
        if (_batch_temporal_service != null) {
            mutable_written = true;
            _batch_temporal_service.storeObject(obj_json, _doc_write_mode);
        }
        else if (_crud_temporal_service != null) { // (super slow)
            mutable_written = true;
            _crud_temporal_service.storeObject(obj_json, _doc_write_mode);
        }
        if (_batch_storage_service != null) {
            mutable_written = true;
            _batch_storage_service.storeObject(obj_json);
        }
        else if (_crud_storage_service != null) { // (super slow)
            mutable_written = true;
            _crud_storage_service.storeObject(obj_json);
        }
        return mutable_written;
    }

    /** Handy utility - gets the writable data service from a provider, together with its batch
     *  sub-service where one is available
     * @param bucket
     * @param service_provider
     * @param maybe_get_storage_type
     * @param maybe_get_buffer_name
     * @return
     */
    protected static Tuple2<IDataWriteService<JsonNode>, IDataWriteService.IBatchSubservice<JsonNode>> getWriters(
            final DataBucketBean bucket, IDataServiceProvider service_provider,
            Optional<Function<IGenericDataService, Optional<String>>> maybe_get_storage_type,
            Optional<Function<IGenericDataService, Optional<String>>> maybe_get_buffer_name) {
        final IDataWriteService<JsonNode> crud_service;
        final IDataWriteService.IBatchSubservice<JsonNode> batch_service;
        batch_service = Optional
                .ofNullable(crud_service = Optional.of(service_provider).flatMap(s -> s.getDataService())
                        .flatMap(s -> s.getWritableDataService(JsonNode.class, bucket,
                                maybe_get_storage_type.<String>flatMap(f -> f.apply(s)),
                                maybe_get_buffer_name.<String>flatMap(f -> f.apply(s))))
                        .orElse(null))
                .flatMap(IDataWriteService::getBatchWriteSubservice).orElse(null);
        return Tuples._2T(crud_service, batch_service);
    }
}
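
For context, here is a minimal usage sketch of the public surface above. The wiring is hypothetical: "my_bucket" stands in for an already-populated DataBucketBean and "service_context" for an IServiceContext obtained from the application's dependency injection, neither of which is constructed here.

    // Minimal usage sketch - "my_bucket" and "service_context" are hypothetical
    // placeholders for objects the surrounding application would already have.
    public void writeExample(final DataBucketBean my_bucket, final IServiceContext service_context) {
        final MultiDataService multi_writer = MultiDataService.getMultiWriter(my_bucket, service_context);

        // batchWrite fans the object out to every data service enabled in the bucket's
        // data schema, preferring each service's batch sub-service where one exists:
        final JsonNode json = new com.fasterxml.jackson.databind.ObjectMapper()
                .createObjectNode().put("message", "hello");
        final boolean written = multi_writer.batchWrite(json); // false => no services enabled

        // The batch writers buffer internally, so flush (and wait) before relying on
        // the writes being visible to readers:
        multi_writer.flushBatchOutput().join();
    }

Note that batchWrite returns synchronously even though most of the work is deferred to the batch sub-services; the flushBatchOutput() future is the only completion signal the class exposes.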