Java tutorial
/******************************************************************************* * Copyright 2015, The IKANOW Open Source Project. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package com.ikanow.aleph2.management_db.mongodb.services; import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.Spliterator; import java.util.Spliterators; import java.util.function.Supplier; import java.util.function.UnaryOperator; import java.util.Date; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.recipes.leader.LeaderLatch; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import scala.Tuple2; import scala.Tuple3; import com.codepoetics.protonpack.StreamUtils; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.inject.Inject; import com.google.inject.Provider; import com.ikanow.aleph2.data_model.interfaces.data_services.IManagementDbService; import com.ikanow.aleph2.data_model.interfaces.shared_services.ICrudService; import com.ikanow.aleph2.data_model.interfaces.shared_services.ICrudService.Cursor; import com.ikanow.aleph2.data_model.interfaces.shared_services.IManagementCrudService; import com.ikanow.aleph2.data_model.interfaces.shared_services.IServiceContext; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketStatusBean; import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean; import com.ikanow.aleph2.data_model.utils.CrudUtils; import com.ikanow.aleph2.data_model.utils.CrudUtils.CommonUpdateComponent; import com.ikanow.aleph2.data_model.utils.CrudUtils.SingleQueryComponent; import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils; import com.ikanow.aleph2.data_model.utils.CrudUtils.UpdateComponent; import com.ikanow.aleph2.data_model.utils.ErrorUtils; import com.ikanow.aleph2.data_model.utils.FutureUtils.ManagementFuture; import com.ikanow.aleph2.data_model.utils.FutureUtils; import com.ikanow.aleph2.data_model.utils.JsonUtils; import com.ikanow.aleph2.data_model.utils.Lambdas; import com.ikanow.aleph2.data_model.utils.SetOnce; import com.ikanow.aleph2.data_model.utils.Tuples; import com.ikanow.aleph2.distributed_services.services.ICoreDistributedServices; import com.ikanow.aleph2.management_db.mongodb.data_model.MongoDbManagementDbConfigBean; /** This service looks for changes to IKANOW sources and applies them to data bucket beans * @author acp */ public class IkanowV1SyncService_Buckets { private static final Logger _logger = LogManager.getLogger(); private static final ObjectMapper _mapper = BeanTemplateUtils.configureMapper(Optional.empty()); protected final MongoDbManagementDbConfigBean _config; protected final IServiceContext _context; protected final Provider<IManagementDbService> _core_management_db; protected final Provider<IManagementDbService> _underlying_management_db; protected final ICoreDistributedServices _core_distributed_services; protected final SetOnce<MutexMonitor> _source_mutex_monitor = new SetOnce<MutexMonitor>(); protected final ScheduledExecutorService _mutex_scheduler = Executors.newScheduledThreadPool(1); protected final ScheduledExecutorService _source_scheduler = Executors.newScheduledThreadPool(1); protected SetOnce<ScheduledFuture<?>> _source_monitor_handle = new SetOnce<ScheduledFuture<?>>(); protected static int _num_leader_changes = 0; // (just for debugging/testing) public final static String SOURCE_MONITOR_MUTEX = "/app/aleph2/locks/v1/sources"; /** guice constructor * @param config - the management db configuration, includes whether this service is enabled * @param service_context - the service context providing all the required dependencies */ @Inject public IkanowV1SyncService_Buckets(final MongoDbManagementDbConfigBean config, final IServiceContext service_context) { _config = config; _context = service_context; _core_management_db = _context .getServiceProvider(IManagementDbService.class, IManagementDbService.CORE_MANAGEMENT_DB).get(); _underlying_management_db = _context.getServiceProvider(IManagementDbService.class, Optional.empty()).get(); _context.getService(ICoreDistributedServices.class, Optional.empty()).get(); _core_distributed_services = _context.getService(ICoreDistributedServices.class, Optional.empty()).get(); if (Optional.ofNullable(_config.v1_enabled()).orElse(false)) { // Launch the synchronization service // 1) Monitor sources _source_mutex_monitor.set(new MutexMonitor(SOURCE_MONITOR_MUTEX)); _mutex_scheduler.schedule(_source_mutex_monitor.get(), 250L, TimeUnit.MILLISECONDS); _source_monitor_handle .set(_source_scheduler.scheduleWithFixedDelay(new SourceMonitor(), 10L, 2L, TimeUnit.SECONDS)); //(give it 10 seconds before starting, let everything else settle down - eg give the bucket choose handler time to register) } } /** Immediately start (this is test code, so fine to overwrite the SetOnce) */ @SuppressWarnings("deprecation") public void start() { _source_monitor_handle.get().cancel(true); _source_monitor_handle .forceSet(_source_scheduler.scheduleWithFixedDelay(new SourceMonitor(), 1L, 1L, TimeUnit.SECONDS)); } /** Stop threads (just for testing I think) */ public void stop() { _source_monitor_handle.get().cancel(true); } //////////////////////////////////////////////////// //////////////////////////////////////////////////// // WORKER THREADS public class MutexMonitor implements Runnable { protected final String _path; protected final SetOnce<CuratorFramework> _curator = new SetOnce<CuratorFramework>(); protected final SetOnce<LeaderLatch> _leader_selector = new SetOnce<LeaderLatch>(); public MutexMonitor(final String path) { _path = path; } @Override public void run() { if (!_leader_selector.isSet()) { _curator.set(_core_distributed_services.getCuratorFramework()); try { final LeaderLatch Leader_latch = new LeaderLatch(_curator.get(), _path); Leader_latch.start(); _leader_selector.set(Leader_latch); } catch (Throwable e) { _logger.error(ErrorUtils.getLongForm("{0}", e)); } _logger.info("SourceMonitor: joined the leadership candidate cluster"); } } public boolean isLeader() { return _leader_selector.isSet() ? _leader_selector.get().hasLeadership() : false; } } public class SourceMonitor implements Runnable { private final SetOnce<ICrudService<JsonNode>> _v1_db = new SetOnce<ICrudService<JsonNode>>(); private boolean _last_state = false; /* (non-Javadoc) * @see java.lang.Runnable#run() */ @Override public void run() { if (!_source_mutex_monitor.get().isLeader()) { _last_state = false; return; } if (!_last_state) { _logger.info("SourceMonitor: now the leader"); _num_leader_changes++; _last_state = true; } if (!_v1_db.isSet()) { @SuppressWarnings("unchecked") final ICrudService<JsonNode> v1_config_db = _underlying_management_db.get() .getUnderlyingPlatformDriver(ICrudService.class, Optional.of("ingest.source")).get(); _v1_db.set(v1_config_db); _v1_db.get().optimizeQuery(Arrays.asList("extractType")); } try { // Synchronize synchronizeSources(_core_management_db.get().getDataBucketStore(), _underlying_management_db.get().getDataBucketStatusStore(), _v1_db.get()).get(); // (the get at the end just ensures that you don't get two of these scheduled results colliding - because of the 1-thread thread pool) } catch (Throwable t) { _logger.error(ErrorUtils.getLongForm("{0}", t)); } } } //////////////////////////////////////////////////// //////////////////////////////////////////////////// // CONTROL LOGIC /** Top level logic for source synchronization * @param bucket_mgmt * @param source_db */ protected CompletableFuture<Void> synchronizeSources(final IManagementCrudService<DataBucketBean> bucket_mgmt, final IManagementCrudService<DataBucketStatusBean> underlying_bucket_status_mgmt, final ICrudService<JsonNode> source_db) { return compareSourcesToBuckets_get(bucket_mgmt, source_db).thenApply(v1_v2 -> { return compareSourcesToBuckets_categorize(v1_v2); }).thenCompose(create_update_delete -> { if (create_update_delete._1().isEmpty() && create_update_delete._2().isEmpty() && create_update_delete._3().isEmpty()) { //(nothing to do) return CompletableFuture.completedFuture(null); } _logger.info(ErrorUtils.get("Found [create={0}, delete={1}, update={2}] sources", create_update_delete._1().size(), create_update_delete._2().size(), create_update_delete._3().size())); final List<CompletableFuture<Boolean>> l1 = create_update_delete._1().stream().parallel() .<Tuple2<String, ManagementFuture<?>>>map(key -> Tuples._2T(key, createNewBucket(key, bucket_mgmt, underlying_bucket_status_mgmt, source_db))) .<CompletableFuture<Boolean>>map( key_fres -> updateV1SourceStatus_top(key_fres._1(), key_fres._2(), true, source_db)) .collect(Collectors.toList()); ; final List<CompletableFuture<Boolean>> l2 = create_update_delete._2().stream().parallel() .<Tuple2<String, ManagementFuture<?>>>map( key -> Tuples._2T(key, deleteBucket(key, bucket_mgmt))) .<CompletableFuture<Boolean>>map(key_fres -> CompletableFuture.completedFuture(true)) // (don't update source in delete case obviously) .collect(Collectors.toList()); ; final List<CompletableFuture<Boolean>> l3 = create_update_delete._3().stream().parallel() .<Tuple2<String, ManagementFuture<?>>>map(key -> Tuples._2T(key, updateBucket(key, bucket_mgmt, underlying_bucket_status_mgmt, source_db))) .<CompletableFuture<Boolean>>map( key_fres -> updateV1SourceStatus_top(key_fres._1(), key_fres._2(), false, source_db)) .collect(Collectors.toList()); ; List<CompletableFuture<?>> retval = Arrays.asList(l1, l2, l3).stream().flatMap(l -> l.stream()) .collect(Collectors.toList()); ; return CompletableFuture.allOf(retval.toArray(new CompletableFuture[0])); }); } /** Top level handler for update status based on the result * @param key * @param fres * @param disable_on_failure * @param source_db * @return */ protected CompletableFuture<Boolean> updateV1SourceStatus_top(final String key, final ManagementFuture<?> fres, boolean disable_on_failure, ICrudService<JsonNode> source_db) { return fres.getManagementResults().<Boolean>thenCompose(res -> { try { fres.get(); // (check if the DB side call has failed) return updateV1SourceStatus(new Date(), key, res, disable_on_failure, source_db); } catch (Throwable e) { // DB-side call has failed, create ad hoc error final Collection<BasicMessageBean> errs = res.isEmpty() ? Arrays.asList(new BasicMessageBean(new Date(), false, "(unknown)", "(unknown)", null, ErrorUtils.getLongForm("{0}", e), null)) : res; return updateV1SourceStatus(new Date(), key, errs, disable_on_failure, source_db); } }); } /** Want to end up with 3 lists: * - v1 sources that don't exist in v2 (Create them) * - v2 sources that don't exist in v1 (Delete them) * - matching v1/v2 sources with different modified times (Update them) * @param to_compare * @returns a 3-tuple with "to create", "to delete", "to update" */ protected static Tuple3<Collection<String>, Collection<String>, Collection<String>> compareSourcesToBuckets_categorize( final Tuple2<Map<String, String>, Map<String, Date>> to_compare) { // Want to end up with 3 lists: // - v1 sources that don't exist in v2 (Create them) // - v2 sources that don't exist in v1 (Delete them) // - matching v1/v2 sources with different modified times (Update them) // (do delete first, then going to filter to_compare._1() on value==null) final Set<String> v2_not_v1 = new HashSet<String>(to_compare._2().keySet()); v2_not_v1.removeAll(to_compare._1().keySet()); // OK not worried about deletes any more, not interested in isApproved:false final Set<String> to_compare_approved = to_compare._1().entrySet().stream() .filter(kv -> null != kv.getValue() && !kv.getValue().isEmpty()).map(kv -> kv.getKey()) .collect(Collectors.toSet()); final Set<String> v1_and_v2 = new HashSet<String>(to_compare_approved); v1_and_v2.retainAll(to_compare._2().keySet()); final List<String> v1_and_v2_mod = v1_and_v2.stream().filter(id -> { try { final Date v1_date = parseJavaDate(to_compare._1().get(id)); final Date v2_date = to_compare._2().get(id); return v1_date.getTime() > v2_date.getTime(); } catch (Throwable e) { return false; // (just ignore) } }).collect(Collectors.toList()); final Set<String> v1_not_v2 = new HashSet<String>(to_compare_approved); v1_not_v2.removeAll(to_compare._2().keySet()); return Tuples._3T(v1_not_v2, v2_not_v1, v1_and_v2_mod); } //////////////////////////////////////////////////// //////////////////////////////////////////////////// // DB MANIPULATION - READ /** Gets a list of keys,modified from v1 and a list matching keys,modified from V2 (ie _id minus ';') * @param bucket_mgmt * @param source_db * @return tuple of id-vs-(date-or-null-if-not-approved) for v1, id-vs-date for v2 */ protected static CompletableFuture<Tuple2<Map<String, String>, Map<String, Date>>> compareSourcesToBuckets_get( final IManagementCrudService<DataBucketBean> bucket_mgmt, final ICrudService<JsonNode> source_db) { // (could make this more efficient by having a regular "did something happen" query with a slower "get everything and resync) // (don't forget to add "modified" to the compund index though) CompletableFuture<Cursor<JsonNode>> f_v1_sources = source_db.getObjectsBySpec( CrudUtils.allOf().when("extractType", "V2DataBucket"), Arrays.asList("key", "modified", "isApproved"), true); return f_v1_sources.<Map<String, String>>thenApply(v1_sources -> { return StreamSupport.stream(v1_sources.spliterator(), false).collect(Collectors.toMap( j -> safeJsonGet("key", j).asText(), j -> safeJsonGet("isApproved", j).asBoolean() ? safeJsonGet("modified", j).asText() : "")); }).<Tuple2<Map<String, String>, Map<String, Date>>>thenCompose(v1_key_datestr_map -> { final SingleQueryComponent<DataBucketBean> bucket_query = CrudUtils.allOf(DataBucketBean.class) .rangeIn(DataBucketBean::_id, "aleph...bucket.", true, "aleph...bucket/", true); return bucket_mgmt.getObjectsBySpec(bucket_query, Arrays.asList(JsonUtils._ID, "modified"), true) .<Tuple2<Map<String, String>, Map<String, Date>>>thenApply(c -> { final Map<String, Date> v2_key_date_map = StreamSupport.stream(c.spliterator(), false) .collect(Collectors.toMap(b -> getV1SourceKeyFromBucketId(b._id()), // (convert to v1 source key format) b -> b.modified())); return Tuples._2T(v1_key_datestr_map, v2_key_date_map); }); }); } //////////////////////////////////////////////////// //////////////////////////////////////////////////// // DB MANIPULATION - WRITE /** Create a new bucket * @param key * @param bucket_mgmt * @param bucket_status_mgmt * @param source_db * @return */ protected static ManagementFuture<Supplier<Object>> createNewBucket(final String key, final IManagementCrudService<DataBucketBean> bucket_mgmt, final IManagementCrudService<DataBucketStatusBean> underlying_bucket_status_mgmt, final ICrudService<JsonNode> source_db) { _logger.info(ErrorUtils.get("Found new source {0}, creating bucket", key)); // Create a status bean: final SingleQueryComponent<JsonNode> v1_query = CrudUtils.allOf().when("key", key); return FutureUtils.denestManagementFuture(source_db.getObjectBySpec(v1_query) .<ManagementFuture<Supplier<Object>>>thenApply(Lambdas.wrap_u(jsonopt -> { final DataBucketBean new_object = getBucketFromV1Source(jsonopt.get()); final boolean is_now_suspended = safeJsonGet("searchCycle_secs", jsonopt.get()).asInt(1) < 0; final DataBucketStatusBean status_bean = BeanTemplateUtils.build(DataBucketStatusBean.class) .with(DataBucketStatusBean::_id, new_object._id()) .with(DataBucketStatusBean::bucket_path, new_object.full_name()) .with(DataBucketStatusBean::suspended, is_now_suspended).done().get(); return FutureUtils.denestManagementFuture( underlying_bucket_status_mgmt.storeObject(status_bean, true).thenApply(__ -> { final ManagementFuture<Supplier<Object>> ret = bucket_mgmt.storeObject(new_object); return ret; })); })).exceptionally(e -> { return FutureUtils .<Supplier<Object>>createManagementFuture( FutureUtils.returnError(new RuntimeException(e)), CompletableFuture.completedFuture(Arrays.asList(new BasicMessageBean(new Date(), false, "IkanowV1SyncService_Buckets", "createNewBucket", null, ErrorUtils.getLongForm("{0}", e), null)))); })); } /** Delete a bucket * @param key * @param bucket_mgmt * @return */ protected static ManagementFuture<Boolean> deleteBucket(final String key, final IManagementCrudService<DataBucketBean> bucket_mgmt) { _logger.info(ErrorUtils.get("Source {0} was deleted, deleting bucket", key)); return bucket_mgmt.deleteObjectById(getBucketIdFromV1SourceKey(key)); } /** Update a bucket based on a new V1 source * @param key * @param bucket_mgmt * @param underlying_bucket_status_mgmt * @param source_db * @return */ @SuppressWarnings("unchecked") protected static ManagementFuture<Supplier<Object>> updateBucket(final String key, final IManagementCrudService<DataBucketBean> bucket_mgmt, final IManagementCrudService<DataBucketStatusBean> underlying_bucket_status_mgmt, final ICrudService<JsonNode> source_db) { _logger.info(ErrorUtils.get("Source {0} was modified, updating bucket", key)); // Get the full source from V1 // .. and from V2: the existing bucket and the existing status // OK first off, we're immediately going to update the bucket's modified time // since otherwise if the update fails then we'll get stuck updating it every iteration... // (ie this is the reason we set isApproved:false in the create case) //(this ugliness just handles the test case already running on the underlying service) final ICrudService<DataBucketBean> underlying_bucket_db = bucket_mgmt .getUnderlyingPlatformDriver(ICrudService.class, Optional.empty()).orElse(bucket_mgmt); underlying_bucket_db.updateObjectById(getBucketIdFromV1SourceKey(key), CrudUtils.update(DataBucketBean.class).set(DataBucketBean::modified, new Date())); final SingleQueryComponent<JsonNode> v1_query = CrudUtils.allOf().when("key", key); final CompletableFuture<Optional<JsonNode>> f_v1_source = source_db.getObjectBySpec(v1_query); return FutureUtils.denestManagementFuture( f_v1_source.<ManagementFuture<Supplier<Object>>>thenApply(Lambdas.wrap_u(v1_source -> { // Once we have all the queries back, get some more information final boolean is_now_suspended = safeJsonGet("searchCycle_secs", v1_source.get()).asInt(1) < 0; final DataBucketBean new_object = getBucketFromV1Source(v1_source.get()); // (Update status in underlying status store so don't trip a spurious harvest call) CompletableFuture<?> update = underlying_bucket_status_mgmt.updateObjectById( getBucketIdFromV1SourceKey(key), CrudUtils.update(DataBucketStatusBean.class) .set(DataBucketStatusBean::suspended, is_now_suspended)); // Then update the management db return FutureUtils.denestManagementFuture( update.thenApply(__ -> bucket_mgmt.storeObject(new_object, true))); })).exceptionally(e -> { return FutureUtils .<Supplier<Object>>createManagementFuture( FutureUtils.returnError(new RuntimeException(e)), CompletableFuture.completedFuture(Arrays.asList(new BasicMessageBean(new Date(), false, "IkanowV1SyncService_Buckets", "updateBucket", null, ErrorUtils.getLongForm("{0}", e), null)))); })); } /** Takes a collection of results from the management side-channel, and uses it to update a harvest node * @param key - source key / bucket id * @param status_messages * @param source_db */ protected static CompletableFuture<Boolean> updateV1SourceStatus(final Date main_date, final String key, final Collection<BasicMessageBean> status_messages, final boolean set_approved_state, final ICrudService<JsonNode> source_db) { final String message_block = status_messages.stream().map(msg -> { return "[" + msg.date() + "] " + msg.source() + " (" + msg.command() + "): " + (msg.success() ? "INFO" : "ERROR") + ": " + msg.message(); }).collect(Collectors.joining("\n")); final boolean any_errors = status_messages.stream().anyMatch(msg -> !msg.success()); @SuppressWarnings("deprecation") final CommonUpdateComponent<JsonNode> update_1 = CrudUtils.update() .set("harvest.harvest_status", (any_errors ? "error" : "success")) .set("harvest.harvest_message", "[" + main_date.toGMTString() + "] Bucket synchronization:\n" + (message_block.isEmpty() ? "(no messages)" : message_block)); final UpdateComponent<JsonNode> update = set_approved_state ? update_1.set("isApproved", !any_errors) : update_1; final SingleQueryComponent<JsonNode> v1_query = CrudUtils.allOf().when("key", key); final CompletableFuture<Boolean> update_res = source_db.updateObjectBySpec(v1_query, Optional.empty(), update); return update_res; } //////////////////////////////////////////////////// //////////////////////////////////////////////////// protected static String getBucketIdFromV1SourceKey(final String key) { return key + ';'; } protected static String getV1SourceKeyFromBucketId(final String _id) { return _id.endsWith(";") ? _id.substring(0, _id.length() - 1) : _id; } // LOW LEVEL UTILS /** Builds a V2 bucket out of a V1 source * @param src_json * @return * @throws JsonParseException * @throws JsonMappingException * @throws IOException * @throws ParseException */ protected static DataBucketBean getBucketFromV1Source(final JsonNode src_json) throws JsonParseException, JsonMappingException, IOException, ParseException { // (think we'll use key instead of _id): //final String _id = safeJsonGet(JsonUtils._ID, src_json).asText(); final String key = safeJsonGet("key", src_json).asText(); final String created = safeJsonGet("created", src_json).asText(); final String modified = safeJsonGet("modified", src_json).asText(); final String title = safeJsonGet("title", src_json).asText(); final String description = safeJsonGet("description", src_json).asText(); final String owner_id = safeJsonGet("ownerId", src_json).asText(); final JsonNode tags = safeJsonGet("tags", src_json); // collection of strings //final JsonNode comm_ids = safeJsonGet("communityIds", src_json); // collection of strings final JsonNode px_pipeline = safeJsonGet("processingPipeline", src_json); // collection of JSON objects, first one should have data_bucket final JsonNode px_pipeline_first_el = ((ObjectNode) px_pipeline.get(0)) .without(Arrays.asList("test_params")); final JsonNode data_bucket_tmp = safeJsonGet("data_bucket", px_pipeline_first_el);// (WARNING: mutable, see below) final JsonNode scripting = safeJsonGet("scripting", data_bucket_tmp); // HANDLE SUBSTITUTION final String sub_prefix = Optional.ofNullable(scripting.get("sub_prefix")).map(x -> x.asText()) .orElse("$$SCRIPT_"); final String sub_suffix = Optional.ofNullable(scripting.get("sub_suffix")).map(x -> x.asText()) .orElse("$$"); final List<UnaryOperator<String>> search_replace = StreamSupport .stream(Spliterators.spliteratorUnknownSize(scripting.fieldNames(), Spliterator.ORDERED), false) .filter(f -> !f.equals("sub_prefix") && !f.equals("sub_suffix")) // (remove non language fields) .map(lang -> Tuples._2T(scripting.get(lang), lang)) // Get (separator regex, entire script, sub prefix) .map(scriptobj_lang -> Tuples._3T(safeJsonGet("separator_regex", scriptobj_lang._1()).asText(), safeJsonGet("script", scriptobj_lang._1()).asText(), sub_prefix + scriptobj_lang._2())) // Split each "entire script" up into blocks of format (bloc, lang) .<Stream<Tuple2<String, String>>>map(regex_script_lang -> Stream.concat( Stream.of(Tuples._2T(regex_script_lang._2(), regex_script_lang._3())), regex_script_lang._1().isEmpty() ? Stream.of(Tuples._2T(regex_script_lang._2(), regex_script_lang._3())) : Arrays.stream(regex_script_lang._2().split(regex_script_lang._1())) .<Tuple2<String, String>>map(s -> Tuples._2T(s, regex_script_lang._3())))) // Associate a per-lang index with each script block -> (replacement, string_sub) .<Tuple2<String, String>>flatMap(stream -> StreamUtils.zip(stream, Stream.iterate(0, i -> i + 1), (script_lang, i) -> Tuples._2T( script_lang._1().replace("\"", "\\\"").replace("\n", "\\n").replace("\r", "\\r"), i == 0 ? script_lang._2() + sub_suffix // (entire thing) : script_lang._2() + "_" + i + sub_suffix))) //(broken down components) .<UnaryOperator<String>>map(t2 -> (String s) -> s.replace(t2._2(), t2._1())) //(need to escape "s and newlines) .collect(Collectors.toList()); // Apply the list of transforms to the string ((ObjectNode) data_bucket_tmp).remove("scripting"); // (WARNING: mutable) final String data_bucket_str = search_replace.stream().reduce(data_bucket_tmp.toString(), (acc, s) -> s.apply(acc), (acc1, acc2) -> acc1); // Convert back to the bucket JSON final JsonNode data_bucket = ((ObjectNode) _mapper.readTree(data_bucket_str)) .without(Arrays.asList("test_params")); final DataBucketBean bucket = BeanTemplateUtils.build(data_bucket, DataBucketBean.class) .with(DataBucketBean::_id, getBucketIdFromV1SourceKey(key)) .with(DataBucketBean::created, parseJavaDate(created)) .with(DataBucketBean::modified, parseJavaDate(modified)).with(DataBucketBean::display_name, title) .with(DataBucketBean::description, description).with(DataBucketBean::owner_id, owner_id) .with(DataBucketBean::tags, StreamSupport.stream(tags.spliterator(), false).map(jt -> jt.asText()) .collect(Collectors.toSet())) .done().get(); return bucket; } /** Gets a JSON field that may not be present (justs an empty JsonNode if no) * @param fieldname * @param src * @return */ protected static JsonNode safeJsonGet(String fieldname, JsonNode src) { final JsonNode j = Optional.ofNullable(src.get(fieldname)).orElse(JsonNodeFactory.instance.objectNode()); //DEBUG //System.out.println(j); return j; } /** Quick utility to parse the result of Date::toString back into a date * @param java_date_tostring_format * @return * @throws ParseException */ protected static Date parseJavaDate(String java_date_tostring_format) throws ParseException { if (java_date_tostring_format.isEmpty()) return new Date(); try { return new SimpleDateFormat("EEE MMM d HH:mm:ss zzz yyyy").parse(java_date_tostring_format); } catch (Exception e) { try { return new SimpleDateFormat("MMM d, yyyy hh:mm:ss a zzz").parse(java_date_tostring_format); } catch (Exception ee) { return new SimpleDateFormat("d MMM yyyy HH:mm:ss zzz").parse(java_date_tostring_format); } } } }