// Java tutorial
/******************************************************************************* * Copyright 2016, The IKANOW Open Source Project. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package com.ikanow.aleph2.graph.titan.services; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Random; import java.util.Set; import java.util.function.Consumer; import java.util.stream.IntStream; import java.util.stream.Stream; import org.apache.logging.log4j.Level; import org.apache.tinkerpop.gremlin.structure.Vertex; import scala.Tuple2; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterators; import com.ikanow.aleph2.data_model.interfaces.data_analytics.IBatchRecord; import com.ikanow.aleph2.data_model.interfaces.data_import.IEnrichmentBatchModule; import com.ikanow.aleph2.data_model.interfaces.data_import.IEnrichmentModuleContext; import com.ikanow.aleph2.data_model.interfaces.data_services.IGraphService; import com.ikanow.aleph2.data_model.interfaces.shared_services.IBucketLogger; import com.ikanow.aleph2.data_model.interfaces.shared_services.ISecurityService; import 
com.ikanow.aleph2.data_model.interfaces.shared_services.IServiceContext; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean; import com.ikanow.aleph2.data_model.objects.data_import.EnrichmentControlMetadataBean; import com.ikanow.aleph2.data_model.objects.data_import.GraphAnnotationBean; import com.ikanow.aleph2.data_model.objects.data_import.DataSchemaBean.GraphSchemaBean; import com.ikanow.aleph2.data_model.objects.shared.SharedLibraryBean; import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils; import com.ikanow.aleph2.data_model.utils.BucketUtils; import com.ikanow.aleph2.data_model.utils.CrudUtils; import com.ikanow.aleph2.data_model.utils.ErrorUtils; import com.ikanow.aleph2.data_model.utils.Lambdas; import com.ikanow.aleph2.data_model.utils.Optionals; import com.ikanow.aleph2.data_model.utils.SetOnce; import com.ikanow.aleph2.data_model.utils.Tuples; import com.ikanow.aleph2.data_model.utils.UuidUtils; import com.ikanow.aleph2.graph.titan.data_model.GraphBuilderConfigBean; import com.ikanow.aleph2.graph.titan.utils.TitanGraphBuildingUtils; import com.ikanow.aleph2.graph.titan.utils.TitanGraphBuildingUtils.MutableStatsBean; import com.thinkaurelius.titan.core.TitanException; import com.thinkaurelius.titan.core.TitanGraph; import com.thinkaurelius.titan.core.TitanTransaction; import com.thinkaurelius.titan.diskstorage.TemporaryBackendException; import com.thinkaurelius.titan.diskstorage.locking.PermanentLockingException; /** Service for building the edges and vertices from the incoming records and updating any existing entries in the database * NOTE: there should be a non-tech-specific GraphBuilderEnrichmentService in the analytics context library that wraps this one * so that user code doesn't reference anything titan specific (the analytics context will grab the right one using the getUnderlyingArtefacts from the GraphService) * @author Alex * */ public class TitanGraphBuilderEnrichmentService implements IEnrichmentBatchModule { 
public final static String UUID = UuidUtils.get().getRandomUuid().split("-")[4]; protected final SetOnce<IEnrichmentBatchModule> _custom_graph_decomp_handler = new SetOnce<>(); protected final SetOnce<GraphDecompEnrichmentContext> _custom_graph_decomp_context = new SetOnce<>(); protected final SetOnce<IEnrichmentBatchModule> _custom_graph_merge_handler = new SetOnce<>(); protected final SetOnce<GraphMergeEnrichmentContext> _custom_graph_merge_context = new SetOnce<>(); protected final SetOnce<GraphSchemaBean> _config = new SetOnce<>(); protected final SetOnce<DataBucketBean> _bucket = new SetOnce<>(); protected final SetOnce<IServiceContext> _service_context = new SetOnce<>(); protected final SetOnce<Tuple2<String, ISecurityService>> _security_context = new SetOnce<>(); protected final SetOnce<IBucketLogger> _logger = new SetOnce<>(); protected final SetOnce<MutableStatsBean> _mutable_stats = new SetOnce<>(); protected final SetOnce<TitanGraph> _titan = new SetOnce<>(); protected final Set<ObjectNode> _mutable_new_vertex_keys = new HashSet<>(); // Special test mode protected LinkedList<TitanException> _MUTABLE_TEST_ERRORS = new LinkedList<>(); protected final static int _MAX_ATTEMPT_NUM = 9; protected final static Integer[] _BACKOFF_TIMES_MS = { 50, 100, 250, 1500, 3000, 6000, 12000, 24000, 48000, 96000 }; // (final one never called, that's the one we bail out on) /* (non-Javadoc) * @see com.ikanow.aleph2.data_model.interfaces.data_import.IEnrichmentBatchModule#onStageInitialize(com.ikanow.aleph2.data_model.interfaces.data_import.IEnrichmentModuleContext, com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, com.ikanow.aleph2.data_model.objects.data_import.EnrichmentControlMetadataBean, scala.Tuple2, java.util.Optional) */ @Override public void onStageInitialize(IEnrichmentModuleContext context, DataBucketBean bucket, EnrichmentControlMetadataBean control, Tuple2<ProcessingStage, ProcessingStage> previous_next, Optional<List<String>> 
next_grouping_fields) { final GraphBuilderConfigBean dedup_config = BeanTemplateUtils .from(Optional.ofNullable(control.config()).orElse(Collections.emptyMap()), GraphBuilderConfigBean.class) .get(); final GraphSchemaBean graph_schema = Optional.ofNullable(dedup_config.graph_schema_override()) .orElse(bucket.data_schema().graph_schema()); //(exists by construction) _config.set( BeanTemplateUtils.clone(graph_schema) .with(GraphSchemaBean::custom_finalize_all_objects, Optional.ofNullable(graph_schema.custom_finalize_all_objects()).orElse(false)) .with(GraphSchemaBean::deduplication_fields, Optional.ofNullable(graph_schema.deduplication_fields()) .orElse(Arrays.asList(GraphAnnotationBean.name, GraphAnnotationBean.type))) .done()); // Get the Titan graph _service_context.set(context.getServiceContext()); _logger.set(context.getLogger(Optional.of(bucket))); _security_context.set(Tuples._2T(bucket.owner_id(), _service_context.get().getSecurityService())); _bucket.set(bucket); _mutable_stats.set(new MutableStatsBean()); _service_context.get().getService(IGraphService.class, Optional.ofNullable(graph_schema.service_name())) .flatMap(graph_service -> graph_service.getUnderlyingPlatformDriver(TitanGraph.class, Optional.empty())) .ifPresent(titan -> _titan.set(titan)); ; // Set up decomposition enrichment final Optional<EnrichmentControlMetadataBean> custom_decomp_config = Optionals .ofNullable(graph_schema.custom_decomposition_configs()).stream().findFirst(); custom_decomp_config.ifPresent(cfg -> { final Optional<String> entry_point = Optional.ofNullable(cfg.entry_point()).map(Optional::of) .orElseGet(() -> { // Get the shared library bean: return BucketUtils .getBatchEntryPoint(context.getServiceContext().getCoreManagementDbService() .readOnlyVersion().getSharedLibraryStore() .getObjectBySpec(CrudUtils.anyOf(SharedLibraryBean.class) .when(SharedLibraryBean::_id, cfg.module_name_or_id()).when( SharedLibraryBean::path_name, cfg.module_name_or_id())) .join() .map(bean -> 
(Map<String, SharedLibraryBean>) ImmutableMap .of(cfg.module_name_or_id(), bean)) .orElse(Collections.<String, SharedLibraryBean>emptyMap()), cfg); }); entry_point.ifPresent( Lambdas.wrap_consumer_u(ep -> _custom_graph_decomp_handler.set((IEnrichmentBatchModule) Class .forName(ep, true, Thread.currentThread().getContextClassLoader()).newInstance()))); _custom_graph_decomp_context.set(new GraphDecompEnrichmentContext(context, _config.get())); _custom_graph_decomp_handler.optional() .ifPresent(base_module -> base_module.onStageInitialize(_custom_graph_decomp_context.get(), bucket, cfg, previous_next, next_grouping_fields)); }); // Set up merging enrichment final Optional<EnrichmentControlMetadataBean> custom_merge_config = Optionals .ofNullable(graph_schema.custom_merge_configs()).stream().findFirst(); custom_merge_config.ifPresent(cfg -> { final Optional<String> entry_point = Optional.ofNullable(cfg.entry_point()).map(Optional::of) .orElseGet(() -> { // Get the shared library bean: return BucketUtils .getBatchEntryPoint(context.getServiceContext().getCoreManagementDbService() .readOnlyVersion().getSharedLibraryStore() .getObjectBySpec(CrudUtils.anyOf(SharedLibraryBean.class) .when(SharedLibraryBean::_id, cfg.module_name_or_id()).when( SharedLibraryBean::path_name, cfg.module_name_or_id())) .join() .map(bean -> (Map<String, SharedLibraryBean>) ImmutableMap .of(cfg.module_name_or_id(), bean)) .orElse(Collections.<String, SharedLibraryBean>emptyMap()), cfg); }); entry_point.ifPresent( Lambdas.wrap_consumer_u(ep -> _custom_graph_merge_handler.set((IEnrichmentBatchModule) Class .forName(ep, true, Thread.currentThread().getContextClassLoader()).newInstance()))); _custom_graph_merge_context.set(new GraphMergeEnrichmentContext(context, _config.get())); _custom_graph_merge_handler.optional() .ifPresent(base_module -> base_module.onStageInitialize(_custom_graph_merge_context.get(), bucket, cfg, previous_next, next_grouping_fields)); }); } /* (non-Javadoc) * @see 
com.ikanow.aleph2.data_model.interfaces.data_import.IEnrichmentBatchModule#onObjectBatch(java.util.stream.Stream, java.util.Optional, java.util.Optional) */ @Override public void onObjectBatch(Stream<Tuple2<Long, IBatchRecord>> batch, Optional<Integer> batch_size, Optional<JsonNode> grouping_key) { // Get user assets: final List<ObjectNode> vertices_and_edges = TitanGraphBuildingUtils.buildGraph_getUserGeneratedAssets(batch, batch_size, grouping_key, _custom_graph_decomp_handler.optional() .map(handler -> Tuples._2T(handler, _custom_graph_decomp_context.get()))); final MutableStatsBean mutable_stats = new MutableStatsBean(); tryRecoverableTransaction(mutable_tx -> { final Stream<ObjectNode> copy_vertices_and_edges = vertices_and_edges.stream().map(o -> o.deepCopy()); // Fill in transaction mutable_stats.reset(); TitanGraphBuildingUtils.buildGraph_handleMerge(mutable_tx, _config.get(), _security_context.get(), _logger.optional(), mutable_stats, _mutable_new_vertex_keys, _custom_graph_merge_handler .optional().map(handler -> Tuples._2T(handler, _custom_graph_merge_context.get())), _bucket.get(), TitanGraphBuildingUtils.buildGraph_collectUserGeneratedAssets(mutable_tx, _config.get(), _security_context.get(), _logger.optional(), _bucket.get(), mutable_stats, copy_vertices_and_edges)); //(test mode for errors) if (!_MUTABLE_TEST_ERRORS.isEmpty()) { throw _MUTABLE_TEST_ERRORS.pop(); } }, () -> { _logger.optional().ifPresent(logger -> { logger.log(Level.DEBUG, ErrorUtils.lazyBuildMessage(true, () -> "GraphBuilderEnrichmentService", () -> "system.onObjectBatch", () -> null, () -> ErrorUtils.get( "Graph stats: V_emitted={0} V_matched={1} V_created={2} V_updated={3} V_errors={4} E_emitted={5} E_matched={6} E_created={7} E_updated={8} E_errors={9} (uuid={10})", mutable_stats.vertices_emitted, mutable_stats.vertex_matches_found, mutable_stats.vertices_created, mutable_stats.vertices_updated, mutable_stats.vertex_errors, mutable_stats.edges_emitted, 
mutable_stats.edge_matches_found, mutable_stats.edges_created, mutable_stats.edges_updated, mutable_stats.edge_errors, UUID), () -> BeanTemplateUtils.toMap(mutable_stats))); }); _mutable_stats.get().combine(mutable_stats); }); } protected void tryRecoverableTransaction(final Consumer<TitanTransaction> transaction, final Runnable on_success) { final Random random_generator = new Random(java.util.UUID.randomUUID().getMostSignificantBits()); IntStream.range(0, 1 + _MAX_ATTEMPT_NUM).boxed().filter(i -> { //(at most 5 attempts) try { // Create transaction /**/ //TRACE System.err.println(new java.util.Date().toString() + ": GRABBING TRANS " + i); final TitanTransaction mutable_tx = _titan.get().newTransaction(); /**/ //TRACE System.err.println( new java.util.Date().toString() + ": GRABBED TRANS " + i + " toS=" + mutable_tx.toString()); try { transaction.accept(mutable_tx); } catch (Exception e) { //(close the transaction without saving) /**/ //TRACE System.err.println(new java.util.Date().toString() + ": (ERROR) ROLLING BACK TRANS " + i); mutable_tx.rollback(); throw e; } /**/ //TRACE System.err.println(new java.util.Date().toString() + ": COMMITTING TRANS " + i + " tx=" + mutable_tx.hasModifications() + " open=" + mutable_tx.isOpen() + " toS=" + mutable_tx.toString()); // Attempt to commit mutable_tx.commit(); /**/ //TRACE System.err.println(new java.util.Date().toString() + ": COMMITTED TRANS " + i); on_success.run(); return true; // (ie ends the loop) } catch (TitanException e) { if ((i >= _MAX_ATTEMPT_NUM) || !isRecoverableError(e)) { /**/ //DEBUG System.err.println(new java.util.Date().toString() + ": HERE2 NON_RECOV: " + i + " vs " + _MAX_ATTEMPT_NUM + ErrorUtils.getLongForm(" error={0}", e)); //e.printStackTrace(); _logger.optional().ifPresent(logger -> { logger.log(Level.ERROR, ErrorUtils.lazyBuildMessage(false, () -> "GraphBuilderEnrichmentService", () -> "system.onStageComplete", () -> null, () -> ErrorUtils.getLongForm( "Failed to commit transaction due to 
local conflicts, attempt_num={1} error={0} (uuid={2})", e, i, UUID), () -> null)); }); throw e; } // If we're here, we're going to retry the transaction /**/ //DEBUG System.err.println(new java.util.Date().toString() + ": HERE3 RECOVERABLE" + ErrorUtils.getLongForm(" error={0}", e)); final int min_sleep_time = _BACKOFF_TIMES_MS[i] / 2; final int sleep_time = min_sleep_time + random_generator.nextInt(min_sleep_time); _logger.optional().ifPresent(logger -> { logger.log(Level.DEBUG, ErrorUtils.lazyBuildMessage(false, () -> "GraphBuilderEnrichmentService", () -> "system.onStageComplete", () -> null, () -> ErrorUtils.get( "Failed to commit transaction due to local conflicts, attempt_num={0} (uuid={1} sleep_ms={2})", i, UUID, sleep_time), () -> null)); }); try { Thread.sleep(sleep_time); } catch (Exception interrupted) { } return false; // (If it's a versioning conflict then try again) } /**/ //TRACE: catch (Throwable x) { System.err.println(new java.util.Date().toString() + ": HERE1 OTHER ERR" + ErrorUtils.getLongForm(" error={0}", x)); //x.printStackTrace(); throw x; } }).findFirst() // ie stop as soon as we have successfully transacted ; } /** Utility to check for recoverable errors * @param e * @return */ private static boolean isRecoverableError_internal(Throwable e) { // (temporary backend encompasses temporary locking) return (PermanentLockingException.class.isAssignableFrom(e.getClass()) || TemporaryBackendException.class.isAssignableFrom(e.getClass())); } /** Utility to check for recoverable errors * @param e * @return */ protected static boolean isRecoverableError(TitanException e) { if (null != e.getCause()) { if (isRecoverableError_internal(e.getCause())) // versioning error, repeat transaction { return true; } else if ((null != e.getCause().getCause()) && isRecoverableError_internal(e.getCause().getCause())) //versioning error, repeat transaction { return true; } } return false; } /* (non-Javadoc) * @see 
com.ikanow.aleph2.data_model.interfaces.data_import.IEnrichmentBatchModule#onStageComplete(boolean)
     *
     * Performs a final duplicate-merging pass over the vertex keys collected during onObjectBatch (in
     * batches of 250, each in its own retried transaction), logs the merge stats and the stage-wide graph
     * stats, and then forwards the completion call to the custom decomposition and merge modules (if any).
     *
     * @param is_original if true, a random 1-4s sleep is performed before merging
     */
    @Override
    public void onStageComplete(boolean is_original) {

        // First off sleep somewhere between 1 and 4s to ensure the blocks get out of sync
        final int sleep_time;
        if (is_original) {
            final Random random_generator = new Random(java.util.UUID.randomUUID().getMostSignificantBits());
            final int local_sleep_time = 1000 + random_generator.nextInt(3000);
            try {
                Thread.sleep(local_sleep_time);
            } catch (InterruptedException e) {
                // Carry on with the merge, but don't lose the interrupt request
                Thread.currentThread().interrupt();
            }
            sleep_time = local_sleep_time;
        } else {
            sleep_time = 0;
        }

        final int batch_size = 250;

        // OK now going to start merging new records one final time:

        final MutableStatsBean global_combine_stats = new MutableStatsBean();
        // Holds the stats of the most recent attempt at the current batch
        final MutableStatsBean combine_stats = new MutableStatsBean();

        Iterators.partition(_mutable_new_vertex_keys.iterator(), batch_size).forEachRemaining(batch -> {
            tryRecoverableTransaction(tx -> {
                final MutableStatsBean per_batch_stats = new MutableStatsBean();

                // (want nodes with multiple keys including my own, in case something else has latched onto one of my keys in the meantime...)
                final Map<JsonNode, List<Vertex>> grouped_vertices = TitanGraphBuildingUtils.getGroupedVertices(
                        batch, tx, _config.get().deduplication_fields(),
                        vertex -> Optionals.streamOf(vertex.properties(GraphAnnotationBean.a2_p), false)
                                .anyMatch(p -> _bucket.get().full_name().equals(p.value())));

                TitanGraphBuildingUtils.mergeDuplicates(tx, _bucket.get().full_name(), grouped_vertices,
                        per_batch_stats);

                // This body is re-run if the transaction is retried, so only the latest attempt's
                // stats are recorded here; the running total is updated once the commit has succeeded
                combine_stats.reset();
                combine_stats.combine(per_batch_stats);
            }, () -> {
                global_combine_stats.combine(combine_stats);

                _logger.optional().ifPresent(logger -> {
                    logger.log(Level.DEBUG,
                            ErrorUtils.lazyBuildMessage(true, () -> "GraphBuilderEnrichmentService",
                                    () -> "system.onStageComplete", () -> null,
                                    () -> ErrorUtils.get(
                                            "Batch merge stats: V_matched={0} V_updated={1} E_matched={2} E_updated={3} (uuid={4})",
                                            combine_stats.vertex_matches_found, combine_stats.vertices_updated,
                                            combine_stats.edge_matches_found, combine_stats.edges_updated, UUID),
                                    () -> BeanTemplateUtils.toMap(combine_stats)));
                });
            });
        });

        _logger.optional().ifPresent(logger -> {
            logger.log(Level.INFO,
                    ErrorUtils.lazyBuildMessage(true, () -> "GraphBuilderEnrichmentService",
                            () -> "system.onStageComplete", () -> null,
                            () -> ErrorUtils.get(
                                    "Final merge stats: V_matched={0} V_updated={1} E_matched={2} E_updated={3} (uuid={4} sleep_ms={5})",
                                    global_combine_stats.vertex_matches_found, global_combine_stats.vertices_updated,
                                    global_combine_stats.edge_matches_found, global_combine_stats.edges_updated,
                                    UUID, sleep_time),
                            () -> BeanTemplateUtils.toMap(global_combine_stats)));
        });

        _logger.optional().ifPresent(logger -> {
            final MutableStatsBean stage_stats = _mutable_stats.get();
            logger.log(Level.INFO,
                    ErrorUtils.lazyBuildMessage(true, () -> "GraphBuilderEnrichmentService",
                            () -> "system.onStageComplete", () -> null,
                            () -> ErrorUtils.get(
                                    "Graph stats: V_emitted={0} V_matched={1} V_created={2} V_updated={3} V_errors={4} E_emitted={5} E_matched={6} E_created={7} E_updated={8} E_errors={9} (uuid={10})",
                                    stage_stats.vertices_emitted, stage_stats.vertex_matches_found,
                                    stage_stats.vertices_created, stage_stats.vertices_updated,
                                    stage_stats.vertex_errors, stage_stats.edges_emitted,
                                    stage_stats.edge_matches_found, // (E_matched: the edge count, not the vertex one)
                                    stage_stats.edges_created, stage_stats.edges_updated,
                                    stage_stats.edge_errors, UUID),
                            () -> BeanTemplateUtils.toMap(stage_stats)));
        });

        _custom_graph_decomp_handler.optional().ifPresent(handler -> handler.onStageComplete(is_original));
        _custom_graph_merge_handler.optional().ifPresent(handler -> handler.onStageComplete(is_original));
    }
}