// Java tutorial
/******************************************************************************* * Copyright 2015, The IKANOW Open Source Project. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package com.ikanow.aleph2.search_service.elasticsearch.utils; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.Spliterator; import java.util.Spliterators; import java.util.function.Function; import java.util.stream.Collector; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; import org.elasticsearch.client.Client; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import scala.Tuple2; import scala.Tuple3; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean; import com.ikanow.aleph2.data_model.objects.data_import.DataSchemaBean; import 
com.ikanow.aleph2.data_model.utils.BeanTemplateUtils; import com.ikanow.aleph2.data_model.utils.BucketUtils; import com.ikanow.aleph2.data_model.utils.Lambdas; import com.ikanow.aleph2.data_model.utils.Optionals; import com.ikanow.aleph2.data_model.utils.Patterns; import com.ikanow.aleph2.data_model.utils.Tuples; import com.ikanow.aleph2.search_service.elasticsearch.data_model.ElasticsearchIndexServiceConfigBean; import com.ikanow.aleph2.search_service.elasticsearch.data_model.ElasticsearchIndexServiceConfigBean.SearchIndexSchemaDefaultBean; import com.ikanow.aleph2.search_service.elasticsearch.data_model.ElasticsearchIndexServiceConfigBean.SearchIndexSchemaDefaultBean.CollidePolicy; import com.ikanow.aleph2.shared.crud.elasticsearch.data_model.ElasticsearchContext; import fj.data.Either; /** A collection of utilities for converting buckets into Elasticsearch attributes * @author Alex */ public class ElasticsearchIndexUtils { public static final String CUSTOM_META = "_meta"; public static final String CUSTOM_META_SECONDARY = "secondary_buffer"; public static final String CUSTOM_META_BUCKET = "bucket_path"; public static final String CUSTOM_META_IS_PRIMARY = "is_primary"; //"true" or "false" public final static String DEFAULT_TOKENIZATION_TYPE = "_default_"; public final static String NO_TOKENIZATION_TYPE = "_none_"; protected final static String STRING_OVERRIDE_NAME = "__OVERRIDE_STRING_FIELDS__"; ///////////////////////////////////////////////////////////////////// // INDEX NAMES /** Returns the base index name (before any date strings, splits etc) have been appended * Generated by taking 1-3 directories from the path and then appening the end of a UUID * @param bucket * @param secondary_buffer - for "ping pong" buffers * @return */ public static String getBaseIndexName(final DataBucketBean bucket, final Optional<String> secondary_buffer) { final Optional<String> override_string = Optionals .<String>of(() -> ((String) 
bucket.data_schema().search_index_schema().technology_override_schema() .get(SearchIndexSchemaDefaultBean.index_name_override_))); return override_string.orElseGet(() -> { return BucketUtils.getUniqueSignature(bucket.full_name(), secondary_buffer); }); } /** Returns a readable copy of the base name (before any date strings, splits etc) * Uses the read alias that all non-overridden indexes have * @param bucket * @return */ public static String getReadableBaseIndexName(final DataBucketBean bucket) { final Optional<String> override_string = Optionals .<String>of(() -> ((String) bucket.data_schema().search_index_schema().technology_override_schema() .get(SearchIndexSchemaDefaultBean.index_name_override_))); return override_string.orElseGet(() -> { return ElasticsearchContext.READ_PREFIX + BucketUtils.getUniqueSignature(bucket.full_name(), Optional.empty()); }); } /** Returns either a specific type name, or "_default_" if auto types are used * @param bucket * @return */ public static String getTypeKey(final DataBucketBean bucket, final ObjectMapper mapper) { return Optional.ofNullable(bucket.data_schema()).map(DataSchemaBean::search_index_schema) .filter(s -> Optional.ofNullable(s.enabled()).orElse(true)) .map(DataSchemaBean.SearchIndexSchemaBean::technology_override_schema) .map(t -> BeanTemplateUtils .from(mapper.convertValue(t, JsonNode.class), SearchIndexSchemaDefaultBean.class).get()) .<String>map(cfg -> { return Patterns.match(cfg.collide_policy()).<String>andReturn() .when(cp -> SearchIndexSchemaDefaultBean.CollidePolicy.error == cp, __ -> Optional.ofNullable(cfg.type_name_or_prefix()) .orElse(ElasticsearchIndexServiceConfigBean.DEFAULT_FIXED_TYPE_NAME)) // (ie falls through to default below) .otherwise(__ -> null); }).orElse("_default_"); // (the default - "auto type") } /** Grabs the data suffix (including the leading "_") away from the full index (well, assuming it doesn't include the buffer #) * @param index_name - in the format 
<base_index>[_<secondary>]__<id>[_date] NOTE WITH NO BUFFER NUMBER AT THE END * @return */ public static Optional<String> snagDateFormatFromIndex(final String index_name) { return Optional.of(index_name).map(s -> Tuples._2T(s, s.lastIndexOf("__"))) // find the __<id> .filter(t2 -> t2._2() >= 0).map(t2 -> t2._1().substring(t2._2() + 2)) // step over the __ // then same(-ish) again to find the date .map(s -> Tuples._2T(s, s.indexOf("_"))) // find the _<date>[_segment] .filter(t2 -> t2._2() >= 0).map(t2 -> t2._1().substring(t2._2())) // return the _<date>[_segment] .filter(substr -> substr.length() > 3) // get rid of just [_segment] case .map(substr -> { // remove the _segment if present final int segment = substr.lastIndexOf("_"); if (segment > 0) { //>0 because of the leading _ return substr.substring(0, segment); } else { return substr; } }); } /** Utility that returns a set of types for the specified (comma separated indexes) * @param client * @param index_list * @return */ public static Multimap<String, String> getTypesForIndex(final Client client, final String index_list) { return Arrays .<Object>stream(client.admin().cluster().prepareState().setIndices(index_list) .setRoutingTable(false).setNodes(false).setListenerThreaded(false).get().getState() .getMetaData().getIndices().values().toArray()) .map(obj -> (IndexMetaData) obj) .collect(Collector.of(LinkedHashMultimap::create, (acc, v) -> Optionals.streamOf(v.getMappings().keysIt(), false) .filter(t -> !t.equals("_default_")).forEach(t -> acc.put(v.index(), t)), (acc1, acc2) -> { acc1.putAll(acc2); return acc1; })); } ///////////////////////////////////////////////////////////////////// // MAPPINGS - DEFAULTS /** Builds a lookup table of settings * @param mapping - the mapping to use * @param type - if the index has a specific type, lookup that and _default_ ; otherwise just _default * @return */ public static LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> parseDefaultMapping( final JsonNode 
mapping, final Optional<String> type, final Optional<DataSchemaBean.SearchIndexSchemaBean> maybe_search_index_schema, final Optional<DataSchemaBean.DocumentSchemaBean> maybe_document_schema, final SearchIndexSchemaDefaultBean search_index_schema_override, final ObjectMapper mapper) { //(see similar code in createComplexStringLookups) final boolean tokenize_by_default = maybe_search_index_schema.map(schema -> schema.tokenize_by_default()) .orElse(true); final boolean dual_tokenize_by_default = Optional .ofNullable(search_index_schema_override.dual_tokenize_by_default()).orElse(false); final JsonNode default_string_mapping = ((ObjectNode) (ElasticsearchIndexUtils.getMapping( Tuples._2T(tokenize_by_default, dual_tokenize_by_default), search_index_schema_override, mapper, true))).put(TYPE_MATCH_NAME, "string").put(PATH_MATCH_NAME, "*"); // (this is always not tokenized but inherits dual tokenization) final ObjectNode not_analyzed_field = ((ObjectNode) (ElasticsearchIndexUtils.getMapping( Tuples._2T(false, dual_tokenize_by_default), search_index_schema_override, mapper, true))) .put(TYPE_MATCH_NAME, "string"); final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> ret = Optional .ofNullable(mapping.get("mappings")).map(m -> { if (!m.isObject()) throw new RuntimeException("mappings must be object"); return m; }) .map(m -> Optional.ofNullable(m.get(type.orElse("_default_"))) .map(mm -> !mm.isNull() ? 
mm : m.get("_default_")).orElse(m.get("_default_"))) .filter(m -> !m.isNull()).map(i -> { if (!i.isObject()) throw new RuntimeException(type + " must be object"); return i; }).map(i -> { // OK so I have a list of dynamic_templates, and a list of properties - and then a set of string defaults to apply // 1) want to leave the properties alone // 2) then the tokenization overrides from createComplexStringLookups // 3) then the existing templates final Map<Either<String, Tuple2<String, String>>, JsonNode> override_props = createComplexStringLookups( maybe_search_index_schema, search_index_schema_override, mapper); // ensure string doc fields aren't analyzed final Map<Either<String, Tuple2<String, String>>, String> type_override = maybe_search_index_schema .map(s -> s.type_override()).map(m -> buildTypeMap(m)).orElse(Collections.emptyMap()); final Map<Either<String, Tuple2<String, String>>, JsonNode> doc_props = maybe_document_schema .map(ds -> ds.deduplication_fields()) .<Map<Either<String, Tuple2<String, String>>, JsonNode>>map(fields -> { return fields.stream().filter(f -> !override_props.containsKey(Either.left(f))) .filter(f -> !override_props.containsKey(Either.right(Tuples._2T(f, "*")))) .filter(f -> !type_override.containsKey(Either.left(f))) .filter(f -> !type_override.containsKey(Either.right(Tuples._2T(f, "*")))) .<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>>map( f -> Tuples._2T(Either.right(Tuples._2T(f, "string")), not_analyzed_field.deepCopy().put(PATH_MATCH_NAME, f))) .collect(Collectors.toMap( (Tuple2<Either<String, Tuple2<String, String>>, JsonNode> t2) -> t2 ._1(), t2 -> t2._2())); }).orElse(Collections.emptyMap()); final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> props = new LinkedHashMap<>(); props.putAll(doc_props); // (put these first - though actually i'm fixing the order with an explicit sort in the columnar section) props.putAll(override_props); // extra mappings and extra templates Optionals.of(() -> 
search_index_schema_override.extra_field_mappings()) .map(o -> mapper.convertValue(o, JsonNode.class)).ifPresent(j -> { props.putAll(getTemplates(j, default_string_mapping, props.keySet())); props.putAll(getProperties(j)); }); // full mappings at the end props.putAll(getTemplates(i, default_string_mapping, props.keySet())); props.putAll(getProperties(i)); return props; }).orElse(new LinkedHashMap<>()); return ret; } /** Recursive function that will return all fields in an insert (eg "geoip", "geoip.location") * @param index * @return */ protected static Stream<String> getAllFixedFields_internal(final JsonNode index) { return Optional.ofNullable(index.get("properties")).filter(p -> !p.isNull()).map(p -> { if (!p.isObject()) throw new RuntimeException("properties must be object"); return p; }).map(p -> { return StreamSupport.stream(Spliterators.spliteratorUnknownSize(p.fields(), Spliterator.ORDERED), false) .map(kv -> { return kv; }).<String>flatMap(kv -> { final Stream<String> parent_element = Stream.of(kv.getKey()); return Stream.concat(parent_element, getAllFixedFields_internal(kv.getValue()).map(s -> kv.getKey() + "." 
+ s)); }); }).orElse(Stream.<String>empty()); } /** Top level function to return the fixed fields that we need to exclude from auto type generation * @param mapping - the top level mapping * @return */ public static Set<String> getAllFixedFields(final JsonNode mapping) { return Optional.ofNullable(mapping.get("mappings")).filter(p -> !p.isNull()).filter(p -> p.isObject()) .flatMap(p -> Optional.ofNullable(p.get("_default_"))).filter(p -> !p.isNull()) .filter(p -> p.isObject()).map(p -> getAllFixedFields_internal(p).collect(Collectors.toSet())) .orElse(Collections.emptySet()); } /** Get a set of field mappings from the "properties" section of a mapping * @param index * @return */ protected static LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> getProperties( final JsonNode index) { return Optional.ofNullable(index.get("properties")).filter(p -> !p.isNull()).map(p -> { if (!p.isObject()) throw new RuntimeException("properties must be object"); return p; }).map(p -> { return StreamSupport.stream(Spliterators.spliteratorUnknownSize(p.fields(), Spliterator.ORDERED), false) .map(kv -> { if (!kv.getValue().has("type") && !kv.getValue().has("properties")) throw new RuntimeException(SearchIndexErrorUtils .get("field {0} must have a 'type' or 'properties' sub-field", kv.getKey())); return kv; }).collect( Collectors.<Map.Entry<String, JsonNode>, Either<String, Tuple2<String, String>>, JsonNode, LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>>toMap( kv -> Either.<String, Tuple2<String, String>>left(kv.getKey()), kv -> kv.getValue(), (v1, v2) -> v1, // (should never happen) () -> new LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>())); }).orElse(new LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>()); } /** Get a set of field mappings from the "dynamic_templates" section of a mapping * @param index * @return */ protected static LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> getTemplates( final 
JsonNode index, final JsonNode default_string_mapping, final Set<Either<String, Tuple2<String, String>>> already_processed) { return Optional.ofNullable(index.get("dynamic_templates")).filter(p -> !p.isNull()).map(p -> { if (!p.isArray()) throw new RuntimeException("dynamic_templates must be object"); return p; }).map(p -> { return StreamSupport .stream(Spliterators.spliteratorUnknownSize(p.elements(), Spliterator.ORDERED), false) .map(pf -> { if (!pf.isObject()) throw new RuntimeException("dynamic_templates[*] must be object"); return pf; }) .flatMap(pp -> StreamSupport .stream(Spliterators.spliteratorUnknownSize(pp.fields(), Spliterator.ORDERED), false)) .filter(kv -> !kv.getKey().equals(STRING_OVERRIDE_NAME) || !already_processed.contains(Either.right(Tuples._2T("*", "string")))) // (don't override a specified string) .map(kv -> !kv.getKey().equals(STRING_OVERRIDE_NAME) ? kv : Maps.immutableEntry(kv.getKey(), default_string_mapping)) //(special case - overwrite with system default) .collect( Collectors.<Map.Entry<String, JsonNode>, Either<String, Tuple2<String, String>>, JsonNode, LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>>toMap( kv -> Either.right(buildMatchPair(kv.getValue())), kv -> kv.getValue(), (v1, v2) -> v1, // (should never happen) () -> new LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>())); }).orElse(new LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>()); } private static final String RAW_MATCH_NAME = "match"; //(only use this to read, never set) private static final String PATH_MATCH_NAME = "path_match"; private static final String TYPE_MATCH_NAME = "match_mapping_type"; /** Builds a match pair from a field mapping * @param template * @return */ protected static Tuple2<String, String> buildMatchPair(final JsonNode template) { return Tuples._2T( Optional.ofNullable(template.get(RAW_MATCH_NAME)).map(j -> j.asText()).orElse( Optional.ofNullable(template.get(PATH_MATCH_NAME)).map(j -> 
j.asText()).orElse("*")), Optional.ofNullable(template.get(TYPE_MATCH_NAME)).map(j -> j.asText()).orElse("*")); } /** Creates a single string from a match/match_mapping_type pair * @param field_info * @return */ protected static String getFieldNameFromMatchPair(final Tuple2<String, String> field_info) { return field_info._1().replace("*", "STAR").replace(".", "DOT").replace("_", "BAR") + "_" + field_info._2().replace("*", "STAR"); }; ///////////////////////////////////////////////////////////////////// // MAPPINGS - CREATION // Quick guide to mappings // under mappings you can specify either // - specific types // - _default_, which applies to anything that doesn't match that type // - then under each type (or _default_).. // - you can specify dynamic_templates/properties/_all/dynamic_date_formats/date_detection/numeric_detection // - under properties you can then specify types and then fields // - under dynamic_templates you can specify fields // - under fields you can specify type/fielddata(*)/similarity/analyzer/etc // // (*) https://www.elastic.co/guide/en/elasticsearch/reference/current/fielddata-formats.html // // OK so we can specify parts of mappings in the following ways: // - COLUMNAR: // - based on field name .. maps to path_match // - based on type .. maps to match_mapping_type // (and then for these columnar types we want to specify // "type": "{dynamic_type}", "index": "no", "fielddata": { "format": "doc_values" } // (or disabled) // but potentially would like to be able to add more info as well/instead // so maybe need a default and then per-key override // // OK ... 
then in addition, we want to be able to set other elements of the search from the search override schema // The simplest way of doing this is probably just to force matching on fields/patterns and then to merge them /////////////////////////////////////////////////////////////// // TEMPORAL PROCESSING /** Creates a mapping for the bucket - temporal elements * @param bucket * @return * @throws IOException */ public static XContentBuilder getTemporalMapping(final DataBucketBean bucket, final Optional<XContentBuilder> to_embed) { try { final XContentBuilder start = to_embed.orElse(XContentFactory.jsonBuilder().startObject()); if (!Optional.ofNullable(bucket.data_schema()).map(DataSchemaBean::temporal_schema) .filter(s -> Optional.ofNullable(s.enabled()).orElse(true)).isPresent()) return start; // Nothing to be done here return start; } catch (IOException e) { //Handle fake "IOException" return null; } } /////////////////////////////////////////////////////////////// // COLUMNAR PROCESSING // (Few constants to tidy stuff up) protected final static String BACKUP_FIELD_MAPPING_PROPERTIES = "{\"index\":\"not_analyzed\"}"; protected final static String BACKUP_FIELD_MAPPING_TEMPLATES = "{\"mapping\":" + BACKUP_FIELD_MAPPING_PROPERTIES + "}"; protected final static String DEFAULT_FIELDDATA_NAME = "_default_"; protected final static String DISABLED_FIELDDATA = "{\"format\":\"disabled\"}"; /** Util to prevent nested fields from being treated as properties * @param s * @return */ public static Either<String, Tuple2<String, String>> getKey(String s) { return s.contains(".") ? 
Either.right(Tuples._2T(s, "*")) : Either.left(s); } /** Utility to build a lookup map based on a columnar schema * @param columnar_schema * @return */ protected static Map<Either<String, Tuple2<String, String>>, Boolean> createComplexStringLookups_partial( final DataSchemaBean.ColumnarSchemaBean columnar_schema) { return Stream .<Stream<Tuple2<Either<String, Tuple2<String, String>>, Boolean>>>of( Optionals.ofNullable(columnar_schema.field_include_list()).stream() .map(s -> Tuples._2T(getKey(s), true)), Optionals .ofNullable(columnar_schema.field_exclude_list()).stream() .map(s -> Tuples._2T(getKey(s), false)), Optionals.ofNullable(columnar_schema.field_include_pattern_list()).stream() .map(s -> Tuples._2T( Either.<String, Tuple2<String, String>>right(Tuples._2T(s, "*")), true)), Optionals.ofNullable(columnar_schema.field_exclude_pattern_list()).stream() .map(s -> Tuples._2T( Either.<String, Tuple2<String, String>>right(Tuples._2T(s, "*")), false)), Optionals.ofNullable(columnar_schema.field_type_include_list()).stream() .map(s -> Tuples._2T( Either.<String, Tuple2<String, String>>right(Tuples._2T("*", s)), true)), Optionals.ofNullable(columnar_schema.field_type_exclude_list()).stream() .map(s -> Tuples._2T( Either.<String, Tuple2<String, String>>right(Tuples._2T("*", s)), false))) .flatMap(__ -> __).collect(Collectors.toMap(t2 -> t2._1(), t2 -> t2._2())); } /** Returns a JsonNode with the field's mapping * @param tokenize_dual * @param defaults * @return */ protected static JsonNode getMapping(final Tuple2<Boolean, Boolean> tokenize_dual, final SearchIndexSchemaDefaultBean defaults, final ObjectMapper mapper, boolean is_dynamic_template) { return Optional .of(Patterns.match(tokenize_dual).<JsonNode>andReturn() .when(td -> td.equals(Tuples._2T(true, true)), __ -> mapper.convertValue(defaults.dual_tokenized_string_field(), JsonNode.class)) .when(td -> td.equals(Tuples._2T(true, false)), __ -> mapper.convertValue(defaults.tokenized_string_field(), JsonNode.class)) 
.when(td -> td.equals(Tuples._2T(false, true)), __ -> mapper.convertValue(defaults.dual_untokenized_string_field(), JsonNode.class)) .when(td -> td.equals(Tuples._2T(false, false)), __ -> mapper.convertValue(defaults.untokenized_string_field(), JsonNode.class)) .otherwiseAssert()) .map(j -> is_dynamic_template ? mapper.createObjectNode().set("mapping", j) : j).get(); } /** Builds an inverted type map (type not inserted into tuple2 key to allow it to match against types) * @param types * @return */ public static Map<Either<String, Tuple2<String, String>>, String> buildTypeMap( final Map<String, DataSchemaBean.ColumnarSchemaBean> types) { return types.entrySet().stream().flatMap(kv -> { return Stream .<Stream<Tuple2<Either<String, Tuple2<String, String>>, String>>>of(Optionals .ofNullable(kv.getValue().field_include_list()).stream() .map(s -> Tuples._2T(getKey(s), kv.getKey())), Optionals.ofNullable(kv.getValue().field_include_pattern_list()).stream() .map(s -> Tuples._2T( Either.<String, Tuple2<String, String>>right(Tuples._2T(s, "*")), kv.getKey()))) .<Tuple2<Either<String, Tuple2<String, String>>, String>>flatMap(__ -> __); }).collect(Collectors.toMap(t2 -> t2._1(), t2 -> t2._2())); } /** Converts a fully specified non-nested field into a property * NOTE: NOT GOING TO USE THIS FOR NOW, MOST FIELDS WILL BE NESTED ANYWAY * (THEREFORE UNTESTED, SO NEED TO WRITE TEST COVERAGE BEFORE USING) * @param in * @return */ // protected static Map.Entry<Either<String, Tuple2<String, String>>, JsonNode> convertTemplateToProperty(final Map.Entry<Either<String, Tuple2<String, String>>, JsonNode> in) { // return Patterns.match(in.getKey()).<Map.Entry<Either<String, Tuple2<String, String>>, JsonNode>>andReturn() // .when(k -> k.isRight() && !k.right().value()._1().contains("*") && !k.right().value()._2().contains("*") && !k.right().value()._2().contains("*"), k -> { // return new AbstractMap.SimpleImmutableEntry<Either<String, Tuple2<String, String>>, 
JsonNode>(Either.left(k.right().value()._1()), in.getValue().get("mapping")); // }) // .otherwise(__ -> in) // ; // } /** Utility to build a lookup map based on a columnar schema * @param columnar_schema * @return */ protected static Map<Either<String, Tuple2<String, String>>, JsonNode> createComplexStringLookups( final Optional<DataSchemaBean.SearchIndexSchemaBean> maybe_search_index_schema, final SearchIndexSchemaDefaultBean search_index_schema_override, final ObjectMapper mapper) { // (see similar code in parseDefaultMapping) final boolean tokenize_by_default = maybe_search_index_schema.map(s -> s.tokenize_by_default()) .orElse(true); final boolean dual_tokenize_by_default = Optional .ofNullable(search_index_schema_override.dual_tokenize_by_default()).orElse(false); // Start with streams of tokenized, non-tokenized, and dual fields return Stream .of(maybe_search_index_schema.map(s -> s.tokenization_override()) .map(m -> m.get(DEFAULT_TOKENIZATION_TYPE)) .map(columnar_schema -> createComplexStringLookups_partial(columnar_schema)) .orElse(Collections.emptyMap()).entrySet().stream() .<Tuple3<Either<String, Tuple2<String, String>>, Boolean, Boolean>>map( kv -> Tuples._3T(kv.getKey(), kv.getValue(), null)), maybe_search_index_schema.map(s -> s.tokenization_override()) .map(m -> m.get(NO_TOKENIZATION_TYPE)) .map(columnar_schema -> createComplexStringLookups_partial(columnar_schema)) .orElse(Collections.emptyMap()).entrySet().stream() .<Tuple3<Either<String, Tuple2<String, String>>, Boolean, Boolean>>map( kv -> Tuples._3T(kv.getKey(), !kv.getValue(), null)), Optional.ofNullable(search_index_schema_override.dual_tokenization_override()) .map(columnar_schema -> createComplexStringLookups_partial(columnar_schema)) .orElse(Collections.emptyMap()).entrySet().stream() .<Tuple3<Either<String, Tuple2<String, String>>, Boolean, Boolean>>map( kv -> Tuples._3T(kv.getKey(), null, kv.getValue()))) .flatMap(__ -> __) // Merge the map .collect(Collectors.toMap(t3 -> t3._1(), t3 -> 
Tuples._2T(t3._2(), t3._3()), // use the existing value, unless it's null in which case overwrite it (t2_1, t2_2) -> Tuples._2T(Optional.ofNullable(t2_1._1()).orElse(t2_2._1()), Optional.ofNullable(t2_2._1()).orElse(t2_2._2())))) .entrySet() // Now convert to the JSON mapping .stream() .map(kv -> Tuples._2T(kv.getKey(), getMapping( Tuples._2T(Optional.ofNullable(kv.getValue()._1()).orElse(tokenize_by_default), Optional.ofNullable(kv.getValue()._2()).orElse(dual_tokenize_by_default)), search_index_schema_override, mapper, kv.getKey().isRight()))) .collect(Collectors.toMap(t2 -> t2._1(), t2 -> t2._2())); } /** Sorts keys based on how unspecific they are * - anything with no *s first (10 + 1)=11 * - then string fields with * type (10 + 3)=13 * - then glob fields with fixed type (20 + 1)=21 * - then glob fields with * type (20 + 3)=23 * - then * fields with fixed type (30 + 1)=31 * - then *,* (3 + 30)=33 (30 + 3)=33 * @param key * @return */ protected static int sortKey(Either<String, Tuple2<String, String>> key) { return key.either(s -> 0, t2 -> 10 * sortKey(t2._1()) + sortKey(t2._2())); } /** Sorts a string based on unspecific it is * @param field * @return */ protected static int sortKey(String field) { return Patterns.match(field).<Integer>andReturn().when(k -> k.equals("*"), __ -> 3) .when(k -> k.contains("*"), __ -> 2).otherwise(() -> 1); } /** Creates a mapping for the bucket - columnar elements * ALSO INCLUDES THE PER-FIELD CONFIGURATION FROM THE SEARCH_INDEX_SCHEMA AND TEMPORAL_SCHMEA * @param bucket * @return * @throws IOException */ public static XContentBuilder getColumnarMapping(final DataBucketBean bucket, Optional<XContentBuilder> to_embed, final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> field_lookups, final JsonNode enabled_not_analyzed, final JsonNode enabled_analyzed, final JsonNode default_not_analyzed, final JsonNode default_analyzed, final Optional<JsonNode> doc_schema, final SearchIndexSchemaDefaultBean 
search_index_schema_override, final ObjectMapper mapper, final String index_type) { try { final XContentBuilder start = to_embed.orElse(XContentFactory.jsonBuilder().startObject()); final boolean columnar_enabled = Optional.ofNullable(bucket.data_schema()) .map(DataSchemaBean::columnar_schema).filter(s -> Optional.ofNullable(s.enabled()).orElse(true)) .isPresent(); final Map<Either<String, Tuple2<String, String>>, String> type_override = Optionals .of(() -> bucket.data_schema().search_index_schema().type_override()).map(m -> buildTypeMap(m)) .orElse(Collections.emptyMap()); // If no columnar settings are specified then go with a sensible default final Optional<DataSchemaBean.ColumnarSchemaBean> maybe_user_columnar_schema = Optionals .of(() -> bucket.data_schema().columnar_schema()); final DataSchemaBean.ColumnarSchemaBean columnar_schema = maybe_user_columnar_schema .filter(__ -> columnar_enabled).filter(schema -> (null == schema.field_include_list()) && // ie the entire thing is empty (null == schema.field_exclude_list()) && (null == schema.field_include_pattern_list()) && (null == schema.field_type_include_list()) && (null == schema.field_exclude_pattern_list()) && (null == schema.field_type_exclude_list())) .map(schema -> BeanTemplateUtils.clone(schema) .with(DataSchemaBean.ColumnarSchemaBean::field_type_include_list, Arrays.asList("string", "number", "date")) .done()) .orElseGet(() -> maybe_user_columnar_schema.orElse(null)) // (NOTE: can only be null if columnar_enabled is false) ; final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> column_lookups_pretypes = Stream .of(columnar_enabled ? createFieldIncludeLookups( Optionals.ofNullable(columnar_schema.field_include_list()).stream(), fn -> getKey(fn), field_lookups, enabled_not_analyzed, enabled_analyzed, true, search_index_schema_override, type_override, mapper, index_type) : Stream.<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>>empty(), columnar_enabled ? 
createFieldExcludeLookups( Optionals.ofNullable(columnar_schema.field_exclude_list()).stream(), fn -> getKey(fn), field_lookups, search_index_schema_override, type_override, mapper, index_type) : Stream.<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>>empty(), columnar_enabled ? createFieldIncludeLookups( Optionals.ofNullable(columnar_schema.field_include_pattern_list()) .stream(), fn -> Either.right(Tuples._2T(fn, "*")), field_lookups, enabled_not_analyzed, enabled_analyzed, true, search_index_schema_override, type_override, mapper, index_type) : Stream.<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>>empty(), columnar_enabled ? createFieldIncludeLookups( Optionals.ofNullable(columnar_schema.field_type_include_list()) .stream(), fn -> Either.right(Tuples._2T("*", fn)), field_lookups, enabled_not_analyzed, enabled_analyzed, true, search_index_schema_override, type_override, mapper, index_type) : Stream.<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>>empty(), columnar_enabled ? createFieldExcludeLookups( Optionals.ofNullable(columnar_schema.field_exclude_pattern_list()) .stream(), fn -> Either.right(Tuples._2T(fn, "*")), field_lookups, search_index_schema_override, type_override, mapper, index_type) : Stream.<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>>empty(), columnar_enabled ? 
createFieldExcludeLookups( Optionals.ofNullable(columnar_schema.field_type_exclude_list()) .stream(), fn -> Either.right(Tuples._2T("*", fn)), field_lookups, search_index_schema_override, type_override, mapper, index_type) : Stream.<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>>empty(), // Finally add the default columnar lookups to the unmentioned strings (ensures that *_* is at the end) field_lookups.entrySet().stream() .flatMap(kv -> createFieldIncludeLookups(Stream.of(kv.getKey().toString()), __ -> kv.getKey(), field_lookups, default_not_analyzed, default_analyzed, false, search_index_schema_override, type_override, mapper, index_type))) .flatMap(x -> x).collect(Collectors.toMap(t2 -> t2._1(), t2 -> t2._2(), (v1, v2) -> v1, // (ie ignore duplicates) () -> new LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>())); ; // Also any types that didn't map onto one of the fields or tokens: final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> column_lookups_types = type_override .entrySet().stream() // (filter - convert name/* to name/type and check if I've already created such an entry using the type map) .filter(kv -> !column_lookups_pretypes .containsKey(kv.getKey().either(s -> s, t2 -> Tuples._2T(t2._1(), kv.getValue())))) .flatMap(kv -> createFieldIncludeLookups(Stream.of(kv.getKey().toString()), __ -> kv.getKey().<Either<String, Tuple2<String, String>>>either(s -> Either.left(s), t2 -> Either.right(Tuples._2T(t2._1(), kv.getValue()))), field_lookups, default_not_analyzed, default_analyzed, false, search_index_schema_override, type_override, mapper, index_type)) .collect(Collectors.toMap(t2 -> t2._1(), t2 -> t2._2(), (v1, v2) -> v1, () -> new LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>())); final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> column_lookups = Stream .concat(column_lookups_pretypes.entrySet().stream(), column_lookups_types.entrySet().stream()) .sorted((a, b) -> 
Integer.compare(sortKey(a.getKey()), sortKey(b.getKey()))) .collect(Collectors.toMap(t2 -> t2.getKey(), t2 -> t2.getValue(), (v1, v2) -> v1, () -> new LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode>())); final XContentBuilder properties = column_lookups.entrySet().stream() // properties not dynamic_templates .filter(kv -> kv.getKey().isLeft()) // overwrite with version built using columns if it exists .map(kv -> Tuples._2T(kv.getKey(), column_lookups.getOrDefault(kv.getKey(), kv.getValue()))) .reduce(Optional.of(start.startObject("properties")) // add doc_schema if it exists .map(props -> doc_schema .map(ds -> Optionals.streamOf(ds.fields(), false) .reduce(props, Lambdas.wrap_u((acc, kv) -> acc.rawField(kv.getKey(), kv.getValue().toString().getBytes())), (acc1, acc2) -> acc1 // shouldn't be possible )).orElse(props)).get(), Lambdas.wrap_u((acc, t2) -> acc.rawField(t2._1().left().value(), t2._2().toString().getBytes())), // (left by construction) (acc1, acc2) -> acc1) // (not actually possible) .endObject(); final XContentBuilder templates = column_lookups.entrySet().stream() // properties not dynamic_templates .filter(kv -> kv.getKey().isRight()) // overwrite with version built using columns if it exists .map(kv -> Tuples._2T(kv.getKey(), column_lookups.getOrDefault(kv.getKey(), kv.getValue()))) .reduce(properties.startArray("dynamic_templates"), Lambdas.wrap_u((acc, t2) -> acc.startObject() .rawField(getFieldNameFromMatchPair(t2._1().right().value()), t2._2().toString().getBytes()) // (right by construction) .endObject()), (acc1, acc2) -> acc1) // (not actually possible) .endArray(); return templates; } catch (IOException e) { //Handle in-practice-impossible "IOException" return null; } } /** Creates a list of JsonNodes containing the mapping for fields that will enable field data * @param instream * @param f * @param field_lookups * @param fielddata_not_analyzed * @param fielddata_analyzed * @param override_existing * @param mapper * @return */ 
/** Creates a list of JsonNodes containing the mapping for fields that will _enable_ field data
 *  (thin wrapper over createFieldLookups: the presence of the 3-tuple signals "enable")
 * @param instream the stream of field names / field name patterns to process
 * @param f maps a raw field name onto either a property name (left) or a dynamic-template name/type pair (right)
 * @param field_lookups previously-parsed mapping fragments, keyed the same way
 * @param fielddata_not_analyzed fielddata settings to apply to not-analyzed fields
 * @param fielddata_analyzed fielddata settings to apply to analyzed fields
 * @param override_existing if true, replaces any fielddata settings already present
 * @param search_index_schema_override search-index schema defaults/overrides
 * @param type_override per-field type overrides
 * @param mapper JSON object mapper
 * @param index_type the ES index type (used to select per-type fielddata settings)
 * @return a stream of (field key, mapping JSON) pairs
 */
protected static Stream<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>> createFieldIncludeLookups(
		final Stream<String> instream,
		final Function<String, Either<String, Tuple2<String, String>>> f,
		final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> field_lookups,
		final JsonNode fielddata_not_analyzed, final JsonNode fielddata_analyzed,
		final boolean override_existing, final SearchIndexSchemaDefaultBean search_index_schema_override,
		final Map<Either<String, Tuple2<String, String>>, String> type_override, final ObjectMapper mapper,
		final String index_type)
{
	// Package the field-data settings into the 3-tuple form expected by createFieldLookups -
	// a present tuple tells it to _enable_ field data on the generated mappings
	final Optional<Tuple3<JsonNode, JsonNode, Boolean>> fielddata_settings =
			Optional.of(Tuples._3T(fielddata_not_analyzed, fielddata_analyzed, override_existing));

	return createFieldLookups(instream, f, field_lookups, fielddata_settings,
			search_index_schema_override, type_override, mapper, index_type);
}

/** Creates a list of JsonNodes containing the mapping for fields that will _disable_ field data
 *  (thin wrapper over createFieldLookups: the absence of the 3-tuple signals "disable")
 * @param instream the stream of field names / field name patterns to process
 * @param f maps a raw field name onto either a property name (left) or a dynamic-template name/type pair (right)
 * @param field_lookups previously-parsed mapping fragments, keyed the same way
 * @param search_index_schema_override search-index schema defaults/overrides
 * @param type_override per-field type overrides
 * @param mapper JSON object mapper
 * @param index_type the ES index type
 * @return a stream of (field key, mapping JSON) pairs
 */
protected static Stream<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>> createFieldExcludeLookups(
		final Stream<String> instream,
		final Function<String, Either<String, Tuple2<String, String>>> f,
		final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> field_lookups,
		final SearchIndexSchemaDefaultBean search_index_schema_override,
		final Map<Either<String, Tuple2<String, String>>, String> type_override, final ObjectMapper mapper,
		final String index_type)
{
	// No fielddata tuple => createFieldLookups will disable field data instead
	final Optional<Tuple3<JsonNode, JsonNode, Boolean>> no_fielddata = Optional.empty();

	return createFieldLookups(instream, f, field_lookups, no_fielddata,
			search_index_schema_override, type_override, mapper, index_type);
}

/** Creates a list of JsonNodes containing the mapping for fields that will _enable_ or _disable_ field data
 *  depending on whether fielddata_info is present
 *  (note this can convert a property to a dynamic template, but never the other way round)
 * @param instream the stream of field names / field name patterns to process
 * @param f maps a raw field name onto either a property name (left) or a dynamic-template name/type pair (right)
 * @param field_lookups previously-parsed mapping fragments, keyed the same way
 * @param fielddata_info 3tuple containing not_analyzed, analyzed, and override
 * @param mapper JSON object mapper
 * @return a stream of (field key, mapping JSON) pairs
 */
protected static Stream<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>> createFieldLookups(
		final Stream<String> instream,
		final Function<String, Either<String, Tuple2<String, String>>> f,
		final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> field_lookups,
		final Optional<Tuple3<JsonNode, JsonNode, Boolean>> fielddata_info,
		final SearchIndexSchemaDefaultBean search_index_schema_override,
		final Map<Either<String, Tuple2<String, String>>, String> type_override, final ObjectMapper mapper,
		final String index_type)
{
	// For each incoming field name: build (or clone) its mapping fragment, apply any type override,
	// possibly promote a type-less property to a dynamic template, then apply fielddata settings.
	return instream.<Tuple2<Either<String, Tuple2<String, String>>, JsonNode>>map(Lambdas.wrap_u(fn -> {
		final Either<String, Tuple2<String, String>> either_tmp = f.apply(fn);

		final Optional<String> maybe_type = Optional.ofNullable(type_override.get(either_tmp));

		// add type if present (only affects the right/template case - left/property keys pass through unchanged)
		final Either<String, Tuple2<String, String>> either = maybe_type
				.<Either<String, Tuple2<String, String>>>map(type -> {
					return either_tmp.<Either<String, Tuple2<String, String>>>either(s -> Either.left(s),
							t2 -> Either.right(Tuples._2T(t2._1(), type)));
				}).orElse(either_tmp);

		// Deep-copy an existing lookup if we have one, else fall back to the backup property/template skeleton
		final ObjectNode mutable_field_metadata = (ObjectNode) Optional.ofNullable(field_lookups.get(either))
				.map(j -> j.deepCopy())
				.orElse(either.either(
						Lambdas.wrap_fj_u(__ -> mapper.readTree(BACKUP_FIELD_MAPPING_PROPERTIES)),
						Lambdas.wrap_fj_u(__ -> mapper.readTree(BACKUP_FIELD_MAPPING_TEMPLATES))));
		//(note that these 2 mappings don't have "type"s - therefore they will result in default_templates not properties - you need the type to generate a property)

		// Templates (right) nest the actual field settings under "mapping"; properties (left) are flat
		final ObjectNode mutable_field_mapping_tmp = either.isLeft()
				? mutable_field_metadata
				: (ObjectNode) mutable_field_metadata.get("mapping");

		//(override with type if set)
		maybe_type.ifPresent(type -> mutable_field_mapping_tmp.put("type", type));

		final boolean has_type = mutable_field_mapping_tmp.has("type");

		// If a "property" has no type, promote it to a dynamic template matching that exact field name
		// (you need the type to generate a property - see note above)
		final Tuple2<ObjectNode, Either<String, Tuple2<String, String>>> toplevel_eithermod = Lambdas
				.get(() -> {
					if (either.isLeft() && !has_type) {
						final ObjectNode top_level = (ObjectNode) mapper.createObjectNode().set("mapping",
								mutable_field_metadata);
						return Tuples._2T(top_level,
								Either.<String, Tuple2<String, String>>right(Tuples._2T(fn, "*")));
					} else { // right[dynamic] *OR* (left[properties] and has-type)
						return Tuples._2T(mutable_field_metadata, either);
					}
				});

		// Re-derive the inner settings node (the promotion above may have re-nested it under "mapping")
		final ObjectNode mutable_field_mapping = toplevel_eithermod._2().isLeft()
				? toplevel_eithermod._1()
				: (ObjectNode) toplevel_eithermod._1().get("mapping");

		// Special case ... if we're columnar and we're merging with tokenized and non-dual then convert to untokenized instead
		if (fielddata_info.filter(t3 -> t3._3()).isPresent() && mutable_field_mapping.equals(
				mapper.convertValue(search_index_schema_override.tokenized_string_field(), JsonNode.class)))
		{
			// (removeAll + setAll = in-place replacement, preserving the node's identity/aliases)
			mutable_field_mapping.removeAll();
			mutable_field_mapping.setAll((ObjectNode) mapper
					.convertValue(search_index_schema_override.untokenized_string_field(), ObjectNode.class));
		}

		// Dynamic templates: fill in match/match_mapping_type clauses and a mandatory "type" if absent
		if (toplevel_eithermod._2().isRight()) {
			if (!toplevel_eithermod._1().has(PATH_MATCH_NAME) && !toplevel_eithermod._1().has(RAW_MATCH_NAME)) {
				toplevel_eithermod._1().put(PATH_MATCH_NAME, toplevel_eithermod._2().right().value()._1());

				if (!toplevel_eithermod._1().has(TYPE_MATCH_NAME))
					toplevel_eithermod._1().put(TYPE_MATCH_NAME, toplevel_eithermod._2().right().value()._2());
			}
			if (!has_type) {
				if (toplevel_eithermod._2().right().value()._2().equals("*")) { // type is mandatory
					// "*" type match => let ES substitute the matched type at index time
					mutable_field_mapping.put("type", "{dynamic_type}");
				} else {
					mutable_field_mapping.put("type", toplevel_eithermod._2().right().value()._2());
				}
			}
		}
		// Apply fielddata enable/disable to the field itself and any multi-field ("fields") sub-entries
		handleMappingFields(mutable_field_mapping, fielddata_info, mapper, index_type);
		setMapping(mutable_field_mapping, fielddata_info, mapper, index_type);

		return Tuples._2T(toplevel_eithermod._2(), toplevel_eithermod._1());
	}));
}

/** Utility function to handle fields: applies setMapping to every sub-field of a multi-field
 *  ("fields") entry, if one is present and is an object
 * TODO (ALEPH-14): need to be able to specify different options for different fields via columnar settings
 * @param mutable_mapping the field's mapping fragment (mutated in place)
 * @param fielddata_info 3tuple containing not_analyzed, analyzed, and override - absent means "disable fielddata"
 * @param mapper JSON object mapper
 * @param index_type the ES index type (used to select per-type fielddata settings)
 */
protected static void handleMappingFields(final ObjectNode mutable_mapping,
		final Optional<Tuple3<JsonNode, JsonNode, Boolean>> fielddata_info, final ObjectMapper mapper,
		final String index_type)
{
	Optional.ofNullable(mutable_mapping.get("fields")).filter(j -> !j.isNull() && j.isObject()).ifPresent(j -> {
		// (spliterator dance converts the Jackson fields() Iterator into an ordered Stream)
		StreamSupport.stream(Spliterators.spliteratorUnknownSize(j.fields(), Spliterator.ORDERED), false)
				.forEach(Lambdas.wrap_consumer_u(kv -> {
					final ObjectNode mutable_o = (ObjectNode) kv.getValue();
					setMapping(mutable_o, fielddata_info, mapper, index_type);
				}));
	});
}

/** More levels of utility for code reuse: sets (or disables) the "fielddata" element of a single
 *  field mapping, selecting analyzed-vs-not-analyzed settings from the field's "index" attribute
 * @param mutable_o the field mapping (mutated in place)
 * @param fielddata_info 3tuple containing not_analyzed, analyzed, and override - absent means "disable fielddata"
 * @param mapper JSON object mapper
 * @param index_type the ES index type - looked up inside the fielddata settings, falling back to the default key
 * @throws JsonProcessingException
 * @throws IOException
 */
protected static void setMapping(final ObjectNode mutable_o,
		final Optional<Tuple3<JsonNode, JsonNode, Boolean>> fielddata_info, final ObjectMapper mapper,
		final String index_type) throws JsonProcessingException, IOException
{
	if (fielddata_info.isPresent()) {
		final JsonNode fielddata_not_analyzed = fielddata_info.get()._1();
		final JsonNode fielddata_analyzed = fielddata_info.get()._2();
		final boolean override_existing = fielddata_info.get()._3();

		// A field counts as analyzed unless "index" explicitly says otherwise
		// ("analyzed"/"yes" => analyzed; any other textual value => not analyzed; missing => analyzed)
		final boolean is_analyzed = Optional.ofNullable(mutable_o.get("index"))
				.filter(jj -> !jj.isNull() && jj.isTextual())
				.map(jt -> jt.asText().equalsIgnoreCase("analyzed") || jt.asText().equalsIgnoreCase("yes"))
				.orElse(true);

		final JsonNode fielddata_settings = is_analyzed ? fielddata_analyzed : fielddata_not_analyzed;

		// Prefer the per-index-type settings entry, fall back to the default entry; skip if neither exists
		Optional.ofNullable(Optional.ofNullable(fielddata_settings.get(index_type)).filter(jj -> !jj.isNull())
				.orElse(fielddata_settings.get(DEFAULT_FIELDDATA_NAME))).ifPresent(jj -> {
					if (override_existing || !mutable_o.has("fielddata"))
						mutable_o.set("fielddata", jj);
				});
	}
	else { // This means it's a columnar exclude lookup
		//Previously did this, however I think it's preferable if columnar is disabled just to leave it alone
		mutable_o.set("fielddata", mapper.readTree(DISABLED_FIELDDATA));
	}
}

///////////////////////////////////////////////////////////////

// SEARCH PROCESSING

/** Creates a mapping for the bucket - search service elements .. up to but not including the mapping + type
 * NOTE: creates an embedded object that is {{, ie must be closed twice subsequently in order to be a well formed JSON object
 * @param bucket the bucket whose mapping is being generated
 * @param secondary_buffer buffer name to record in the "_meta" block (empty string if absent)
 * @param is_primary if true, adds the "read only" prefix alias for the bucket's base index
 * @param schema_config the per-service schema configuration
 * @param to_embed optionally, an already-started builder to append to (else a fresh one is created)
 * @param mapper JSON object mapper
 * @return the builder with settings/aliases/mappings elements appended (null on the in-practice-impossible IOException)
 */
public static XContentBuilder getSearchServiceMapping(final DataBucketBean bucket,
		final Optional<String> secondary_buffer, final boolean is_primary,
		final ElasticsearchIndexServiceConfigBean schema_config, final Optional<XContentBuilder> to_embed,
		final ObjectMapper mapper)
{
	try {
		final XContentBuilder start = to_embed.orElse(XContentFactory.jsonBuilder().startObject());

		// (Nullable)
		final ElasticsearchIndexServiceConfigBean.SearchIndexSchemaDefaultBean search_schema = schema_config
				.search_technology_override();

		//(very briefly Nullable)
		final JsonNode settings = Optional.ofNullable(search_schema).map(s -> s.settings())
				.map(o -> mapper.convertValue(o, JsonNode.class)).orElse(null);

		//(very briefly Nullable)
		final ObjectNode aliases = (ObjectNode) Optional.ofNullable(search_schema).map(s -> s.aliases())
				.map(o -> mapper.convertValue(o, JsonNode.class)).orElse(mapper.createObjectNode());

		if (is_primary) { // add the "read only" prefix alias
			aliases.set(
					ElasticsearchContext.READ_PREFIX
							+ ElasticsearchIndexUtils.getBaseIndexName(bucket, Optional.empty()),
					mapper.createObjectNode());
		}

		// Settings

		final String type_key = getTypeKey(bucket, mapper);

		// (the function-composition chain below mutates the single underlying builder step by step,
		//  then kicks it off via apply(null))
		return Lambdas.wrap_u(__ -> {
			if (null == settings) { // nothing to do
				return start;
			}
			else {
				return start.rawField("settings", settings.toString().getBytes());
			}
		})
				// Aliases
				.andThen(Lambdas.wrap_u(json -> {
					if (!aliases.elements().hasNext()) { // nothing to do
						return json;
					}
					else {
						// NOTE(review): uses "start" rather than "json" - presumably the same mutable
						// builder so equivalent, but verify before refactoring
						return start.rawField("aliases", aliases.toString().getBytes());
					}
				}))
				// Mappings and overrides
				.andThen(Lambdas.wrap_u(json -> json.startObject("mappings").startObject(type_key)))
				// Add the secondary buffer name to the metadata:
				.andThen(Lambdas.wrap_u(json -> {
					return json.rawField(CUSTOM_META,
							createMergedMeta(Either.right(mapper), bucket, is_primary, secondary_buffer)
									.toString().getBytes());
				}))
				// More mapping overrides
				.andThen(Lambdas.wrap_u(json -> {
					// (per-type overrides win over the "*" wildcard entry; missing either way => empty map)
					return Optional.ofNullable(search_schema).map(ss -> ss.mapping_overrides())
							.map(m -> m.getOrDefault(type_key, m.get("*"))).orElse(Collections.emptyMap())
							.entrySet().stream().reduce(json, Lambdas.wrap_u((acc, kv) -> {
								if (CUSTOM_META.equals(kv.getKey())) { // meta is a special case, merge my results in regardless
									return acc.rawField(kv.getKey(),
											createMergedMeta(
													Either.left(
															mapper.convertValue(kv.getValue(), JsonNode.class)),
													bucket, is_primary, secondary_buffer).toString()
															.getBytes());
								}
								else {
									return acc.rawField(kv.getKey(), mapper
											.convertValue(kv.getValue(), JsonNode.class).toString().getBytes());
								}
							}), (acc1, acc2) -> acc1 // (can't actually ever happen)
					);
				})).apply(null);
	}
	catch (IOException e) { //Handle fake "IOException"
		return null;
	}
}

/** Utility to build the "_meta" field: records the bucket path, primary flag, and secondary buffer name
 * @param to_merge either an existing JSON object to merge the metadata into (left), or a mapper used to create a fresh one (right)
 * @param bucket the bucket (its full_name is recorded)
 * @param is_primary whether this is the primary buffer (stored as a string)
 * @param secondary_buffer the secondary buffer name, stored as "" if absent
 * @return the (possibly newly created) object node with the 3 metadata fields set
 */
protected static JsonNode createMergedMeta(Either<JsonNode, ObjectMapper> to_merge, DataBucketBean bucket,
		boolean is_primary, Optional<String> secondary_buffer)
{
	final ObjectNode ret_val = to_merge.either(j -> (ObjectNode) j, om -> om.createObjectNode());

	ret_val.put(CUSTOM_META_BUCKET, bucket.full_name())
			.put(CUSTOM_META_IS_PRIMARY, Boolean.toString(is_primary))
			.put(CUSTOM_META_SECONDARY, secondary_buffer.orElse(""));

	return ret_val;
}

///////////////////////////////////////////////////////////////

// TEMPLATE CREATION

/** Create a template to be applied to all indexes generated from this bucket
 *  (ie a "template" element matching base-index-name + "*")
 * @param bucket the bucket whose template is being generated
 * @param secondary_buffer optional secondary buffer used to derive the base index name
 * @return a started builder containing the "template" field (null on the in-practice-impossible IOException)
 */
public static XContentBuilder getTemplateMapping(final DataBucketBean bucket,
		final Optional<String> secondary_buffer)
{
	try {
		final XContentBuilder start = XContentFactory.jsonBuilder().startObject().field("template",
				getBaseIndexName(bucket, secondary_buffer) + "*");

		return start;
	}
	catch (IOException e) { //Handle fake "IOException"
		return null;
	}
}

/** The control method to build up the mapping from the constituent parts:
 *  template -> search service mapping -> columnar mapping -> (close 2 objects) -> temporal mapping
 * @param bucket the bucket whose mapping is being generated
 * @param field_lookups previously-parsed per-field mapping fragments
 * @param enabled_not_analyzed fielddata settings for explicitly-enabled not-analyzed fields
 * @param enabled_analyzed fielddata settings for explicitly-enabled analyzed fields
 * @param mapper JSON object mapper
 * @return the fully-composed builder
 */
protected static XContentBuilder getFullMapping(final DataBucketBean bucket,
		final Optional<String> secondary_buffer, boolean is_primary,
		final ElasticsearchIndexServiceConfigBean schema_config,
		final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> field_lookups,
		final JsonNode enabled_not_analyzed, final JsonNode enabled_analyzed,
		final JsonNode default_not_analyzed, final JsonNode default_analyzed,
		final Optional<JsonNode> doc_schema, final SearchIndexSchemaDefaultBean search_index_schema_override,
		final ObjectMapper mapper, final String index_type)
{
	return Lambdas.wrap_u(__ -> getTemplateMapping(bucket, secondary_buffer))
			.andThen(json -> getSearchServiceMapping(bucket, secondary_buffer, is_primary, schema_config,
					Optional.of(json), mapper))
			.andThen(json -> getColumnarMapping(bucket, Optional.of(json), field_lookups, enabled_not_analyzed,
					enabled_analyzed, default_not_analyzed, default_analyzed, doc_schema,
					search_index_schema_override, mapper, index_type))
			.andThen(Lambdas.wrap_u(json -> json.endObject().endObject())) // (close the objects from the search service mapping)
			.andThen(json -> getTemporalMapping(bucket, Optional.of(json))).apply(null);
}

/** Utility function to create a mapping out of all the different system components (see also ElasticsearchUtils)
 *  - collects the columnar/document/temporal schema elements into JsonNodes, parses the default mapping
 *  into per-field lookups, then delegates to getFullMapping
 * @param bucket the bucket whose index mapping is being created
 * @param config (see schema_config - the per-service schema configuration)
 * @return the complete index mapping builder
 */
public static XContentBuilder createIndexMapping(final DataBucketBean bucket,
		final Optional<String> secondary_buffer, final boolean is_primary,
		final ElasticsearchIndexServiceConfigBean schema_config, final ObjectMapper mapper,
		final String index_type)
{
	final JsonNode default_mapping = mapper.convertValue(schema_config.search_technology_override(),
			JsonNode.class);

	// Also get JsonNodes for the default field config bit

	// (schemas count as enabled when present and not explicitly disabled)
	final boolean columnar_enabled = Optional.ofNullable(bucket.data_schema())
			.map(DataSchemaBean::columnar_schema).filter(s -> Optional.ofNullable(s.enabled()).orElse(true))
			.isPresent();

	final boolean doc_enabled = Optional.ofNullable(bucket.data_schema()).map(DataSchemaBean::document_schema)
			.filter(s -> Optional.ofNullable(s.enabled()).orElse(true)).isPresent();

	// (these can't be null by construction)
	final JsonNode enabled_analyzed_field = columnar_enabled
			? mapper.convertValue(schema_config.columnar_technology_override().enabled_field_data_analyzed(),
					JsonNode.class)
			: mapper.createObjectNode();
	final JsonNode enabled_not_analyzed_field = columnar_enabled
			? mapper.convertValue(schema_config.columnar_technology_override().enabled_field_data_notanalyzed(),
					JsonNode.class)
			: mapper.createObjectNode();
	final JsonNode default_analyzed_field = columnar_enabled
			? mapper.convertValue(schema_config.columnar_technology_override().default_field_data_analyzed(),
					JsonNode.class)
			: mapper.createObjectNode();
	final JsonNode default_not_analyzed_field = columnar_enabled
			? mapper.convertValue(schema_config.columnar_technology_override().default_field_data_notanalyzed(),
					JsonNode.class)
			: mapper.createObjectNode();

	final Optional<JsonNode> doc_schema = doc_enabled
			? Optional.ofNullable(mapper.convertValue(schema_config.document_schema_override(), JsonNode.class))
			: Optional.empty();

	// Get a list of field overrides Either<String,Tuple2<String,String>> for dynamic/real fields

	// (collide_policy of new_type - the default - means no type name/prefix is passed through)
	final LinkedHashMap<Either<String, Tuple2<String, String>>, JsonNode> field_lookups = ElasticsearchIndexUtils
			.parseDefaultMapping(default_mapping,
					(CollidePolicy.new_type == Optional
							.ofNullable(schema_config.search_technology_override().collide_policy())
							.orElse(CollidePolicy.new_type)) ? Optional.empty()
									: Optional.ofNullable(
											schema_config.search_technology_override().type_name_or_prefix()),
					Optionals.of(() -> bucket.data_schema().search_index_schema()),
					Optionals.of(() -> bucket.data_schema().document_schema()),
					schema_config.search_technology_override(), mapper);

	// If a time field is present then adding the default mapping for it (overwrite @timestamp if that's the specified field):

	final Optional<JsonNode> time_mapping = Optional.ofNullable(schema_config.temporal_technology_override())
			.map(DataSchemaBean.TemporalSchemaBean::technology_override_schema)
			.map(t -> (Object) t.get("default_timefield_mapping"))
			.map(js -> mapper.convertValue(js, JsonNode.class));

	time_mapping.ifPresent(json -> {
		Optional.ofNullable(bucket.data_schema()).map(DataSchemaBean::temporal_schema)
				.filter(s -> Optional.ofNullable(s.enabled()).orElse(true)).map(t -> t.time_field())
				.ifPresent(time_field -> {
					field_lookups.put(Either.left(time_field), json); //(MUTABLE CODE WARNING)
				});
	});

	final XContentBuilder test_result = getFullMapping(bucket, secondary_buffer, is_primary, schema_config,
			field_lookups, enabled_not_analyzed_field, enabled_analyzed_field, default_not_analyzed_field,
			default_analyzed_field, doc_schema, schema_config.search_technology_override(), mapper, index_type);

	return test_result;
}
}