com.useekm.indexing.elasticsearch.ElasticSearchIndexerSettings.java Source code

Introduction

Here is the source code for com.useekm.indexing.elasticsearch.ElasticSearchIndexerSettings.java
Source

/*
 * Copyright 2012 by TalkingTrends (Amsterdam, The Netherlands)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://opensahara.com/licenses/apache-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package com.useekm.indexing.elasticsearch;

import info.aduna.iteration.CloseableIteration;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.Validate;
//import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.query.algebra.StatementPattern;
import org.openrdf.query.algebra.UpdateExpr;
import org.openrdf.query.algebra.ValueExpr;
import org.openrdf.query.algebra.Var;
import org.openrdf.sail.SailConnection;
import org.openrdf.sail.SailException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.useekm.indexing.AbstractIndexerSettings;
import com.useekm.indexing.GeoConstants;
import com.useekm.indexing.IndexingSail;
import com.useekm.indexing.IndexingSailConnection;
import com.useekm.indexing.algebra.IndexerExpr;
import com.useekm.indexing.elasticsearch.IndexConfiguration.Mapping;
import com.useekm.indexing.elasticsearch.IndexConfiguration.PropertyConfig;
import com.useekm.indexing.exception.IndexException;
import com.useekm.indexing.internal.Indexer;
import com.useekm.indexing.internal.resolvers.AbstractResolveSearchArg;
import com.useekm.indexing.internal.resolvers.SearchByLiteral;
import com.useekm.indexing.internal.resolvers.SearchNoArg;
import com.useekm.indexing.internal.resolvers.SearchNotIndexed;
import com.useekm.types.GeoConvert;
import com.useekm.types.exception.InvalidGeometryException;
import com.vividsolutions.jts.geom.Envelope;
import com.vividsolutions.jts.geom.Geometry;

/**
 * Settings for {@link ElasticSearchIndexer}.
 * 
 * @see IndexingSail
 */
public class ElasticSearchIndexerSettings extends AbstractIndexerSettings {
    private static final Logger LOG = LoggerFactory.getLogger(ElasticSearchIndexerSettings.class);
    // ElasticSearch type names that are written to the "type" field of the generated mappings:
    private static final String OBJECT = "object";
    private static final String SHORT = "short";
    private static final String STRING = "string";
    private static final String LONG = "long";
    private static final String INTEGER = "integer";
    private static final String FLOAT = "float";
    private static final String DOUBLE = "double";
    private static final String DATE = "date";
    private static final String BYTE = "byte";
    private static final String BOOLEAN = "boolean";
    private static final String GEO_SHAPE = "geo_shape";
    // Values for the "index" field of a generated mapping:
    public static final String NOT_ANALYZED = "not_analyzed";
    public static final String NOT_INDEXED = "no";

    // FIELD_OBJECT_URI: name of the not-analyzed string sub-field that is added to every object property mapping.
    public static final String FIELD_OBJECT_URI = "u";
    // FIELD_OBJECT_TYPE: name of the not-analyzed string field that is added to every document mapping.
    public static final String FIELD_OBJECT_TYPE = "a";

    // Names of the filter and analyzers that createSettings registers in the index settings,
    // and the field-name postfix used by the Autocomplete secondary mapping:
    public static final String FILTER_EDGE_NGRAM = "useekmEdgeNgram";
    public static final String ANALYZER_AUTOCOMPLETE_INDEX = "useekmAutocompleteIndex";
    public static final String ANALYZER_AUTOCOMPLETE_SEARCH = "useekmAutocompleteSearch";
    public static final String SUFFIX_ENGRAM = "engram";

    private Client client;
    // Index configurations keyed by their name (see setIndexConfigurations):
    private Map<String, IndexConfiguration> indexConfigurations;
    // Extra analyzer definitions (analyzer name -> raw definition) that createSettings copies into the index settings:
    private Map<String, String> analyzers;

    // toSub: configured match type -> the types found as rdfs:subClassOf that type (filled by initInference).
    private volatile Map<String, Collection<String>> toSub; //used (safe dbl checked locking fashion) outside locking in first init, hence the volatile
    // toSuper: reverse lookup of toSub (sub type -> configured match type); read and written while synchronized on this.
    private Map<String, String> toSuper;
    //TODO: the indexer should check that every type maps to only one index-configuration always

    /**
     * SPARQL function for an ElasticSearch query_string query. See {@link QueryBuilders#queryString(String)}.
     */
    public static final URI ES_QUERY = new URIImpl(NS_FTS + "query");
    /**
     * SPARQL function for an ElasticSearch match_all filter. See {@link FilterBuilders#matchAllFilter()}.
     * This is useful when combined with basic graph patterns (BGP) on indexed statements for the searched resource, because these BGPs can be moved to the ElasticSearch filter
     * for a much better performing search.
     */
    public static final URI ES_MATCH_ALL = new URIImpl(NS_FTS + "matchAll");
    /**
     * SPARQL function for an ElasticSearch query to find auto-complete suggestions for a given string.
     * This requires the use of {@link Mapping#Autocomplete} secondary mappings for properties of the searched subjects.
     */
    public static final URI ES_AUTO_COMPLETE = new URIImpl(NS_FTS + "autoComplete");
    /**
     * Option to set the size (the number of results to return from an ElasticSearch search). If you want to get all results, you need to use the {@link #ES_OPTION_SCROLL} option.
     * The option is set like this:
     * 
     * <pre>
     *    ?resource es:all ?match.
     *    ?match es:size 100.
     *    FILTER(es:matchAll(?match))
     * </pre>
     */
    public static final URI ES_OPTION_SIZE = new URIImpl(NS_FTS + "size");
    /**
     * Option to make a scroll search (returns all possible results for the search, instead of the top xx, where xx is es:size).
     * This creates a search of type {@link SearchType#SCAN}. The value should be a valid {@link TimeValue} according to {@link TimeValue#parseTimeValue(String, TimeValue)}.
     * The option is set like this:
     * 
     * <pre>
     *    ?resource es:all ?match.
     *    ?match es:scroll "10m".
     *    FILTER(es:matchAll(?match))
     * </pre>
     */
    public static final URI ES_OPTION_SCROLL = new URIImpl(NS_FTS + "scroll");
    /**
     * The default size for searches, when not customized via the {@link #ES_OPTION_SIZE} option.
     */
    public static final int DEFAULT_SIZE = 100; //TODO

    /**
     * Lookup from a supported SPARQL function URI to the resolver that
     * {@link #getResultVarFromFunctionCall(URI, List)} delegates to for that function.
     */
    private static final Map<URI, AbstractResolveSearchArg> SUPPORTED_FUNCTIONS = new HashMap<URI, AbstractResolveSearchArg>();
    static {
        // Search functions that are resolved by SearchByLiteral:
        for (URI byLiteral : new URI[] { ES_QUERY, TEXT, ES_AUTO_COMPLETE }) {
            SUPPORTED_FUNCTIONS.put(byLiteral, SearchByLiteral.INSTANCE);
        }
        // The match-all function takes no search argument:
        SUPPORTED_FUNCTIONS.put(ES_MATCH_ALL, SearchNoArg.INSTANCE);
        //Geometry functions are supported (both indexed and non-indexed),
        // they can be inlined into the indexed filter/query by ExpressionExtractorOptimizer.
        // Therefore QueryExtractor does not need to handle them:
        URI[] geometryFunctions = {
                GEOF_SF_WITHIN, GEOF_SF_INTERSECTS, GEOF_SF_OVERLAPS, GEOF_SF_CROSSES,
                EXT_COVERED_BY, EXT_COVERS, GEOF_SF_CONTAINS, EXT_CONTAINS_PROPERLY,
                GEOF_SF_EQUALS, GEOF_SF_DISJOINT, GEOF_SF_TOUCHES, GEOF_RELATE };
        for (URI geometryFunction : geometryFunctions) {
            SUPPORTED_FUNCTIONS.put(geometryFunction, SearchNotIndexed.INSTANCE);
        }
    }
    // Lookup from a configured Mapping kind to the EsMapping(s) that describe the ElasticSearch field(s) for it.
    // Every entry below holds a single-element array.
    static final Map<Mapping, EsMapping[]> TYPE_MAPPINGS = new HashMap<IndexConfiguration.Mapping, EsMapping[]>();
    static {
        //primary mappings:
        // (a null type URI means that no datatype is expected on the value; .uri() marks a mapping for resources instead of literals)
        TYPE_MAPPINGS.put(Mapping.Object,
                new EsMapping[] { new EsMapping((URI) null, null, OBJECT, NOT_ANALYZED).uri() });
        TYPE_MAPPINGS.put(Mapping.Boolean, new EsMapping[] { new EsMapping(XMLSchema.BOOLEAN, null, BOOLEAN) });
        TYPE_MAPPINGS.put(Mapping.Byte, new EsMapping[] { new EsMapping(XMLSchema.BYTE, null, BYTE) });
        TYPE_MAPPINGS.put(Mapping.Date, new EsMapping[] { new EsMapping(XMLSchema.DATE, null, DATE) });
        TYPE_MAPPINGS.put(Mapping.DateTime, new EsMapping[] { new EsMapping(XMLSchema.DATETIME, null, DATE) });
        TYPE_MAPPINGS.put(Mapping.Double, new EsMapping[] { new EsMapping(XMLSchema.DOUBLE, null, DOUBLE) });
        TYPE_MAPPINGS.put(Mapping.Float, new EsMapping[] { new EsMapping(XMLSchema.FLOAT, null, FLOAT) });
        TYPE_MAPPINGS.put(Mapping.Integer, new EsMapping[] { new EsMapping(XMLSchema.INT, null, INTEGER) });
        TYPE_MAPPINGS.put(Mapping.Long, new EsMapping[] { new EsMapping(XMLSchema.LONG, null, LONG) });
        TYPE_MAPPINGS.put(Mapping.Label, new EsMapping[] { new EsMapping((URI) null, null, STRING, NOT_ANALYZED) });
        TYPE_MAPPINGS.put(Mapping.Short, new EsMapping[] { new EsMapping(XMLSchema.SHORT, null, SHORT) });
        TYPE_MAPPINGS.put(Mapping.Text, new EsMapping[] { new EsMapping(XMLSchema.STRING, null, STRING) });
        TYPE_MAPPINGS.put(Mapping.URI,
                new EsMapping[] { new EsMapping((URI) null, null, STRING, NOT_ANALYZED).uri() });
        //TODO: don't store Envelope values in _source field?
        TYPE_MAPPINGS.put(Mapping.Geometry,
                new EsMapping[] { new EsMapping(GeoConstants.GEO_SUPPORTED, null, GEO_SHAPE) });
        //secondary mappings:
        // (the autocomplete field gets the SUFFIX_ENGRAM name, separate index/search analyzers, and include_in_all=false)
        TYPE_MAPPINGS.put(Mapping.Autocomplete, new EsMapping[] { new EsMapping((URI) null, SUFFIX_ENGRAM, STRING,
                null, ANALYZER_AUTOCOMPLETE_INDEX, ANALYZER_AUTOCOMPLETE_SEARCH, false) });
    }

    /**
     * Creates settings that use the given elasticsearch {@link Client}.
     * The settings start out without any index configurations; add them with
     * {@link #setIndexConfigurations(Collection)}.
     * 
     * @param client the elasticsearch client used for all index administration calls.
     */
    public ElasticSearchIndexerSettings(Client client) {
        this.indexConfigurations = new HashMap<String, IndexConfiguration>();
        this.client = client;
    }

    /**
     * @return The elasticsearch client that was passed to the constructor.
     */
    public Client getClient() {
        return this.client;
    }

    /**
     * Builds the sub type / super type lookup tables for the match types of all index configurations,
     * unless they were built already.
     * <p>
     * The tables are filled in local maps and only assigned to the fields once they are complete. The
     * volatile {@code toSub} field is assigned last, so code that checks {@code toSub != null} without
     * locking never observes partially filled tables. When reading the type information fails, the
     * fields stay {@code null} and a later call retries.
     * 
     * @param conn connection that is queried for rdfs:subClassOf statements.
     * @param vf value factory used to create the URI for each match type.
     * @throws SailException when reading the statements fails.
     */
    void initInference(SailConnection conn, ValueFactory vf) throws SailException {
        synchronized (this) {
            if (toSub != null) {
                return;
            }
            Map<String, Collection<String>> newToSub = new HashMap<String, Collection<String>>();
            Map<String, String> newToSuper = new HashMap<String, String>();
            for (IndexConfiguration config : indexConfigurations.values()) {
                loadSubTypes(conn, vf, config.getMatchType(), newToSub, newToSuper);
            }
            // Publish only fully built tables; the volatile write to toSub must come last:
            toSuper = newToSuper;
            toSub = newToSub;
        }
    }

    /**
     * (Re)reads the sub types of a single type and replaces the information that the lookup tables
     * hold for that type. Requires that the tables were initialized before.
     * 
     * @param conn connection that is queried for rdfs:subClassOf statements.
     * @param vf value factory used to create the URI for the type.
     * @param type the type (as URI string) whose sub types should be (re)read.
     * @throws SailException when reading the statements fails.
     */
    void initInference(SailConnection conn, ValueFactory vf, String type) throws SailException {
        synchronized (this) {
            loadSubTypes(conn, vf, type, toSub, toSuper);
        }
    }

    /**
     * Reads all subjects of rdfs:subClassOf statements that have the given type as object, and stores
     * them in the provided tables. Entries that were registered for the type earlier are removed first.
     */
    private static void loadSubTypes(SailConnection conn, ValueFactory vf, String type,
            Map<String, Collection<String>> subTypesByType, Map<String, String> superTypeBySubType)
        throws SailException {
        Collection<String> subs = subTypesByType.get(type);
        if (subs != null) {
            for (String sub : subs) {
                superTypeBySubType.remove(sub);
            }
        }
        subs = new HashSet<String>();
        subTypesByType.put(type, subs);
        boolean subClassOfSelf = false;
        CloseableIteration<? extends Statement, SailException> sts = conn.getStatements(null, RDFS.SUBCLASSOF,
                vf.createURI(type), IndexConfiguration.MATCH_TYPE_INFERENCE);
        try {
            while (sts.hasNext()) {
                String subType = sts.next().getSubject().stringValue();
                subs.add(subType);
                superTypeBySubType.put(subType, type);
                if (subType.equals(type)) {
                    subClassOfSelf = true;
                }
            }
        } finally {
            sts.close();
        }
        if (!subClassOfSelf) {
            Validate.isTrue(subs.isEmpty()); //no type information for this type in repository yet
            // Without type information the type is only registered as a sub type of itself:
            subs.add(type);
            superTypeBySubType.put(type, type);
        }
    }

    /**
     * Looks up an index configuration by name; the name maps to an elasticsearch document type.
     * 
     * @param name the name of the requested configuration.
     * @return The configuration registered under that name, or null if there is none.
     */
    public IndexConfiguration getIndexConfiguration(String name) {
        IndexConfiguration config = indexConfigurations.get(name);
        return config;
    }

    /**
     * Finds the index configuration for a type. A configuration whose match type equals the given
     * type wins. Otherwise, if the type inference tables are initialized, the configuration of the
     * indexed super type that is registered for the given type is returned.
     * 
     * @param type the (not null!) type to find a configuration for.
     * @return The matching {@link IndexConfiguration}, or null if there is none.
     */
    public IndexConfiguration getIndexConfiguration(URI type) {
        Validate.notNull(type);
        final String requested = type.stringValue();
        IndexConfiguration exact = findConfigByMatchType(requested);
        if (exact != null) {
            return exact;
        }
        synchronized (this) {
            if (toSuper == null) {
                // not initialized yet, can't return based on type inference
                return null;
            }
            String indexedType = toSuper.get(requested);
            return indexedType == null ? null : findConfigByMatchType(indexedType);
        }
    }

    /**
     * @return The first configuration whose match type equals the given string, or null if there is none.
     */
    private IndexConfiguration findConfigByMatchType(String matchType) {
        for (IndexConfiguration candidate : indexConfigurations.values()) {
            if (candidate.getMatchType().equals(matchType)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * @param type the (not null!) type to check.
     * @return True if {@link #getIndexConfiguration(URI)} finds an {@link IndexConfiguration} for the type.
     */
    boolean hasIndexConfiguration(URI type) {
        IndexConfiguration found = getIndexConfiguration(type);
        return found != null;
    }

    /**
     * Collects all index configurations for which {@code config.isProperty(pred)} holds.
     * 
     * @param pred the predicate to look for.
     * @return The matching configurations; the shared immutable empty list when there are none.
     */
    List<IndexConfiguration> getConfigsForProperty(URI pred) {
        List<IndexConfiguration> matches = null; // allocated lazily, most predicates are not indexed
        for (IndexConfiguration candidate : indexConfigurations.values()) {
            if (!candidate.isProperty(pred)) {
                continue;
            }
            if (matches == null) {
                matches = new ArrayList<IndexConfiguration>();
            }
            matches.add(candidate);
        }
        if (matches == null) {
            return Collections.<IndexConfiguration> emptyList();
        }
        return matches;
    }

    /**
     * Replaces all index configurations with the provided ones.
     * <p>
     * The new configurations are validated before the current ones are touched, so a failed call
     * leaves the settings unchanged. This also makes it safe to pass the collection that was
     * obtained from {@link #getIndexConfigurations()}, which is a live view on the current
     * configurations.
     * 
     * @param configs the new configurations, each with a unique name.
     * @throws IllegalArgumentException when two configurations have the same name.
     */
    public void setIndexConfigurations(Collection<IndexConfiguration> configs) {
        Map<String, IndexConfiguration> byName = new HashMap<String, IndexConfiguration>();
        for (IndexConfiguration config : configs) {
            if (byName.containsKey(config.getName())) {
                throw new IllegalArgumentException("Multiple index configurations with name: " + config.getName());
            }
            byName.put(config.getName(), config);
        }
        // Only swap the contents after validation succeeded; the map instance itself is kept:
        indexConfigurations.clear();
        indexConfigurations.putAll(byName);
    }

    /**
     * @return All index configurations, as the values view of the internal map (not a copy).
     */
    public Collection<IndexConfiguration> getIndexConfigurations() {
        Collection<IndexConfiguration> all = indexConfigurations.values();
        return all;
    }

    /**
     * Sets additional analyzers that {@link #createSettings(String)} adds to the index settings.
     * The map is stored as is (not copied).
     * 
     * @param analyzers analyzer name to raw analyzer definition; may be null.
     */
    public void setAnalyzers(Map<String, String> analyzers) {
        this.analyzers = analyzers;
    }

    /**
     * @return An unmodifiable view on the configured analyzers; the shared immutable empty map when none were set.
     */
    public Map<String, String> getAnalyzers() {
        if (analyzers == null) {
            return Collections.<String, String> emptyMap();
        }
        return Collections.unmodifiableMap(analyzers);
    }

    /**
     * Always answers false, for every update expression.
     * 
     * @param expr the update expression (currently not inspected).
     * @return false, always.
     */
    @Override
    public boolean isSafeForWrappedSailEvaluation(UpdateExpr expr) {
        //TODO room for optimization by checking whether the update does not query for nor possibly affect indexed statements
        //  (see e.g. how SimpleTypeInferencingSailConnection checks this):
        return false;
    }

    /**
     * Creates an {@link ElasticSearchIndexer} for the connection. The type inference tables are
     * initialized from the wrapped connection first, if that did not happen yet.
     * 
     * @param connection the connection that the indexer works for.
     * @return A new indexer that uses these settings.
     * @throws SailException when initializing the type inference tables fails.
     */
    @Override
    public Indexer createIndexer(IndexingSailConnection connection) throws SailException {
        boolean inferenceInitialized = toSub != null; // unlocked read of the volatile field
        if (!inferenceInitialized) {
            initInference(connection.getWrappedConnection(), connection.getValueFactory());
        }
        return new ElasticSearchIndexer(this, connection);
    }

    /**
     * Makes sure that all configured elasticsearch indexes exist, and waits for the cluster to
     * become available.
     * 
     * @param emptyIndexes when true, indexes that already exist are deleted and created again;
     *        when false, existing indexes are left untouched and only missing ones are created.
     * @throws IndexException when an index can not be created, or the cluster is not available.
     *         An IndexException raised during initialization is passed on unchanged; any other
     *         exception is wrapped.
     */
    @Override
    public void initialize(boolean emptyIndexes) {
        try {
            Map<String, Map<String, XContentBuilder>> indexMappings = getIndexMappings();
            Collection<String> existingIndexes = new HashSet<String>();
            for (String index : indexMappings.keySet()) {
                boolean exists = client.admin().indices().prepareExists(index).execute().actionGet().isExists();
                if (!exists) {
                    continue;
                }
                if (emptyIndexes) {
                    // deleted indexes are not remembered as existing, so they are created again below
                    client.admin().indices().prepareDelete(index).execute().actionGet();
                } else {
                    existingIndexes.add(index);
                }
            }
            for (Map.Entry<String, Map<String, XContentBuilder>> entry : indexMappings.entrySet()) {
                if (!existingIndexes.contains(entry.getKey())) {
                    createIndex(entry.getKey(), entry.getValue());
                }
            }
            checkClusterState();
            LOG.info("ES-STATUS: index should be ready now");
        } catch (IndexException e) {
            // already the right type (e.g. from checkClusterState), don't hide its message in a wrapper
            throw e;
        } catch (Exception e) {
            throw new IndexException(e);
        }
    }

    /**
     * Waits (at most 60000 milliseconds) for the cluster health to reach yellow status.
     * 
     * @throws IndexException when the health request timed out.
     */
    public void checkClusterState() {
        //test case config does not reach a yellow/green status...
        boolean timedOut = client.admin().cluster().prepareHealth().setWaitForYellowStatus().execute()
                .actionGet(60000).isTimedOut();
        if (timedOut) {
            throw new IndexException("ElasticSearchCluster not available");
        }
        //client.admin().cluster().health(new ClusterHealthRequest().waitForActiveShards(1)).actionGet();
    }

    /**
     * Creates an index with the default settings (refresh interval "-1") and the given mappings,
     * and refreshes it afterwards.
     * 
     * @param index the name of the index to create.
     * @param mappings the mapping definitions, keyed by configuration name.
     * @throws IOException when building the settings fails.
     */
    private void createIndex(String index, Map<String, XContentBuilder> mappings) throws IOException {
        CreateIndexRequestBuilder request = client.admin().indices().prepareCreate(index);
        request.setSettings(createSettings("-1"));
        for (Map.Entry<String, XContentBuilder> mapping : mappings.entrySet()) {
            request.addMapping(mapping.getKey(), mapping.getValue());
        }
        request.execute().actionGet();
        getClient().admin().indices().prepareRefresh(index).execute().actionGet();
    }

    /**
     * Builds the mapping definitions of all index configurations, grouped by index.
     * 
     * @return index name -> (configuration name -> mapping definition).
     * @throws IOException when building a mapping fails.
     */
    private Map<String, Map<String, XContentBuilder>> getIndexMappings() throws IOException {
        Map<String, Map<String, XContentBuilder>> byIndex = new HashMap<String, Map<String, XContentBuilder>>();
        for (IndexConfiguration config : indexConfigurations.values()) {
            String indexName = config.getIndexName();
            Map<String, XContentBuilder> forIndex = byIndex.get(indexName);
            if (forIndex == null) {
                // first configuration for this index
                forIndex = new HashMap<String, XContentBuilder>();
                byIndex.put(indexName, forIndex);
            }
            forIndex.put(config.getName(), createMapping(config));
        }
        return byIndex;
    }

    /**
     * Builds the index settings: an optional refresh interval, the edge-ngram filter and the two
     * auto-complete analyzers, plus all analyzers from {@link #getAnalyzers()}.
     * <p>
     * The raw definitions of the additional analyzers are encoded as UTF-8, independent of the
     * platform default charset.
     * 
     * @param refreshInterval the value for the "refresh_interval" setting, or null to leave it out.
     * @return The builder that contains the settings as JSON.
     * @throws IOException when writing to the builder fails.
     */
    public XContentBuilder createSettings(String refreshInterval) throws IOException {
        XContentBuilder settings = XContentFactory.jsonBuilder();
        settings.startObject();
        if (refreshInterval != null)
            settings.field("refresh_interval", refreshInterval);
        settings.startObject("analysis");
        settings.startObject("filter");
        settings.startObject(FILTER_EDGE_NGRAM);
        settings.field("max_gram", 16);
        settings.field("min_gram", 2);
        settings.field("type", "edge_ngram");
        settings.endObject(); //FILTER_EDGE_NGRAM
        settings.endObject(); //filter
        settings.startObject("analyzer");
        settings.startObject(ANALYZER_AUTOCOMPLETE_INDEX);
        settings.array("filter", "lowercase", "asciifolding", FILTER_EDGE_NGRAM);
        settings.field("tokenizer", "letter");
        settings.endObject(); //ANALYZER_AUTOCOMPLETE_INDEX
        settings.startObject(ANALYZER_AUTOCOMPLETE_SEARCH);
        settings.array("filter", "lowercase", "asciifolding"); //should be same as above, without FILTER_EDGE_NGRAM
        settings.field("tokenizer", "letter");
        settings.endObject(); //ANALYZER_AUTOCOMPLETE_SEARCH
        for (Map.Entry<String, String> entry : getAnalyzers().entrySet()) {
            // Explicit charset: the no-arg getBytes() would use the platform default and could corrupt non-ASCII definitions
            settings.rawField(entry.getKey(), entry.getValue().getBytes("UTF-8"));
        }
        settings.endObject(); //analyzer
        settings.endObject(); //analysis
        settings.endObject(); //settings
        return settings;
    }

    /**
     * Builds the elasticsearch mapping definition for one index configuration: date detection is
     * switched off, the {@link #FIELD_OBJECT_TYPE} field is added as a not-analyzed string, followed
     * by the mappings of all configured properties. Properties that ask for it are listed in the
     * "_source" excludes.
     * 
     * See: http://www.elasticsearch.org/guide/reference/mapping/core-types.html
     * 
     * @param config the configuration to create the mapping for.
     * @return The builder that contains the mapping as JSON.
     * @throws IOException when writing to the builder fails.
     */
    private XContentBuilder createMapping(IndexConfiguration config) throws IOException {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject().startObject(config.getName());
        builder.field("date_detection", "false");

        builder.startObject("properties");
        builder.startObject(FIELD_OBJECT_TYPE).field("type", "string").field("index", NOT_ANALYZED).endObject();
        Collection<String> excludedPaths = new HashSet<String>();
        for (PropertyConfig property : config.getProperties()) {
            createMappingForProperty("", builder, property, excludedPaths);
        }
        builder.endObject(); //properties

        if (!excludedPaths.isEmpty()) {
            String[] excludes = excludedPaths.toArray(new String[excludedPaths.size()]);
            builder.startObject("_source");
            builder.array("excludes", excludes);
            builder.endObject(); //_source
        }
        builder.endObject(); //config.getName()
        builder.endObject();
        return builder;
    }

    /**
     * Writes the mapping(s) for one property. A property with more than one {@link EsMapping} is
     * written as a "multi_field" that wraps the individual field mappings; a property with a single
     * mapping is written directly.
     * 
     * @param path the dotted path prefix of the enclosing object(s), "" at the top level.
     * @param mapping the builder to write to.
     * @param property the property to write the mapping for.
     * @param sourceExclude collects the paths of the fields that must be excluded from _source.
     * @throws IOException when writing to the builder fails.
     */
    private void createMappingForProperty(String path, XContentBuilder mapping, PropertyConfig property,
            Collection<String> sourceExclude) throws IOException {
        EsMapping[] fieldMappings = property.getAllEsMappings();
        final boolean multiField = fieldMappings.length > 1;
        if (multiField) {
            mapping.startObject(property.getName());
            mapping.field("type", "multi_field");
            mapping.startObject("fields");
        }
        for (int i = 0; i < fieldMappings.length; i++) {
            createMapping(path, mapping, property, sourceExclude, fieldMappings[i]);
        }
        if (multiField) {
            mapping.endObject(); //fields
            mapping.endObject(); //property.getName()
        }
    }

    /**
     * Writes the mapping of a single elasticsearch field for a property. For object properties that
     * are mapped to the "object" type, the {@link #FIELD_OBJECT_URI} field and the mappings of all
     * sub properties are nested inside.
     * 
     * @param path the dotted path prefix of the enclosing object(s), "" at the top level.
     * @param mapping the builder to write to.
     * @param property the property that the field belongs to.
     * @param sourceExclude collects the paths of the fields that must be excluded from _source.
     * @param esm describes the elasticsearch field to write.
     * @throws IOException when writing to the builder fails.
     */
    private void createMapping(String path, XContentBuilder mapping, PropertyConfig property,
            Collection<String> sourceExclude, EsMapping esm) throws IOException {
        final String fieldName = esm.getName(property.getName());
        final String fieldPath = path + fieldName;
        mapping.startObject(fieldName);
        mapping.field("type", esm.type);
        esm.createMappingProperties(mapping);
        if (property.getBoost() != 1.0) { //TODO: per MappingInfo not per Property!
            mapping.field("boost", property.getBoost());
        }
        boolean nestedObject = property.isObject() && OBJECT.equals(esm.type);
        if (nestedObject) {
            mapping.startObject("properties");
            mapping.startObject(FIELD_OBJECT_URI);
            mapping.field("type", STRING);
            mapping.field("index", NOT_ANALYZED);
            mapping.endObject(); //FIELD_OBJECT_URI
            for (PropertyConfig subProperty : property.getSubProperties()) {
                createMappingForProperty(fieldPath + ".", mapping, subProperty, sourceExclude);
            }
            mapping.endObject(); //properties
        }
        if (property.isSourceExclude()) {
            sourceExclude.add(fieldPath);
        }
        mapping.endObject(); //fieldName
    }

    /**
     * Delegates to the resolver that is registered for the function.
     * 
     * @param function the URI of the called function.
     * @param args the arguments of the function call.
     * @return The variable determined by the resolver, or null when the function is not supported.
     */
    @Override
    public Var getResultVarFromFunctionCall(URI function, List<ValueExpr> args) {
        AbstractResolveSearchArg resolver = SUPPORTED_FUNCTIONS.get(function);
        return resolver == null ? null : resolver.getResultVarFromFunctionCall(function, args);
    }

    /**
     * Always answers false; neither argument is inspected.
     * 
     * @return false, always.
     */
    @Override
    public boolean isIndexedConstraint(IndexerExpr expr, StatementPattern pattern) {
        return false;
    }

    /**
     * @param uri the property to check.
     * @return True if the string value of the URI is a key of {@code ElasticSearchIndexer.PROPERTY_BINDERS}.
     */
    public boolean isResultProperty(URI uri) {
        String property = uri.stringValue();
        return ElasticSearchIndexer.PROPERTY_BINDERS.keySet().contains(property);
    }

    static final class EsMapping {
        // Names and values used by asEnvelope when it writes the envelope of a geometry:
        private static final String FIELD_COORDINATES = "coordinates";
        private static final String TYPE_ENVELOPE = "envelope";
        private static final String FIELD_TYPE = "type";

        // The accepted literal datatypes; null when values are expected to have no datatype at all.
        private final Collection<URI> typeUris;
        private final String namePostfix; // added to the fieldname if multiple es fields map to one property
        private final String type; // string, float, ...
        private final String index; // analyzed, not_analyzed, no
        // Analyzer names; each is only written to the mapping when not null (see createMappingProperties):
        private final String indexAnalyzer;
        private final String searchAnalyzer;
        private final Boolean includeInAll;// Should the field be included in the _all field. Defaults to elasticsearch default for the type (or to the parent object type setting).
        private boolean isResource; //this maps a Resource (currently only URI is supported, in the future we might support BNode for some stores)

        // Telescoping constructors: all of them end up in the last one, with null for every setting that is not passed.

        // Mapping for a single datatype, with only the elasticsearch type set.
        EsMapping(URI typeUri, String namePostfix, String type) {
            this(typeUri, namePostfix, type, null, null);
        }

        // As above, with an explicit value for the "index" setting.
        private EsMapping(URI typeUri, String namePostfix, String type, String index) {
            this(typeUri, namePostfix, type, index, null);
        }

        // Mapping that accepts several datatypes.
        private EsMapping(URI[] typeUris, String namePostfix, String type) {
            this(typeUris, namePostfix, type, null, null, null, null);
        }

        // Single datatype, with "index" and "include_in_all" settings, without analyzers.
        private EsMapping(URI typeUri, String namePostfix, String type, String index, Boolean includeInAll) {
            this(typeUri, namePostfix, type, index, null, null, includeInAll);
        }

        // Single datatype with all settings; wraps the datatype in a one-element array.
        private EsMapping(URI typeUri, String namePostfix, String type, String index, String indexAnalyzer,
                String searchAnalyzer, Boolean includeInAll) {
            this(new URI[] { typeUri }, namePostfix, type, index, indexAnalyzer, searchAnalyzer, includeInAll);
        }

        // The constructor that does the actual work. A null array, or an array that only holds null,
        // results in typeUris == null (see of(URI[])).
        private EsMapping(URI[] typeUris, String namePostfix, String type, String index, String indexAnalyzer,
                String searchAnalyzer, Boolean includeInAll) {
            this.typeUris = of(typeUris);
            this.namePostfix = namePostfix;
            this.type = type;
            this.index = index;
            this.indexAnalyzer = indexAnalyzer;
            this.searchAnalyzer = searchAnalyzer;
            this.includeInAll = includeInAll;
        }

        /**
         * Converts an array of datatype URIs to a collection.
         * 
         * @return null for a null array or a one-element array that holds null; a singleton for
         *         any other one-element array; otherwise a set with all elements of the array.
         */
        private static Collection<URI> of(URI[] uris) {
            if (uris == null) {
                return null;
            }
            if (uris.length == 1) {
                URI only = uris[0];
                return only == null ? null : Collections.singleton(only);
            }
            HashSet<URI> all = new HashSet<URI>(uris.length);
            Collections.addAll(all, uris);
            return all;
        }

        /**
         * Writes the optional settings of this mapping ("index", "include_in_all" and the analyzers)
         * to the builder. When the index and search analyzer are the same, a single "analyzer" field
         * is written instead of separate "index_analyzer" and "search_analyzer" fields.
         * 
         * @param mapping the builder to write to.
         * @throws IOException when writing to the builder fails.
         */
        public void createMappingProperties(XContentBuilder mapping) throws IOException {
            if (index != null) {
                mapping.field("index", index);
            }
            if (includeInAll != null) {
                mapping.field("include_in_all", includeInAll);
            }
            boolean sharedAnalyzer = indexAnalyzer != null && searchAnalyzer != null
                    && indexAnalyzer.equals(searchAnalyzer);
            if (sharedAnalyzer) {
                mapping.field("analyzer", indexAnalyzer);
                return;
            }
            if (indexAnalyzer != null) {
                mapping.field("index_analyzer", indexAnalyzer);
            }
            if (searchAnalyzer != null) {
                mapping.field("search_analyzer", searchAnalyzer);
            }
        }

        /**
         * Marks this mapping as a mapping for resources instead of literals.
         * 
         * @return This mapping, to allow chaining on construction.
         */
        private EsMapping uri() {
            isResource = true;
            return this;
        }

        /**
         * @param name the name of the property.
         * @return The name of the elasticsearch field: the name postfix of this mapping if it has one, else the given name.
         */
        public String getName(String name) {
            return namePostfix == null ? name : namePostfix;
        }

        /**
         * Converts the object of a statement to the value that is stored in the index.
         * 
         * @param resource the subject of the statement, used in error messages only.
         * @param predicate the predicate of the statement, used in error messages only.
         * @param object the value to convert.
         * @return The string value for resource mappings, else the converted literal value.
         * @throws IndexException when the object is a BNode, or is not of the kind (resource/literal)
         *         that this mapping expects.
         */
        public Object getStoreValue(Resource resource, URI predicate, Value object) {
            if (object instanceof BNode) {
                throw error("Indexing of BNodes is not supported", resource, predicate, object);
            }
            if (!isResource) {
                if (object instanceof Literal) {
                    return createStoreValueForLiteral(resource, predicate, object, (Literal) object);
                }
                throw error("Expecting Literal as object", resource, predicate, object);
            }
            if (object instanceof Resource) {
                return object.stringValue();
            }
            throw error("Expecting URI as object", resource, predicate, object);
        }

        /**
         * Converts a literal to the value that is stored in the index. Literals without datatype are
         * only accepted by mappings without expected datatypes, and must not have a language tag;
         * literals with a datatype are only accepted by mappings that expect datatypes.
         * 
         * @throws IndexException when the literal does not fit this mapping.
         */
        private Object createStoreValueForLiteral(Resource resource, URI predicate, Value object, Literal literal) {
            boolean typed = literal.getDatatype() != null;
            if (typed) {
                if (typeUris == null) {
                    throw error("Unexpected type " + literal.getDatatype() + " for object", resource, predicate,
                            object);
                }
                return datatypeValue(literal);
            }
            // untyped literal:
            if (typeUris != null) {
                throw error("Expecting type to be one of " + typeUris + " for object", resource, predicate,
                        object);
            }
            if (literal.getLanguage() != null) {
                throw error("Language tags are not supported", resource, predicate, object);
            }
            return object.stringValue();
        }

        /**
         * Converts a value to the form that is used in elasticsearch filters. URIs, and literals for
         * mappings without expected datatypes, are used by their string value. Other literals are
         * converted according to the first expected datatype of this mapping.
         * 
         * @param object the value to filter on.
         * @return The value to use in the filter.
         * @throws IndexException when the value is a BNode.
         */
        public Object getFilterValue(Value object) {
            //TODO: this should be relaxed to also use non-matching types in filters (should be reflected in the filter outcome)
            //TODO: types of both the filter value and possible index values should be checked, and the filter changed accordingly
            //TODO: how to handle language tags in filters?
            //TODO: numeric string values should be treated as strings, not numbers...
            if (object instanceof BNode) {
                throw error("Filtering on BNodes is not supported", object);
            }
            if (object instanceof URI) {
                return object.stringValue();
            }
            Literal literal = (Literal) object;
            if (typeUris != null) {
                URI firstType = typeUris.iterator().next();
                return datatypeValue(firstType, literal);
            }
            return object.stringValue();
        }

        /**
         * Converts a literal according to the expected datatype that equals the datatype of the literal.
         * 
         * @throws IndexException when the datatype of the literal is not one of the expected datatypes.
         */
        private Object datatypeValue(Literal literal) {
            URI match = null;
            for (URI candidate : typeUris) {
                if (candidate.equals(literal.getDatatype())) {
                    match = candidate;
                    break;
                }
            }
            if (match == null) {
                throw error("Expecting type to be one of " + typeUris + " for object", literal);
            }
            return datatypeValue(match, literal);
        }

        /**
         * Converts a literal to the Java value for the given datatype: the matching primitive wrapper
         * for boolean and numeric types, a {@code java.util.Date} for date and dateTime, the string
         * value for strings, and an envelope for supported geometry types. Any type that is not
         * handled explicitly must be xsd:short.
         */
        private Object datatypeValue(URI typeUri, Literal literal) {
            if (XMLSchema.BOOLEAN.equals(typeUri)) {
                return literal.booleanValue();
            }
            if (XMLSchema.BYTE.equals(typeUri)) {
                return literal.byteValue();
            }
            //TODO ?? (xsd:date is converted in the same way as xsd:dateTime)
            if (XMLSchema.DATE.equals(typeUri) || XMLSchema.DATETIME.equals(typeUri)) {
                return literal.calendarValue().normalize().toGregorianCalendar().getTime();
            }
            if (XMLSchema.DOUBLE.equals(typeUri)) {
                return literal.doubleValue();
            }
            if (XMLSchema.FLOAT.equals(typeUri)) {
                return literal.floatValue();
            }
            if (XMLSchema.INT.equals(typeUri)) {
                return literal.intValue();
            }
            if (XMLSchema.LONG.equals(typeUri)) {
                return literal.longValue();
            }
            if (XMLSchema.STRING.equals(typeUri)) {
                return literal.stringValue();
            }
            if (GeoConvert.isSupported(typeUri)) {
                return asEnvelope(literal);
            }
            // the only type that is left:
            Validate.isTrue(XMLSchema.SHORT.equals(typeUri));
            return literal.shortValue();
        }

        //TODO: would be better if we indexed the geometry instead of its envelope?
        //TODO: handling of envelope.isEmpty
        //TODO: handle point data with a point instead of an envelope?
        /**
         * Converts a geometry literal to the UTF-8 bytes of an "envelope" shape. The coordinates are
         * written as the [minX, maxY] corner followed by the [maxX, minY] corner of the envelope of
         * the geometry.
         * 
         * @throws IllegalStateException when the literal is not a valid geometry.
         */
        private byte[] asEnvelope(Literal literal) {
            Envelope bounds;
            try {
                bounds = GeoConvert.toGeometry(literal, false).getEnvelopeInternal();
            } catch (InvalidGeometryException e) {
                throw new IllegalStateException(e);
                //TODO: we should just not index it, or index the empty geometry?
                //TODO: we should not throw IllegalStateException for user-data
            }
            String upperLeft = "[" + bounds.getMinX() + ',' + bounds.getMaxY() + "]";
            String lowerRight = "[" + bounds.getMaxX() + ',' + bounds.getMinY() + "]";
            String shape = "{" + FIELD_TYPE + ":\"" + TYPE_ENVELOPE + "\"," + FIELD_COORDINATES + ":["
                    + upperLeft + "," + lowerRight + "]}";
            try {
                return shape.getBytes("UTF-8");
            } catch (UnsupportedEncodingException e) {
                throw new IllegalStateException(e);
            }
        }

        /**
         * Creates (but does not throw) the exception for a value that can not be handled.
         * Callers are expected to throw the result: {@code throw error(...)}.
         * 
         * @param message description of the problem.
         * @param field the offending value; its string value is appended to the message.
         * @return The exception to throw.
         */
        private IndexException error(String message, Value field) {
            return new IndexException(message + ": " + field.stringValue());
        }

        /**
         * Creates (but does not throw) the exception for a statement whose object can not be handled.
         * Subject and predicate are included in the message when both are known.
         * Callers are expected to throw the result: {@code throw error(...)}.
         * 
         * @param message description of the problem.
         * @param resource the subject of the statement, may be null.
         * @param predicate the predicate of the statement, may be null.
         * @param field the offending object value.
         * @return The exception to throw.
         */
        private IndexException error(String message, Resource resource, URI predicate, Value field) {
            if (resource != null && predicate != null) {
                return new IndexException(message + ": " + resource.stringValue() + " " + predicate.stringValue()
                        + " " + field.stringValue());
            }
            return error(message, field);
        }
    }

    /**
     * omit_norms Boolean value if norms should be omitted or not. Defaults to false.
     * omit_term_freq_and_positions Boolean value if term freq and positions should be omitted. Defaults to false.
     * analyzer The analyzer used to analyze the text contents when analyzed during indexing and when searching using a query string. Defaults to the globally configured analyzer.
     * index_analyzer The analyzer used to analyze the text contents when analyzed during indexing.
     * search_analyzer The analyzer used to analyze the field when part of a query string.
     * include_in_all Should the field be included in the _all field (if enabled). Defaults to true or to the parent object type setting.
     */
}