fr.ortolang.diffusion.indexing.elastic.ElasticSearchServiceBean.java Source code

Java tutorial

Introduction

Here is the source code for fr.ortolang.diffusion.indexing.elastic.ElasticSearchServiceBean.java

Source

package fr.ortolang.diffusion.indexing.elastic;

/*
 * #%L
 * ORTOLANG
 * A online network structure for hosting language resources and tools.
 *
 * Jean-Marie Pierrel / ATILF UMR 7118 - CNRS / Université de Lorraine
 * Etienne Petitjean / ATILF UMR 7118 - CNRS
 * Jérôme Blanchard / ATILF UMR 7118 - CNRS
 * Bertrand Gaiffe / ATILF UMR 7118 - CNRS
 * Cyril Pestel / ATILF UMR 7118 - CNRS
 * Marie Tonnelier / ATILF UMR 7118 - CNRS
 * Ulrike Fleury / ATILF UMR 7118 - CNRS
 * Frédéric Pierre / ATILF UMR 7118 - CNRS
 * Céline Moro / ATILF UMR 7118 - CNRS
 *
 * This work is based on work done in the equipex ORTOLANG (http://www.ortolang.fr/), by several Ortolang contributors (mainly CNRTL and SLDR)
 * ORTOLANG is funded by the French State program "Investissements d'Avenir" ANR-11-EQPX-0032
 * %%
 * Copyright (C) 2013 - 2015 Ortolang Team
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-3.0.html>.
 * #L%
 */

import fr.ortolang.diffusion.*;
import fr.ortolang.diffusion.indexing.IndexingServiceException;
import fr.ortolang.diffusion.indexing.OrtolangIndexableContent;
import fr.ortolang.diffusion.registry.KeyNotFoundException;
import fr.ortolang.diffusion.registry.RegistryService;
import fr.ortolang.diffusion.registry.RegistryServiceException;
import fr.ortolang.diffusion.search.SearchQuery;
import fr.ortolang.diffusion.search.SearchResult;
import fr.ortolang.diffusion.util.StreamUtils;

import org.apache.commons.io.IOUtils;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.get.GetRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.IndicesAdminClient;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.engine.DocumentMissingException;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.bucket.nested.InternalNested;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.jboss.ejb3.annotation.SecurityDomain;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.annotation.security.PermitAll;
import javax.annotation.security.RolesAllowed;
import javax.ejb.EJB;
import javax.ejb.Local;
import javax.ejb.Singleton;
import javax.ejb.Startup;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

import static org.elasticsearch.script.Script.DEFAULT_SCRIPT_LANG;

@Startup
@Local(ElasticSearchService.class)
@Singleton(name = ElasticSearchService.SERVICE_NAME)
@SecurityDomain("ortolang")
@PermitAll
public class ElasticSearchServiceBean implements ElasticSearchService {

    /** Class-wide logger. */
    private static final Logger LOGGER = Logger.getLogger(ElasticSearchServiceBean.class.getName());

    /** Registry used to resolve a key into its object identifier before indexing. */
    @EJB
    private RegistryService registry;

    /** Elasticsearch transport client; set to {@code null} by {@code init()} when no host is configured. */
    private TransportClient client;

    /** Local cache: index name -> types already processed (index created or mapping put attempted). */
    private Map<String, Set<String>> indices;

    /** Script sources keyed by script name, used for scripted updates. */
    private Map<String, String> scripts;

    /**
     * Creates the service, initializes the local caches and loads the Painless
     * script(s) used for scripted updates from the classpath.
     *
     * @throws IOException if the script resource is missing from the classpath or cannot be read
     */
    public ElasticSearchServiceBean() throws IOException {
        LOGGER.log(Level.INFO, "Instantiating Elastic Search Service");
        indices = new HashMap<>();
        scripts = new HashMap<>();
        LOGGER.log(Level.INFO, "Loading Painless scripts");
        LOGGER.log(Level.INFO, "Load updateRootCollectionChild script");
        String resource = "indexing/updateRootCollectionChild.painless";
        // try-with-resources guarantees the classpath stream is released once the script is read
        try (InputStream stream = getClass().getClassLoader().getResourceAsStream(resource)) {
            if (stream == null) {
                // Fail with an explicit message instead of an opaque NullPointerException
                throw new IOException("Painless script not found on classpath: " + resource);
            }
            scripts.put("updateRootCollectionChild", IOUtils.toString(stream, StandardCharsets.UTF_8));
        }
    }

    /**
     * Builds the transport client from the configured hosts and ports.
     * <p>
     * Hosts and ports are comma separated lists that must have the same number of
     * entries; surrounding whitespace around each entry is ignored. When no host is
     * configured the client is left {@code null} and the service stays inactive.
     *
     * @throws IllegalStateException if the port property is missing or the number of
     *         hosts differs from the number of ports
     */
    @PostConstruct
    public void init() {
        LOGGER.log(Level.INFO, "Initializing Elastic Search Service");
        String hostProperty = OrtolangConfig.getInstance().getProperty(OrtolangConfig.Property.ELASTIC_SEARCH_HOST);
        if (hostProperty == null || hostProperty.length() == 0) {
            LOGGER.log(Level.INFO, "Elastic Search not configured, skipping initialization");
            client = null;
            return;
        }
        String portProperty = OrtolangConfig.getInstance().getProperty(OrtolangConfig.Property.ELASTIC_SEARCH_PORT);
        if (portProperty == null) {
            // Explicit configuration error rather than a NullPointerException on split()
            throw new IllegalStateException(
                    "Elastic search configuration incorrect: host is defined but port is missing");
        }
        String[] hosts = hostProperty.split(",");
        String[] ports = portProperty.split(",");
        if (hosts.length != ports.length) {
            throw new IllegalStateException(
                    "Elastic search configuration incorrect: host number and port number should be equal");
        }
        try {
            Settings settings = Settings.builder().put("cluster.name", "ortolang").build();
            client = new PreBuiltTransportClient(settings);
            for (int i = 0; i < hosts.length; i++) {
                // trim() tolerates lists written as "host1, host2" / "9300, 9301"
                client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(hosts[i].trim()),
                        Integer.parseInt(ports[i].trim())));
            }
        } catch (UnknownHostException e) {
            // The client is kept with the addresses registered so far
            LOGGER.log(Level.SEVERE, e.getMessage(), e);
        }
    }

    /**
     * Closes the transport client when the bean is destroyed.
     * <p>
     * The client is {@code null} when Elasticsearch is not configured (see
     * {@code init()}), in which case there is nothing to close.
     */
    @PreDestroy
    public void shutdown() {
        LOGGER.log(Level.INFO, "Shutting down Elastic Search Service");
        if (client != null) {
            client.close();
        }
    }

    /**
     * Indexes every indexable content produced for the given key.
     * <p>
     * Nothing is done when the client is not configured or has no connected node.
     * For each non-empty content the target index is created (with its settings and
     * type mapping) or the type mapping is put if needed, then the document is either
     * updated through a script or (re)indexed.
     * <p>
     * Error handling per content:
     * <ul>
     * <li>index not found: the index is dropped from the local cache and the whole key is retried;</li>
     * <li>illegal argument or any unexpected error: logged, remaining contents are skipped;</li>
     * <li>document missing: logged, processing continues with the next content;</li>
     * <li>mapper parsing error: logged and rethrown as {@link ElasticSearchServiceException}.</li>
     * </ul>
     *
     * @param key the registry key to index
     * @throws ElasticSearchServiceException if the key cannot be resolved, its indexable content
     *         cannot be produced, or the document cannot be mapped
     */
    @Override
    public void index(String key) throws ElasticSearchServiceException {
        try {
            LOGGER.log(Level.FINEST, "Starting to index key [" + key + "]");
            if (client == null || client.connectedNodes().isEmpty()) {
                return;
            }
            OrtolangObjectIdentifier identifier = registry.lookup(key);
            OrtolangIndexableService service = OrtolangServiceLocator.findIndexableService(identifier.getService());
            List<OrtolangIndexableContent> indexableContents = service.getIndexableContent(key);
            for (OrtolangIndexableContent indexableContent : indexableContents) {
                LOGGER.log(Level.FINEST, "Start to index key [" + indexableContent.getId() + "] of type ["
                        + indexableContent.getType() + "] in index [" + indexableContent.getIndex() + "]");
                if (indexableContent.isEmpty()) {
                    continue;
                }
                try {
                    ensureIndexAndType(indexableContent, client.admin().indices());
                    writeDocument(indexableContent);
                } catch (IndexNotFoundException e) {
                    LOGGER.log(Level.INFO,
                            "Index not found: removing it from registry and re-trying to index key [" + key
                                    + "]");
                    // The local cache was stale: forget the index so the retry re-creates it
                    indices.remove(indexableContent.getIndex());
                    index(key);
                    return;
                } catch (IllegalArgumentException e) {
                    LOGGER.log(Level.WARNING,
                            "IllegalArgumentException for key [" + indexableContent.getId() + "] with type ["
                                    + indexableContent.getType() + "] in index [" + indexableContent.getIndex()
                                    + "]",
                            e);
                    return;
                } catch (DocumentMissingException e) {
                    LOGGER.log(Level.WARNING,
                            "Document missing for key [" + indexableContent.getId() + "] with type ["
                                    + indexableContent.getType() + "] in index [" + indexableContent.getIndex()
                                    + "] " + e.getMessage());
                } catch (MapperParsingException e) {
                    LOGGER.log(Level.WARNING,
                            "MapperParsingException for key [" + indexableContent.getId() + "] with type ["
                                    + indexableContent.getType() + "] in index [" + indexableContent.getIndex()
                                    + "]",
                            e);
                    throw new ElasticSearchServiceException(e.getMessage(), e);
                } catch (Exception e) {
                    LOGGER.log(Level.SEVERE, "An unexpected error happened while indexing key ["
                            + indexableContent.getId() + "]", e);
                    // TODO throw exception ???
                    return;
                }
            }
        } catch (OrtolangException | KeyNotFoundException | RegistryServiceException | IndexingServiceException e) {
            throw new ElasticSearchServiceException("An unexpected error happened while indexing key [" + key + "]",
                    e);
        }
    }

    /**
     * Makes sure the target index exists and that the content type has been processed,
     * creating the index or putting the mapping when the local cache does not know them yet.
     */
    private void ensureIndexAndType(OrtolangIndexableContent indexableContent, IndicesAdminClient adminClient)
            throws IOException {
        String index = indexableContent.getIndex();
        if (indices.containsKey(index)) {
            checkType(indexableContent, adminClient);
        } else if (adminClient.prepareExists(index).get().isExists()) {
            // Index exists remotely but is unknown locally: register it, then check the type
            indices.put(index, new HashSet<>());
            checkType(indexableContent, adminClient);
        } else {
            createIndex(indexableContent, adminClient);
        }
    }

    /**
     * Creates the index with its settings and the mapping of the content type (when the
     * corresponding resource files are available) and registers it in the local cache.
     */
    private void createIndex(OrtolangIndexableContent indexableContent, IndicesAdminClient adminClient)
            throws IOException {
        String index = indexableContent.getIndex();
        String type = indexableContent.getType();
        LOGGER.log(Level.FINE, "Creating index [" + index + "] and add mapping for type [" + type + "]");
        CreateIndexRequestBuilder requestBuilder = adminClient.prepareCreate(index);
        String settings = readResource(PATH_TO_SETTINGS + index + EXTENSION_MAPPING, "set configuration");
        if (settings != null) {
            requestBuilder.setSettings(settings);
        }
        String mapping = readResource(PATH_TO_MAPPINGS + type + EXTENSION_MAPPING, "put mapping");
        if (mapping != null) {
            requestBuilder.addMapping(type, mapping);
        }
        requestBuilder.get();
        Set<String> types = new HashSet<>();
        types.add(type);
        indices.put(index, types);
    }

    /**
     * Reads a class-relative resource as a string, closing the stream afterwards.
     * Returns {@code null} (after logging) when the resource is missing or yields no content.
     */
    private String readResource(String path, String purpose) throws IOException {
        // A null resource is allowed by try-with-resources and is simply not closed
        try (InputStream stream = this.getClass().getResourceAsStream(path)) {
            if (stream == null) {
                LOGGER.log(Level.WARNING, "Unable to " + purpose + " : file not found : " + path);
                return null;
            }
            String content = StreamUtils.getContent(stream);
            if (content == null) {
                LOGGER.log(Level.SEVERE, "Unable to " + purpose + " : cannot read file : " + path);
            }
            return content;
        }
    }

    /**
     * Sends the document to Elasticsearch: scripted update when the content is flagged
     * as an update, plain index request otherwise.
     */
    private void writeDocument(OrtolangIndexableContent indexableContent) {
        if (indexableContent.isUpdate()) {
            LOGGER.log(Level.FINE,
                    "Updating key [" + indexableContent.getId() + "] in index ["
                            + indexableContent.getIndex() + "] with type ["
                            + indexableContent.getType() + "]");

            Script script = new Script(ScriptType.INLINE, DEFAULT_SCRIPT_LANG,
                    scripts.get(indexableContent.getScript()), indexableContent.getScriptParams());
            client.prepareUpdate(indexableContent.getIndex(), indexableContent.getType(),
                    indexableContent.getId()).setScript(script).get();
        } else {
            LOGGER.log(Level.FINE,
                    "Indexing key [" + indexableContent.getId() + "] in index ["
                            + indexableContent.getIndex() + "] with type ["
                            + indexableContent.getType() + "]");
            client.prepareIndex(indexableContent.getIndex(), indexableContent.getType(),
                    indexableContent.getId()).setSource(indexableContent.getContent()).get();
        }
    }

    /**
     * Removes the document(s) associated with the given key from the index.
     * <p>
     * Currently a no-op: removal is not implemented, the call returns without
     * touching Elasticsearch.
     *
     * @param key the registry key to remove
     * @throws ElasticSearchServiceException declared by the service contract; never thrown by this implementation
     */
    @Override
    public void remove(String key) throws ElasticSearchServiceException {
        // TODO not implemented
    }

    /**
     * Executes the given search query.
     *
     * @param query the query to run (index, type, query string, size, sort, source filtering, aggregations)
     * @return the total hit count, the hits and, when requested, the aggregation values
     */
    @Override
    public SearchResult search(SearchQuery query) {
        return executeSearch(query);
    }

    /**
     * Executes the given search query; restricted to the {@code admin} and {@code system} roles.
     *
     * @param query the query to run
     * @return the total hit count, the hits and, when requested, the aggregation values
     */
    @Override
    @RolesAllowed({ "admin", "system" })
    public SearchResult systemSearch(SearchQuery query) {
        return executeSearch(query);
    }

    /**
     * Shared implementation of {@link #search(SearchQuery)} and {@link #systemSearch(SearchQuery)}:
     * builds the request, runs it and converts the response into a {@link SearchResult}.
     */
    private SearchResult executeSearch(SearchQuery query) {
        LOGGER.log(Level.FINE, "Search in " + query.getIndex() + " and type " + query.getType() + " with query "
                + query.getQuery());
        SearchRequestBuilder searchRequest = searchRequest(query, client);

        LOGGER.log(Level.FINE, searchRequest.toString());
        SearchResponse searchResponse = searchRequest.get();

        // Convert the Elasticsearch response into the service result
        SearchResult result = new SearchResult();
        result.setTotalHits(searchResponse.getHits().getTotalHits());
        result.setHits(searchResponse.getHits().getHits());
        if (query.getAggregations() != null) {
            ElasticSearchServiceBean.fillAggregations(query.getAggregations(), searchResponse, result);
        }
        return result;
    }

    /**
     * Builds the Elasticsearch search request matching the given query.
     * <p>
     * Every optional attribute of the query (query string, index, type, size, sort,
     * aggregations) is only applied when present; source filtering is always applied
     * with the query includes and excludes.
     *
     * @param query  the search query to translate
     * @param client the transport client used to prepare the request
     * @return the prepared, not yet executed, search request
     */
    protected static SearchRequestBuilder searchRequest(SearchQuery query, TransportClient client) {
        SearchRequestBuilder searchRequest = client.prepareSearch();
        // A missing query string is treated like an empty one, consistently with the other optional attributes
        if (query.getQuery() != null && !query.getQuery().isEmpty()) {
            QueryBuilder queryBuilder = ElasticSearchQueryParser.parse(query.getQuery());
            if (queryBuilder != null) {
                searchRequest.setQuery(queryBuilder);
            }
        }
        if (query.getIndex() != null) {
            searchRequest.setIndices(query.getIndex());
        }
        if (query.getType() != null) {
            searchRequest.setTypes(query.getType());
        }
        if (query.getSize() != null) {
            searchRequest.setSize(query.getSize());
        }
        if (query.hasOrder()) {
            searchRequest.addSort(query.getOrderProp(), SortOrder.fromString(query.getOrderDir()));
        }
        searchRequest.setFetchSource(query.getIncludes(), query.getExcludes());
        if (query.getAggregations() != null) {
            for (String agg : query.getAggregations()) {
                searchRequest.addAggregation(ElasticSearchAggregationParser.parse(agg));
            }
        }
        return searchRequest;
    }

    /**
     * Copies the bucket keys of the requested terms aggregations into the result.
     * <p>
     * Each aggregation is given as {@code path} or {@code path:field}; only the path
     * part is used here. A path ending with {@code []} denotes a nested aggregation
     * whose terms are read from its {@code content} sub-aggregation; the {@code []}
     * suffix is stripped from the name under which values are stored in the result.
     * Aggregations absent from the response are skipped.
     *
     * @param aggrs          the aggregation descriptors that were requested
     * @param searchResponse the Elasticsearch response to read aggregations from
     * @param result         the result receiving one value per bucket
     */
    protected static void fillAggregations(String[] aggrs, SearchResponse searchResponse, SearchResult result) {
        if (searchResponse.getAggregations() == null) {
            // The response carries no aggregation at all: nothing to copy
            return;
        }
        for (String agg : aggrs) {
            String[] parts = agg.split(":");
            String aggPath = parts.length > 1 ? parts[0] : agg;
            boolean nested = aggPath.endsWith("[]");
            String aggName = nested ? aggPath.substring(0, aggPath.length() - 2) : aggPath;

            Terms terms = null;
            if (nested) {
                InternalNested nestedAgg = searchResponse.getAggregations().get(aggName);
                // Guard against a nested aggregation missing from the response
                if (nestedAgg != null) {
                    terms = nestedAgg.getAggregations().get("content");
                }
            } else {
                terms = searchResponse.getAggregations().get(aggName);
            }
            if (terms != null) {
                for (Terms.Bucket bucket : terms.getBuckets()) {
                    result.addAggregation(aggName, bucket.getKeyAsString());
                }
            }
        }
    }

    /**
     * Fetches the source of a single document.
     *
     * @param index the index holding the document
     * @param type  the document type
     * @param id    the document identifier
     * @return the document source as a string, or {@code null} when the document does not exist
     */
    @Override
    public String get(String index, String type, String id) {
        GetResponse response = client.prepareGet(index, type, id).get();
        return response.isExists() ? response.getSourceAsString() : null;
    }

    /**
     * Puts the mapping of the content type in its index unless the type has already
     * been processed for that index.
     * <p>
     * The mapping is read from the class-relative resource
     * {@code PATH_TO_MAPPINGS + type + EXTENSION_MAPPING}. The type is recorded as
     * processed even when the mapping file is missing or unreadable, so the attempt
     * is not repeated for every document. The index must already be registered in the
     * local cache.
     *
     * @param indexableContent the content whose index and type are checked
     * @param adminClient      the indices admin client used to put the mapping
     */
    private void checkType(OrtolangIndexableContent indexableContent, IndicesAdminClient adminClient) {
        String index = indexableContent.getIndex();
        String type = indexableContent.getType();
        Set<String> knownTypes = indices.get(index);
        if (knownTypes.contains(type)) {
            return;
        }
        LOGGER.log(Level.FINE, "[checkType] Put mapping for type [" + type
                + "] in index [" + index + "]");
        String path = PATH_TO_MAPPINGS + type + EXTENSION_MAPPING;
        // try-with-resources releases the mapping stream; a null resource is simply not closed
        try (InputStream mapping = this.getClass().getResourceAsStream(path)) {
            if (mapping == null) {
                LOGGER.log(Level.SEVERE, "Unable to put mapping : file not found : " + path);
            } else {
                String mappingAsString = StreamUtils.getContent(mapping);
                if (mappingAsString != null) {
                    adminClient.preparePutMapping(index).setType(type).setSource(mappingAsString).get();
                } else {
                    LOGGER.log(Level.SEVERE, "Unable to put mapping : cannot read file : " + path);
                }
            }

            knownTypes.add(type);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Unabled to put mapping for type [" + type
                    + "] in index [" + index + "]", e);
        }
    }

    /**
     * Returns the name under which this service is registered.
     *
     * @return {@code ElasticSearchService.SERVICE_NAME}
     */
    @Override
    public String getServiceName() {
        return ElasticSearchService.SERVICE_NAME;
    }

    /**
     * Returns informational key/value pairs about this service.
     *
     * @return always an empty, immutable map: this implementation exposes no information
     */
    @Override
    public Map<String, String> getServiceInfos() {
        return Collections.emptyMap();
    }

}