org.exoplatform.addons.es.index.impl.ElasticIndexingOperationProcessor.java Source code

Java tutorial

Introduction

Here is the source code for org.exoplatform.addons.es.index.impl.ElasticIndexingOperationProcessor.java

Source

/* 
 * Copyright (C) 2003-2015 eXo Platform SAS.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 */
package org.exoplatform.addons.es.index.impl;

import org.apache.commons.lang.StringUtils;
import org.exoplatform.addons.es.client.ElasticContentRequestBuilder;
import org.exoplatform.addons.es.client.ElasticIndexingAuditTrail;
import org.exoplatform.addons.es.client.ElasticIndexingClient;
import org.exoplatform.addons.es.dao.IndexingOperationDAO;
import org.exoplatform.addons.es.domain.IndexingOperation;
import org.exoplatform.addons.es.domain.OperationType;
import org.exoplatform.addons.es.index.IndexingOperationProcessor;
import org.exoplatform.addons.es.index.IndexingServiceConnector;
import org.exoplatform.commons.api.persistence.DataInitializer;
import org.exoplatform.commons.utils.PropertyManager;
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;
import org.picocontainer.Startable;

import java.util.*;

/**
 * Created by The eXo Platform SAS Author : Thibault Clement
 * tclement@exoplatform.com 10/12/15
 */
public class ElasticIndexingOperationProcessor extends IndexingOperationProcessor implements Startable {

    private static final Log LOG = ExoLogger.getExoLogger(ElasticIndexingOperationProcessor.class);
    private static final String BATCH_NUMBER_PROPERTY_NAME = "exo.es.indexing.batch.number";
    private static final Integer BATCH_NUMBER_DEFAULT = 1000;
    private static final String REQUEST_SIZE_LIMIT_PROPERTY_NAME = "exo.es.indexing.request.size.limit";
    /** in bytes, default=10MB **/
    private static final Integer REQUEST_SIZE_LIMIT_DEFAULT = 10485760;
    private static final String REINDEXING_BATCH_SIZE_PROPERTY_NAME = "exo.es.reindex.batch.size";
    private static final int REINDEXING_BATCH_SIZE_DEFAULT_VALUE = 100;

    // Service
    private final IndexingOperationDAO indexingOperationDAO;
    private final ElasticIndexingClient elasticIndexingClient;
    private final ElasticContentRequestBuilder elasticContentRequestBuilder;
    private final ElasticIndexingAuditTrail auditTrail;
    private Integer batchNumber = BATCH_NUMBER_DEFAULT;
    private Integer requestSizeLimit = REQUEST_SIZE_LIMIT_DEFAULT;
    private int reindexBatchSize = REINDEXING_BATCH_SIZE_DEFAULT_VALUE;

    public ElasticIndexingOperationProcessor(IndexingOperationDAO indexingOperationDAO,
            ElasticIndexingClient elasticIndexingClient, ElasticContentRequestBuilder elasticContentRequestBuilder,
            ElasticIndexingAuditTrail auditTrail, DataInitializer dataInitializer) {
        this.indexingOperationDAO = indexingOperationDAO;
        this.auditTrail = auditTrail;
        this.elasticIndexingClient = elasticIndexingClient;
        this.elasticContentRequestBuilder = elasticContentRequestBuilder;
        if (StringUtils.isNotBlank(PropertyManager.getProperty(BATCH_NUMBER_PROPERTY_NAME))) {
            this.batchNumber = Integer.valueOf(PropertyManager.getProperty(BATCH_NUMBER_PROPERTY_NAME));
        }
        if (StringUtils.isNotBlank(PropertyManager.getProperty(REQUEST_SIZE_LIMIT_PROPERTY_NAME))) {
            this.requestSizeLimit = Integer.valueOf(PropertyManager.getProperty(REQUEST_SIZE_LIMIT_PROPERTY_NAME));
        }
        if (StringUtils.isNotBlank(PropertyManager.getProperty(REINDEXING_BATCH_SIZE_PROPERTY_NAME))) {
            this.reindexBatchSize = Integer
                    .valueOf(PropertyManager.getProperty(REINDEXING_BATCH_SIZE_PROPERTY_NAME));
        }
    }

    /**
     * Registers a connector without replacing a connector already registered
     * for the same type.
     *
     * @param indexingServiceConnector the connector to register
     */
    @Override
    public void addConnector(IndexingServiceConnector indexingServiceConnector) {
        addConnector(indexingServiceConnector, false);
    }

    /**
     * Registers a connector under its type.
     * <p>
     * When a connector is already registered for that type, the new one is
     * rejected (an error is logged) unless {@code override} is {@code true}.
     *
     * @param indexingServiceConnector the connector to register
     * @param override {@code true} to replace an already registered connector of
     *          the same type; {@code false} or {@code null} keeps the existing one
     */
    @Override
    public void addConnector(IndexingServiceConnector indexingServiceConnector, Boolean override) {
        String connectorType = indexingServiceConnector.getType();
        // Null-safe test: a null flag is handled like "false" instead of raising
        // a NullPointerException
        boolean replaceExisting = Boolean.TRUE.equals(override);

        if (getConnectors().containsKey(connectorType) && !replaceExisting) {
            LOG.error("Impossible to add connector {}. A connector with the same name has already been registered.",
                    connectorType);
        } else {
            getConnectors().put(connectorType, indexingServiceConnector);
            LOG.info("An Indexing Connector has been added: {}", connectorType);
        }
    }

    /**
     * Handles the indexing queue: reads the queued operations, transforms them
     * into ES requests and sends those requests to ES, one bulk after another.
     * <p>
     * This method is ONLY called by the job scheduler. It is deliberately not
     * annotated with @ExoTransactional because the whole run must not be
     * executed in one single transaction.
     */
    @Override
    public void process() {
        // Keep processing bulks until one of them holds fewer operations than
        // BATCH_NUMBER (default = 1000), i.e. until the queue has been drained
        while (true) {
            int processedOperations = processBulk();
            if (processedOperations < batchNumber) {
                break;
            }
        }
    }

    /**
     * Processes one bulk of at most {@code batchNumber} queued operations.
     * <p>
     * The operations are grouped in memory per operation type and entity type,
     * handled in the order INIT, DELETE_ALL, REINDEX_ALL, then
     * Create/Update/Delete, and finally purged from the queue.
     *
     * @return the number of operations read from the queue for this bulk
     *         (0 when the queue is empty)
     */
    private int processBulk() {
        // Get BATCH_NUMBER (default = 1000) first indexing operations
        List<IndexingOperation> indexingOperations = indexingOperationDAO.findAllFirst(batchNumber);
        if (indexingOperations == null || indexingOperations.isEmpty()) {
            // Nothing is queued: skip the processing passes and the purge query
            return 0;
        }

        // Map<OperationType={Create,Delete,...}, Map<String=EntityType,
        // List<IndexingOperation>>> indexingQueueSorted
        Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted = new HashMap<>();
        long maxIndexingOperationId = 0;

        // Order the indexing operations per operation and type in the map:
        // <Operation, <Type, List<IndexingOperation>>>
        for (IndexingOperation indexingOperation : indexingOperations) {
            putIndexingOperationInMemoryQueue(indexingOperation, indexingQueueSorted);
            // Keep track of the highest operation id of the bulk
            if (maxIndexingOperationId < indexingOperation.getId()) {
                maxIndexingOperationId = indexingOperation.getId();
            }
        }

        processInit(indexingQueueSorted);
        processDeleteAll(indexingQueueSorted);
        processReindexAll(indexingQueueSorted);
        processCUD(indexingQueueSorted);

        // Purge the queue up to (and including) the highest id of this bulk
        indexingOperationDAO.deleteAllIndexingOperationsHavingIdLessThanOrEqual(maxIndexingOperationId);
        return indexingOperations.size();
    }

    /**
     * Files an indexing operation in the temporary in-memory indexing queue,
     * under its operation type and then its entity type. Missing levels of the
     * map are created on the fly.
     *
     * @param indexingOperation the operation to add to the temporary in-memory
     *          indexing queue
     * @param indexingQueueSorted temporary in-memory indexing queue
     */
    private void putIndexingOperationInMemoryQueue(IndexingOperation indexingOperation,
            Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted) {
        OperationType operationType = indexingOperation.getOperation();
        String entityType = indexingOperation.getEntityType();

        // First level: one entry per operation type
        if (!indexingQueueSorted.containsKey(operationType)) {
            indexingQueueSorted.put(operationType, new HashMap<String, List<IndexingOperation>>());
        }
        Map<String, List<IndexingOperation>> operationsByEntityType = indexingQueueSorted.get(operationType);

        // Second level: one entry per entity type
        if (!operationsByEntityType.containsKey(entityType)) {
            operationsByEntityType.put(entityType, new ArrayList<IndexingOperation>());
        }

        // Add the indexing operation in the specific Operation -> Type
        operationsByEntityType.get(entityType).add(indexingOperation);
    }

    /**
     * Processes all the Create / Update / Delete operations of the bulk.
     * <p>
     * The request contents are accumulated in a single bulk request, in the
     * order DELETE, CREATE, UPDATE. After every operation the size of the bulk
     * request is checked, and the request is sent right away once the size limit
     * is reached. Whatever is left at the end is sent in a last request.
     * <p>
     * Operations made useless by another one are dropped from the in-memory
     * queue:
     * <ul>
     * <li>a DELETE drops the CREATE operations of the same entity having a lower
     * id, and all the UPDATE operations of the same entity;</li>
     * <li>a CREATE drops all the UPDATE operations of the same entity.</li>
     * </ul>
     *
     * @param indexingQueueSorted temporary in-memory indexing queue; its DELETE,
     *          CREATE and UPDATE entries are removed by this method
     */
    private void processCUD(Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted) {
        // Initialise bulk request for CUD operations
        String bulkRequest = "";

        // Process Delete document operation
        Map<String, List<IndexingOperation>> deleteOperations = indexingQueueSorted.get(OperationType.DELETE);
        if (deleteOperations != null) {
            for (List<IndexingOperation> operationsOfType : deleteOperations.values()) {
                for (IndexingOperation deleteIndexQueue : operationsOfType) {
                    bulkRequest += elasticContentRequestBuilder.getDeleteDocumentRequestContent(
                            (ElasticIndexingServiceConnector) getConnectors().get(deleteIndexQueue.getEntityType()),
                            deleteIndexQueue.getEntityId());
                    // Drop the create operations of this entity queued before the
                    // delete (lower id), and every update operation of this entity
                    deleteOperationsByEntityIdForTypesBefore(new OperationType[] { OperationType.CREATE },
                            indexingQueueSorted, deleteIndexQueue);
                    deleteOperationsByEntityIdForTypes(new OperationType[] { OperationType.UPDATE },
                            indexingQueueSorted, deleteIndexQueue);
                    // Check if the bulk request limit size is already reached (same
                    // check as for the create and update operations)
                    bulkRequest = checkBulkRequestSizeReachedLimitation(bulkRequest);
                }
            }
            // Remove the delete operations from the map
            indexingQueueSorted.remove(OperationType.DELETE);
        }

        // Process Create document operation
        Map<String, List<IndexingOperation>> createOperations = indexingQueueSorted.get(OperationType.CREATE);
        if (createOperations != null) {
            for (List<IndexingOperation> operationsOfType : createOperations.values()) {
                for (IndexingOperation createIndexQueue : operationsOfType) {
                    bulkRequest += elasticContentRequestBuilder.getCreateDocumentRequestContent(
                            (ElasticIndexingServiceConnector) getConnectors().get(createIndexQueue.getEntityType()),
                            createIndexQueue.getEntityId());
                    // Remove the object from other update operations for this entityId
                    deleteOperationsByEntityIdForTypes(new OperationType[] { OperationType.UPDATE },
                            indexingQueueSorted, createIndexQueue);
                    // Check if the bulk request limit size is already reached
                    bulkRequest = checkBulkRequestSizeReachedLimitation(bulkRequest);
                }
            }
            // Remove the create operations from the map
            indexingQueueSorted.remove(OperationType.CREATE);
        }

        // Process Update document operation
        Map<String, List<IndexingOperation>> updateOperations = indexingQueueSorted.get(OperationType.UPDATE);
        if (updateOperations != null) {
            for (List<IndexingOperation> operationsOfType : updateOperations.values()) {
                for (IndexingOperation updateIndexQueue : operationsOfType) {
                    bulkRequest += elasticContentRequestBuilder.getUpdateDocumentRequestContent(
                            (ElasticIndexingServiceConnector) getConnectors().get(updateIndexQueue.getEntityType()),
                            updateIndexQueue.getEntityId());
                    // Check if the bulk request limit size is already reached
                    bulkRequest = checkBulkRequestSizeReachedLimitation(bulkRequest);
                }
            }
            // Remove the update operations from the map
            indexingQueueSorted.remove(OperationType.UPDATE);
        }

        // Send what has not been flushed yet
        if (StringUtils.isNotBlank(bulkRequest)) {
            elasticIndexingClient.sendCUDRequest(bulkRequest);
        }
    }

    /**
     * Processes the INIT operations of the bulk (if any): for every entity type
     * having such an operation, the requests creating the ES index and the ES
     * type are sent through the connector registered for that entity type.
     *
     * @param indexingQueueSorted temporary in-memory indexing queue; its INIT
     *          entry is removed by this method
     */
    private void processInit(Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted) {
        if (!indexingQueueSorted.containsKey(OperationType.INIT)) {
            // No init requested in this bulk
            return;
        }

        Map<String, List<IndexingOperation>> initOperations = indexingQueueSorted.get(OperationType.INIT);
        for (String entityType : initOperations.keySet()) {
            IndexingServiceConnector connector = getConnectors().get(entityType);
            sendInitRequests(connector);
        }
        indexingQueueSorted.remove(OperationType.INIT);
    }

    /**
     * Processes the DELETE_ALL operations of the bulk (if any), i.e. the
     * requests asking to remove all the documents of a type from ES.
     *
     * @param indexingQueueSorted temporary in-memory indexing queue; its
     *          DELETE_ALL entry is removed by this method
     */
    private void processDeleteAll(Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted) {
        if (!indexingQueueSorted.containsKey(OperationType.DELETE_ALL)) {
            // No "delete all" requested in this bulk
            return;
        }

        Map<String, List<IndexingOperation>> deleteAllOperations = indexingQueueSorted
                .get(OperationType.DELETE_ALL);
        for (List<IndexingOperation> operationsOfType : deleteAllOperations.values()) {
            for (IndexingOperation deleteAllOperation : operationsOfType) {
                processDeleteAll(deleteAllOperation, indexingQueueSorted);
            }
        }
        indexingQueueSorted.remove(OperationType.DELETE_ALL);
    }

    /**
     * Handles a single DELETE_ALL operation: logs it in the audit trail, asks ES
     * to delete all the documents of the connector's type, then drops from the
     * in-memory queue the create / update / delete operations of the same entity
     * type that have a lower id than the DELETE_ALL operation.
     *
     * @param indexingOperation the DELETE_ALL operation to handle
     * @param indexingQueueSorted temporary in-memory indexing queue
     */
    private void processDeleteAll(IndexingOperation indexingOperation,
            Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted) {
        // Connector in charge of the entity type targeted by the operation
        String entityType = indexingOperation.getEntityType();
        ElasticIndexingServiceConnector connector = (ElasticIndexingServiceConnector) getConnectors()
                .get(entityType);

        // Log in the audit trail
        auditTrail.audit(ElasticIndexingAuditTrail.DELETE_ALL, null, null, connector.getType(), null, null, 0);

        // Call ES: remove all the documents of this type
        elasticIndexingClient.sendDeleteAllDocsOfTypeRequest(connector.getIndex(), connector.getType());

        // The CUD operations planned before this delete all are now useless
        OperationType[] supersededOperations = new OperationType[] { OperationType.CREATE, OperationType.UPDATE,
                OperationType.DELETE };
        deleteOperationsForTypesBefore(supersededOperations, indexingQueueSorted, indexingOperation);
    }

    /**
     * Processes the REINDEX_ALL operations of the bulk (if any).
     * <p>
     * For each of them, a DELETE_ALL operation is queued for the entity type,
     * then the ids of all the entities are read from the connector, page by page
     * ({@code reindexBatchSize} ids per page), and one CREATE operation per id is
     * queued. The queued operations are persisted through the DAO, not added to
     * the in-memory queue. The duration of the whole treatment is logged in the
     * audit trail.
     *
     * @param indexingQueueSorted temporary in-memory indexing queue; its
     *          REINDEX_ALL entry is removed by this method
     */
    private void processReindexAll(Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted) {
        Map<String, List<IndexingOperation>> reindexOperations = indexingQueueSorted
                .get(OperationType.REINDEX_ALL);
        if (reindexOperations == null) {
            // No "reindex all" requested in this bulk
            return;
        }

        for (Map.Entry<String, List<IndexingOperation>> entry : reindexOperations.entrySet()) {
            String entityType = entry.getKey();
            for (IndexingOperation indexingOperation : entry.getValue()) {
                long startTime = System.currentTimeMillis();
                // 1- Queue the deletion of all the documents of this type
                indexingOperationDAO.create(new IndexingOperation(null, entityType, OperationType.DELETE_ALL));
                // 2- Get all the documents ID
                IndexingServiceConnector connector = getConnectors().get(indexingOperation.getEntityType());
                // 3- Inject as a CUD operation
                int offset = 0;
                int numberIndexed;
                do {
                    List<String> ids = connector.getAllIds(offset, reindexBatchSize);
                    numberIndexed = (ids == null) ? 0 : ids.size();
                    if (numberIndexed > 0) {
                        List<IndexingOperation> operations = new ArrayList<>(numberIndexed);
                        for (String id : ids) {
                            operations.add(new IndexingOperation(id, entityType, OperationType.CREATE));
                        }
                        indexingOperationDAO.createAll(operations);
                        offset += reindexBatchSize;
                    }
                    // A full page means there may be more ids to read. An empty page
                    // always ends the loop, even when reindexBatchSize is 0 (which
                    // would otherwise loop forever).
                } while (numberIndexed > 0 && numberIndexed == reindexBatchSize);
                // 4- log in Audit Trail
                auditTrail.audit(ElasticIndexingAuditTrail.REINDEX_ALL, null, null, entityType, null, null,
                        (System.currentTimeMillis() - startTime));
            }
        }
        indexingQueueSorted.remove(OperationType.REINDEX_ALL);
    }

    /**
     * Drops from the in-memory queue, for each of the given operation types, the
     * operations that target the same entity type as the reference operation and
     * that have a lower id than the reference operation.
     *
     * @param operations the operation types to clean
     * @param indexingQueueSorted temporary in-memory indexing queue
     * @param referenceOperation the operation whose entity type and id are used
     *          as reference
     */
    private void deleteOperationsForTypesBefore(OperationType[] operations,
            Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted,
            IndexingOperation referenceOperation) {
        String entityType = referenceOperation.getEntityType();

        for (OperationType operation : operations) {
            if (!indexingQueueSorted.containsKey(operation)) {
                continue;
            }
            Map<String, List<IndexingOperation>> operationsByEntityType = indexingQueueSorted.get(operation);
            if (!operationsByEntityType.containsKey(entityType)) {
                continue;
            }

            Iterator<IndexingOperation> candidates = operationsByEntityType.get(entityType).iterator();
            while (candidates.hasNext()) {
                IndexingOperation candidate = candidates.next();
                // An operation with a lower id than the reference one is removed
                if (candidate.getId() < referenceOperation.getId()) {
                    candidates.remove();
                }
            }
        }
    }

    /**
     * Drops from the in-memory queue, for each of the given operation types, the
     * operations that target the same entity (same entity type and same entity
     * id) as the reference operation and that have a lower id than the reference
     * operation.
     *
     * @param operations the operation types to clean
     * @param indexingQueueSorted temporary in-memory indexing queue
     * @param indexQueue the operation whose entity and id are used as reference
     */
    private void deleteOperationsByEntityIdForTypesBefore(OperationType[] operations,
            Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted,
            IndexingOperation indexQueue) {
        String entityType = indexQueue.getEntityType();

        for (OperationType operation : operations) {
            if (!indexingQueueSorted.containsKey(operation)) {
                continue;
            }
            Map<String, List<IndexingOperation>> operationsByEntityType = indexingQueueSorted.get(operation);
            if (!operationsByEntityType.containsKey(entityType)) {
                continue;
            }

            Iterator<IndexingOperation> candidates = operationsByEntityType.get(entityType).iterator();
            while (candidates.hasNext()) {
                IndexingOperation candidate = candidates.next();
                // Only the operations queued before the reference one (lower id)
                // and targeting the same entity are removed
                boolean queuedBefore = candidate.getId() < indexQueue.getId();
                if (queuedBefore && candidate.getEntityId().equals(indexQueue.getEntityId())) {
                    candidates.remove();
                }
            }
        }
    }

    /**
     * Drops from the in-memory queue, for each of the given operation types, all
     * the operations that target the same entity (same entity type and same
     * entity id) as the reference operation, whatever their id.
     *
     * @param operations the operation types to clean
     * @param indexingQueueSorted temporary in-memory indexing queue
     * @param indexQueue the operation whose entity is used as reference
     */
    private void deleteOperationsByEntityIdForTypes(OperationType[] operations,
            Map<OperationType, Map<String, List<IndexingOperation>>> indexingQueueSorted,
            IndexingOperation indexQueue) {
        String entityType = indexQueue.getEntityType();

        for (OperationType operation : operations) {
            if (!indexingQueueSorted.containsKey(operation)) {
                continue;
            }
            Map<String, List<IndexingOperation>> operationsByEntityType = indexingQueueSorted.get(operation);
            if (!operationsByEntityType.containsKey(entityType)) {
                continue;
            }

            Iterator<IndexingOperation> candidates = operationsByEntityType.get(entityType).iterator();
            while (candidates.hasNext()) {
                IndexingOperation candidate = candidates.next();
                if (candidate.getEntityId().equals(indexQueue.getEntityId())) {
                    candidates.remove();
                }
            }
        }
    }

    /**
     * Sends to ES the two requests initializing a connector: first the creation
     * of its index, then the creation of its type with the connector's mapping.
     *
     * @param indexingServiceConnector the connector to initialize; it is cast to
     *          {@link ElasticIndexingServiceConnector}
     */
    private void sendInitRequests(IndexingServiceConnector indexingServiceConnector) {
        ElasticIndexingServiceConnector elasticConnector =
                (ElasticIndexingServiceConnector) indexingServiceConnector;

        // Send request to create index
        elasticIndexingClient.sendCreateIndexRequest(
                elasticConnector.getIndex(),
                elasticContentRequestBuilder.getCreateIndexRequestContent(elasticConnector));

        // Send request to create type
        elasticIndexingClient.sendCreateTypeRequest(
                elasticConnector.getIndex(),
                elasticConnector.getType(),
                elasticConnector.getMapping());
    }

    /**
     * Sends the bulk request immediately when it has reached the size
     * limitation.
     * <p>
     * The size is measured on the UTF-8 encoding of the request, so that the
     * result does not depend on the default charset of the platform.
     * NOTE(review): this assumes the client transmits the request body as UTF-8
     * - confirm against ElasticIndexingClient.
     *
     * @param bulkRequest the bulk request to analyze
     * @return an empty bulk request when the given one has been sent, the given
     *         bulk request unchanged otherwise
     */
    private String checkBulkRequestSizeReachedLimitation(String bulkRequest) {
        int requestSizeInBytes = bulkRequest.getBytes(java.nio.charset.StandardCharsets.UTF_8).length;
        if (requestSizeInBytes < requestSizeLimit) {
            // Still room in the bulk request: keep accumulating
            return bulkRequest;
        }
        elasticIndexingClient.sendCUDRequest(bulkRequest);
        // return an empty bulk request
        return "";
    }

    /**
     * Queues an INIT operation for the given connector type, by persisting it
     * through the DAO.
     *
     * @param connector the connector type, stored as entity type of the operation
     */
    private void addInitOperation(String connector) {
        IndexingOperation initOperation = new IndexingOperation();
        initOperation.setOperation(OperationType.INIT);
        initOperation.setEntityType(connector);
        indexingOperationDAO.create(initOperation);
    }

    /**
     * @return the maximum number of queued operations processed per bulk
     */
    public Integer getBatchNumber() {
        return this.batchNumber;
    }

    /**
     * @param batchNumber the maximum number of queued operations processed per
     *          bulk
     */
    public void setBatchNumber(Integer batchNumber) {
        this.batchNumber = batchNumber;
    }

    /**
     * @return the size, in bytes, from which a bulk request is sent immediately
     */
    public Integer getRequestSizeLimit() {
        return this.requestSizeLimit;
    }

    /**
     * @param requestSizeLimit the size, in bytes, from which a bulk request is
     *          sent immediately
     */
    public void setRequestSizeLimit(Integer requestSizeLimit) {
        this.requestSizeLimit = requestSizeLimit;
    }

    /**
     * @return the number of entity ids requested per page during a "reindex all"
     */
    public int getReindexBatchSize() {
        return this.reindexBatchSize;
    }

    /**
     * @param reindexBatchSize the number of entity ids requested per page during
     *          a "reindex all"
     */
    public void setReindexBatchSize(int reindexBatchSize) {
        this.reindexBatchSize = reindexBatchSize;
    }

    /**
     * Queues an INIT operation for every registered connector, so that the ES
     * index and type get created for each of them.
     */
    @Override
    public void start() {
        initConnectors();
    }

    /**
     * Nothing to do on stop.
     */
    @Override
    public void stop() {
        // Intentionally empty
    }

    /**
     * Queues one INIT operation per registered connector, using the type of the
     * connector.
     */
    private void initConnectors() {
        for (IndexingServiceConnector registeredConnector : getConnectors().values()) {
            String connectorType = registeredConnector.getType();
            addInitOperation(connectorType);
        }
    }
}