org.lilyproject.indexer.engine.IndexUpdater.java Source code

Java tutorial

Introduction

Here is the source code for org.lilyproject.indexer.engine.IndexUpdater.java

Source

/*
 * Copyright 2010 Outerthought bvba
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.lilyproject.indexer.engine;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.lilyproject.indexer.model.indexerconf.DerefValue;
import org.lilyproject.indexer.model.indexerconf.IndexCase;
import org.lilyproject.indexer.model.indexerconf.IndexField;
import org.lilyproject.linkindex.LinkIndex;
import org.lilyproject.repository.api.*;
import org.lilyproject.rowlog.api.RowLogException;
import org.lilyproject.util.repo.RecordEvent;
import org.lilyproject.util.repo.VersionTag;
import org.lilyproject.rowlog.api.RowLogMessage;
import org.lilyproject.rowlog.api.RowLogMessageListener;
import org.lilyproject.util.ObjectUtils;

import static org.lilyproject.util.repo.RecordEvent.Type.*;

import java.io.IOException;
import java.util.*;

/**
 * Updates the index in response to repository events.
 */
public class IndexUpdater implements RowLogMessageListener {
    // Record store: used here to read records, list record variants and obtain the ID generator.
    private Repository repository;
    // Taken from the repository in the constructor; resolves field types and is handed to the VersionTag helpers.
    private TypeManager typeManager;
    // Queried for the records that link to a given record (per version tag and link field).
    private LinkIndex linkIndex;
    // Performs the actual index additions/deletions and exposes the indexer configuration.
    private Indexer indexer;
    // Receives the processing time of every handled message.
    private IndexUpdaterMetrics metrics;
    // Context class loader of the thread that constructed this updater; installed while a message is processed.
    private ClassLoader myContextClassLoader;
    // Per-record lock taken around every index modification done by this class.
    private IndexLocker indexLocker;

    // Logger named after the runtime class of this instance.
    private Log log = LogFactory.getLog(getClass());

    /**
     * Creates an index updater and wires it to its collaborators.
     *
     * <p>The context class loader of the calling thread is remembered so that it can be installed
     * again whenever a message is processed.
     *
     * @param indexer     performs the index modifications and supplies the indexer configuration
     * @param repository  repository to read records from; its type manager is retrieved immediately
     * @param linkIndex   index of links between records, used to find referrers
     * @param indexLocker lock taken around the index modifications of a record
     * @param metrics     metrics object that is updated for each processed message
     * @throws RowLogException declared by the signature; nothing in this constructor body throws it explicitly
     */
    public IndexUpdater(Indexer indexer, Repository repository, LinkIndex linkIndex, IndexLocker indexLocker,
            IndexUpdaterMetrics metrics) throws RowLogException {
        // Capture the class loader of the constructing thread first; processMessage switches to it later.
        this.myContextClassLoader = Thread.currentThread().getContextClassLoader();

        // Repository and the type manager derived from it.
        this.repository = repository;
        this.typeManager = repository.getTypeManager();

        // Remaining collaborators.
        this.indexer = indexer;
        this.linkIndex = linkIndex;
        this.indexLocker = indexLocker;
        this.metrics = metrics;
    }

    /**
     * Handles one row-log message describing a record event and updates the index accordingly.
     *
     * <p>For a delete event the record is removed from the index. For any other event the record is
     * read and (re)indexed as needed. In both cases the denormalized data held by other index entries
     * is updated afterwards, outside of the lock on the changed record.
     *
     * <p>Exceptions raised during processing are logged and not propagated. If the failure is an
     * {@link InterruptedException}, the interrupt status of the current thread is restored so that
     * the caller can still observe the interruption.
     *
     * @param msg the message; its payload is parsed as a {@link RecordEvent} and its row key as a record id
     * @return always {@code true}, also when processing failed with an exception
     */
    public boolean processMessage(RowLogMessage msg) {
        long before = System.currentTimeMillis();

        // During the processing of this message, we switch the context class loader to the one
        // of the Kauri module to which the index updater belongs. This is necessary for Tika
        // to find its parser implementations.

        ClassLoader currentCL = Thread.currentThread().getContextClassLoader();
        try {
            Thread.currentThread().setContextClassLoader(myContextClassLoader);
            RecordEvent event = new RecordEvent(msg.getPayload());
            RecordId recordId = repository.getIdGenerator().fromBytes(msg.getRowKey());

            if (log.isDebugEnabled()) {
                log.debug("Received message: " + event.toJson());
            }

            if (event.getType().equals(DELETE)) {
                // For deleted records, we cannot determine the record type, so we do not know if there was
                // an applicable index case, so we always perform a delete.
                indexLocker.lock(recordId);
                try {
                    indexer.delete(recordId);
                } finally {
                    indexLocker.unlockLogFailure(recordId);
                }

                if (log.isDebugEnabled()) {
                    log.debug(String.format(
                            "Record %1$s: deleted from index (if present) because of " + "delete record event",
                            recordId));
                }

                // After this we can go to update denormalized data
                updateDenormalizedData(recordId, event, null, null);
            } else {
                Map<Long, Set<String>> vtagsByVersion;
                Map<Scope, Set<FieldType>> updatedFieldsByScope;

                indexLocker.lock(recordId);
                try {
                    IdRecord record;
                    try {
                        record = repository.readWithIds(recordId, null, null);
                    } catch (RecordNotFoundException e) {
                        // The record has been deleted in the meantime.
                        // For now, we do nothing, when the delete event is received the record will be removed
                        // from the index.
                        // TODO: we should process all outstanding messages for the record (up to delete) in one go
                        return true;
                    }

                    // Read the vtags of the record. Note that while this algorithm is running, the record can
                    // meanwhile undergo changes. However, we continuously work with the snapshot of the vtags
                    // mappings read here. The processing of later events will bring the index up to date with
                    // any new changes.
                    Map<String, Long> vtags = VersionTag.getTagsById(record, typeManager);
                    vtagsByVersion = VersionTag.tagsByVersion(vtags);

                    updatedFieldsByScope = getFieldTypeAndScope(event.getUpdatedFields());

                    handleRecordCreateUpdate(record, event, vtags, vtagsByVersion, updatedFieldsByScope);
                } finally {
                    indexLocker.unlockLogFailure(recordId);
                }

                updateDenormalizedData(recordId, event, updatedFieldsByScope, vtagsByVersion);
            }

        } catch (Exception e) {
            // This handler catches everything, including InterruptedException. Swallowing an interrupt
            // would hide it from the thread that runs this listener, so put the flag back.
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            // TODO if the error is an IndexLockException: indexing can be retried later thus rather return false
            log.error("Error processing event in indexer.", e);
        } finally {
            // Always record the elapsed time and restore the caller's context class loader.
            long after = System.currentTimeMillis();
            metrics.updates.inc(after - before);
            Thread.currentThread().setContextClassLoader(currentCL);
        }
        return true;
    }

    /**
     * Updates the index entries of a record in response to a create or update event.
     *
     * <p>The index case is looked up from the record type name and variant properties of the record.
     * When no index case applies nothing is indexed here. Otherwise the set of version tags to
     * (re)index is determined from the kind of event, and the record is handed to the indexer.
     *
     * @param record               the record as currently read from the repository
     * @param event                the event being processed
     * @param vtags                version tags of the record mapped to the version they point to
     * @param vtagsByVersion       the same version tags, grouped by version
     * @param updatedFieldsByScope the field types reported as updated by the event, grouped by scope
     * @throws Exception whatever the indexer or the helpers called from here throw
     */
    private void handleRecordCreateUpdate(IdRecord record, RecordEvent event, Map<String, Long> vtags,
            Map<Long, Set<String>> vtagsByVersion, Map<Scope, Set<FieldType>> updatedFieldsByScope)
            throws Exception {

        // The index case is selected using the record type name and the variant properties of the
        // record id, so the same rules apply to every version of the record.
        IndexCase indexCase = indexer.getConf().getIndexCase(record.getRecordTypeName(),
                record.getId().getVariantProperties());

        if (indexCase == null) {
            // Nothing to index for this record itself. Data of this record may still be denormalized
            // into other index entries; the caller takes care of that afterwards.
            return;
        }

        Set<String> tagsToIndex = new HashSet<String>();

        if (event.getType().equals(CREATE)) {
            // Brand new record: index for every applicable vtag.
            indexer.setIndexAllVTags(tagsToIndex, vtags, indexCase, record);
        } else if (event.getRecordTypeChanged()) {
            // A changed record type means a changed IndexCase. The previous record type is unknown here,
            // hence also which vtags were indexed, so everything is dropped and rebuilt.
            indexer.delete(record.getId());

            if (log.isDebugEnabled()) {
                log.debug(String.format(
                        "Record %1$s: deleted existing entries from index (if present) because of "
                                + "record type change",
                        record.getId()));
            }

            indexer.setIndexAllVTags(tagsToIndex, vtags, indexCase, record);
        } else {
            selectVTagsForRegularUpdate(record, event, vtags, vtagsByVersion, updatedFieldsByScope, indexCase,
                    tagsToIndex);
        }

        // Perform the actual indexing for the collected vtags.
        indexer.index(record, tagsToIndex, vtags);
    }

    /**
     * Determines, for an update that is neither a create nor a record type change, which version
     * tags must be reindexed, and deletes index entries that are no longer wanted.
     *
     * @param tagsToIndex output set to which the version tags that need indexing are added
     */
    private void selectVTagsForRegularUpdate(IdRecord record, RecordEvent event, Map<String, Long> vtags,
            Map<Long, Set<String>> vtagsByVersion, Map<Scope, Set<FieldType>> updatedFieldsByScope,
            IndexCase indexCase, Set<String> tagsToIndex) throws Exception {

        // Step 1: the first version was created on a record that already existed, so a versionless
        // index entry might be present and has to go.
        if (event.getVersionCreated() == 1 && event.getType().equals(UPDATE)
                && indexCase.getIndexVersionless()) {
            indexer.delete(record.getId(), VersionTag.VERSIONLESS_TAG);

            if (log.isDebugEnabled()) {
                log.debug(String.format(
                        "Record %1$s: deleted versionless entry from index (if present) because of "
                                + "creation first version",
                        record.getId()));
            }
        }

        // Step 2: changed non-versioned fields that matter to the index cause a reindex of all vtags.
        Set<FieldType> nonVersionedChanges = updatedFieldsByScope.get(Scope.NON_VERSIONED);
        if (nonVersionedChanges.size() > 0 && atLeastOneUsedInIndex(nonVersionedChanges)) {
            indexer.setIndexAllVTags(tagsToIndex, vtags, indexCase, record);
            if (log.isDebugEnabled()) {
                log.debug(String.format("Record %1$s: non-versioned fields changed, will reindex all vtags.",
                        record.getId()));
            }
        }

        // Step 3: changed versioned(-mutable) fields. Skipped when step 2 already scheduled vtags.
        // Newly created versions are often accompanied by vtag field changes, which step 4 covers too.
        if (tagsToIndex.isEmpty()
                && (event.getVersionCreated() != -1 || event.getVersionUpdated() != -1)
                && (atLeastOneUsedInIndex(updatedFieldsByScope.get(Scope.VERSIONED))
                        || atLeastOneUsedInIndex(updatedFieldsByScope.get(Scope.VERSIONED_MUTABLE)))) {

            long affectedVersion;
            if (event.getVersionCreated() != -1) {
                affectedVersion = event.getVersionCreated();
            } else {
                affectedVersion = event.getVersionUpdated();
            }

            if (vtagsByVersion.containsKey(affectedVersion)) {
                // Intersection of the vtags the index case wants and the vtags pointing to this version.
                Set<String> wantedTags = new HashSet<String>(indexCase.getVersionTags());
                wantedTags.retainAll(vtagsByVersion.get(affectedVersion));
                tagsToIndex.addAll(wantedTags);

                if (log.isDebugEnabled()) {
                    log.debug(String.format(
                            "Record %1$s: versioned(-mutable) fields changed, will index for all tags of "
                                    + "modified version %2$s that require indexing: %3$s",
                            record.getId(), affectedVersion, indexer.vtagSetToNameString(tagsToIndex)));
                }
            }
        }

        // Step 4: the vtag fields themselves changed.
        Set<String> touchedVTagFields = VersionTag.filterVTagFields(event.getUpdatedFields(), typeManager);
        // Those already scheduled for reindexing need no further treatment.
        touchedVTagFields.removeAll(tagsToIndex);
        for (String touchedVTag : touchedVTagFields) {
            boolean stillToBeIndexed = vtags.containsKey(touchedVTag)
                    && indexCase.getVersionTags().contains(touchedVTag);

            if (!stillToBeIndexed) {
                // The vtag is gone from the record, or is not subject to indexing: remove its entry.
                indexer.delete(record.getId(), touchedVTag);
                if (log.isDebugEnabled()) {
                    log.debug(String.format("Record %1$s: deleted from index for deleted vtag %2$s",
                            record.getId(), indexer.safeLoadTagName(touchedVTag)));
                }
                continue;
            }

            if (log.isDebugEnabled()) {
                log.debug(String.format("Record %1$s: will index for created or updated vtag %2$s",
                        record.getId(), indexer.safeLoadTagName(touchedVTag)));
            }
            tagsToIndex.add(touchedVTag);
        }
    }

    /**
     * Re-indexes the records whose index entries might contain data denormalized from the given
     * (created, updated or deleted) record.
     *
     * <p>The work is done in three phases: collect the dereferencing index fields that are affected
     * together with the version tags to consider, follow the links backwards to find the referring
     * records, and finally re-index each referrer once under its own index lock.
     *
     * <p>Failures on an individual referrer (record no longer readable, lock not obtained, indexing
     * error) are logged and do not prevent the remaining referrers from being processed. If the
     * thread is interrupted while reading or indexing a referrer, the interrupt status is restored
     * and the method returns without handling the remaining referrers.
     *
     * @param recordId             id of the record whose change is being processed
     * @param event                the event describing the change
     * @param updatedFieldsByScope updated field types grouped by scope; only read for non-delete events
     *                             (the delete path of {@code processMessage} passes {@code null})
     * @param vtagsByVersion       version tags of the record grouped by version; only read for
     *                             non-delete events
     */
    private void updateDenormalizedData(RecordId recordId, RecordEvent event,
            Map<Scope, Set<FieldType>> updatedFieldsByScope, Map<Long, Set<String>> vtagsByVersion) {

        // This algorithm is designed to first collect all the reindex-work, and then to perform it.
        // Otherwise the same document would be indexed multiple times if it would become invalid
        // because of different reasons (= different indexFields).

        //
        // Collect all the relevant IndexFields, and for each the relevant vtags
        //

        // This map will contain all the IndexFields we need to treat, and for each one the vtags to be
        // considered. Keys are compared by identity; get() creates an empty set for an unknown IndexField.
        Map<IndexField, Set<String>> indexFieldsVTags = new IdentityHashMap<IndexField, Set<String>>() {
            @Override
            public Set<String> get(Object key) {
                if (!this.containsKey(key) && key instanceof IndexField) {
                    this.put((IndexField) key, new HashSet<String>());
                }
                return super.get(key);
            }
        };

        // There are two cases when denormalized data needs updating:
        //   1. when the content of a (vtagged) record changes
        //   2. when vtags change (are added, removed or point to a different version)
        // We now handle these 2 cases.

        // === Case 1 === updates in response to changes to this record

        // The created version takes precedence over the updated version; -1 means "none".
        long version = event.getVersionCreated() == -1 ? event.getVersionUpdated() : event.getVersionCreated();

        // Determine the relevant index fields
        List<IndexField> indexFields;
        if (event.getType() == RecordEvent.Type.DELETE) {
            indexFields = indexer.getConf().getDerefIndexFields();
        } else {
            indexFields = new ArrayList<IndexField>();

            collectDerefIndexFields(updatedFieldsByScope.get(Scope.NON_VERSIONED), indexFields);

            // Versioned changes only matter when some vtag points to the affected version.
            if (version != -1 && vtagsByVersion.get(version) != null) {
                collectDerefIndexFields(updatedFieldsByScope.get(Scope.VERSIONED), indexFields);
                collectDerefIndexFields(updatedFieldsByScope.get(Scope.VERSIONED_MUTABLE), indexFields);
            }
        }

        // For each indexField, determine the vtags of the referrer that we should consider.
        // In the context of this algorithm, a referrer is each record whose index might contain
        // denormalized data from the record of which we are now processing the change event.
        for (IndexField indexField : indexFields) {
            DerefValue derefValue = (DerefValue) indexField.getValue();
            FieldType fieldType = derefValue.getTargetField();

            //
            // Determine the vtags of the referrer that we should consider
            //
            Set<String> referrerVtags = indexFieldsVTags.get(indexField);

            // we do not know if the referrer has any versions at all, so always add the versionless tag
            referrerVtags.add(VersionTag.VERSIONLESS_TAG);

            if (fieldType.getScope() == Scope.NON_VERSIONED || event.getType() == RecordEvent.Type.DELETE) {
                // If it is a non-versioned field, then all vtags should be considered.
                // If it is a delete event, we do not know what vtags existed for the record, so consider them all.
                referrerVtags.addAll(indexer.getConf().getVtags());
            } else if (version != -1) {
                // Otherwise only the vtags of the created/updated version, if any
                Set<String> vtags = vtagsByVersion.get(version);
                if (vtags != null) {
                    referrerVtags.addAll(vtags);
                }
            }
        }

        // === Case 2 === handle updated/added/removed vtags

        Set<String> changedVTagFields = VersionTag.filterVTagFields(event.getUpdatedFields(), typeManager);
        if (!changedVTagFields.isEmpty()) {
            // In this case, the IndexFields which we need to handle are those that use fields from:
            //  - the previous version to which the vtag pointed (if it is not a new vtag)
            //  - the new version to which the vtag points (if it is not a deleted vtag)
            // But rather than calculating all that (consider the need to retrieve the versions),
            // for now we simply consider all IndexFields.
            // TODO could optimize this to exclude deref fields that use only non-versioned fields?
            for (IndexField indexField : indexer.getConf().getDerefIndexFields()) {
                indexFieldsVTags.get(indexField).addAll(changedVTagFields);
            }
        }

        //
        // Now search the referrers, that is: for each link field, find out which records point to the current record
        // in a certain versioned view (= a certain vtag)
        //

        // This map holds the referrer records to reindex, and for which versions (vtags) they need to be
        // reindexed. get() creates an empty set for an unknown RecordId.
        Map<RecordId, Set<String>> referrersVTags = new HashMap<RecordId, Set<String>>() {
            @Override
            public Set<String> get(Object key) {
                if (!containsKey(key) && key instanceof RecordId) {
                    put((RecordId) key, new HashSet<String>());
                }
                return super.get(key);
            }
        };

        int searchedFollowCount = 0;

        // Run over the IndexFields
        for (Map.Entry<IndexField, Set<String>> entry : indexFieldsVTags.entrySet()) {
            IndexField indexField = entry.getKey();
            Set<String> referrerVTags = entry.getValue();
            DerefValue derefValue = (DerefValue) indexField.getValue();
            List<DerefValue.Follow> follows = derefValue.getFollows();

            // Run over the version tags
            for (String referrerVtag : referrerVTags) {
                Set<RecordId> referrers = new HashSet<RecordId>();
                referrers.add(recordId);

                // Walk the follow chain backwards: from the changed record towards the records that
                // dereference it.
                for (int i = follows.size() - 1; i >= 0; i--) {
                    searchedFollowCount++;
                    DerefValue.Follow follow = follows.get(i);

                    Set<RecordId> newReferrers = new HashSet<RecordId>();

                    if (follow instanceof DerefValue.FieldFollow) {
                        String fieldId = ((DerefValue.FieldFollow) follow).getFieldId();
                        for (RecordId referrer : referrers) {
                            try {
                                Set<RecordId> linkReferrers = linkIndex.getReferrers(referrer, referrerVtag,
                                        fieldId);
                                newReferrers.addAll(linkReferrers);
                            } catch (IOException e) {
                                // Best effort: referrers that cannot be looked up are not reindexed.
                                log.error(String.format("Record %1$s: error retrieving the referrers of %2$s "
                                        + "from the link index (vtag %3$s, field %4$s).",
                                        recordId, referrer, referrerVtag, fieldId), e);
                            }
                        }
                    } else if (follow instanceof DerefValue.VariantFollow) {
                        DerefValue.VariantFollow varFollow = (DerefValue.VariantFollow) follow;
                        Set<String> dimensions = varFollow.getDimensions();

                        // We need to find out the variants of the current set of referrers which have the
                        // same variant properties as the referrer (= same key/value pairs) and additionally
                        // have the extra dimensions defined in the VariantFollow.

                        nextReferrer: for (RecordId referrer : referrers) {

                            Map<String, String> refprops = referrer.getVariantProperties();

                            // If the referrer already has one of the dimensions, then skip it
                            for (String dimension : dimensions) {
                                if (refprops.containsKey(dimension)) {
                                    continue nextReferrer;
                                }
                            }

                            Set<RecordId> variants;
                            try {
                                variants = repository.getVariants(referrer);
                            } catch (Exception e) {
                                // Keep the interrupt visible, as is done for the MasterFollow case below.
                                if (e instanceof InterruptedException) {
                                    Thread.currentThread().interrupt();
                                }
                                // TODO we should probably throw this higher up and let it be handled there
                                throw new RuntimeException(e);
                            }

                            nextVariant: for (RecordId variant : variants) {
                                Map<String, String> varprops = variant.getVariantProperties();

                                // Check it has each of the variant properties of the current referrer record
                                for (Map.Entry<String, String> refprop : refprops.entrySet()) {
                                    if (!ObjectUtils.safeEquals(varprops.get(refprop.getKey()),
                                            refprop.getValue())) {
                                        // skip this variant
                                        continue nextVariant;
                                    }
                                }

                                // Check it has the additional dimensions
                                for (String dimension : dimensions) {
                                    if (!varprops.containsKey(dimension)) {
                                        continue nextVariant;
                                    }
                                }

                                // We have a hit
                                newReferrers.add(variant);
                            }
                        }
                    } else if (follow instanceof DerefValue.MasterFollow) {
                        for (RecordId referrer : referrers) {
                            // A MasterFollow can only point to masters
                            if (referrer.isMaster()) {
                                Set<RecordId> variants;
                                try {
                                    variants = repository.getVariants(referrer);
                                } catch (RepositoryException e) {
                                    // TODO we should probably throw this higher up and let it be handled there
                                    throw new RuntimeException(e);
                                } catch (InterruptedException e) {
                                    // TODO we should probably throw this higher up and let it be handled there
                                    Thread.currentThread().interrupt();
                                    throw new RuntimeException(e);
                                }

                                variants.remove(referrer);
                                newReferrers.addAll(variants);
                            }
                        }
                    } else {
                        throw new RuntimeException(
                                "Unexpected implementation of DerefValue.Follow: " + follow.getClass().getName());
                    }

                    referrers = newReferrers;
                }

                for (RecordId referrer : referrers) {
                    referrersVTags.get(referrer).add(referrerVtag);
                }
            }
        }

        if (log.isDebugEnabled()) {
            log.debug(String.format(
                    "Record %1$s: found %2$s records (times vtags) to be updated because they "
                            + "might contain outdated denormalized data. Checked %3$s follow instances.",
                    recordId, referrersVTags.size(), searchedFollowCount));
        }

        //
        // Now re-index all the found referrers
        //
        for (Map.Entry<RecordId, Set<String>> entry : referrersVTags.entrySet()) {
            RecordId referrer = entry.getKey();
            Set<String> vtagsToIndex = entry.getValue();

            boolean lockObtained = false;
            try {
                indexLocker.lock(referrer);
                lockObtained = true;

                IdRecord record;
                try {
                    // TODO optimize this: we are only interested to know the vtags and to know if the record has versions
                    record = repository.readWithIds(referrer, null, null);
                } catch (Exception e) {
                    if (e instanceof InterruptedException) {
                        // Do not keep working on an interrupted thread; the finally block releases the lock.
                        Thread.currentThread().interrupt();
                        log.error(String.format("Record %1$s: interrupted while reading referrer %2$s, "
                                + "stopped updating denormalized data.", recordId, referrer), e);
                        return;
                    }
                    // One case to be expected here is that the record has been deleted since we read the
                    // list of referrers. Without the record there is nothing to reindex, so skip it
                    // instead of failing on a null record further down.
                    log.error(String.format("Record %1$s: could not read referrer %2$s, its denormalized "
                            + "data is not updated.", recordId, referrer), e);
                    continue;
                }

                IndexCase indexCase = indexer.getConf().getIndexCase(record.getRecordTypeName(),
                        record.getId().getVariantProperties());
                if (indexCase == null) {
                    // The referrer is not subject to indexing.
                    continue;
                }

                try {
                    if (record.getVersion() == null) {
                        // Record without versions: only the versionless entry can exist.
                        if (indexCase.getIndexVersionless() && vtagsToIndex.contains(VersionTag.VERSIONLESS_TAG)) {
                            indexer.index(record, Collections.singleton(VersionTag.VERSIONLESS_TAG));
                        }
                    } else {
                        Map<String, Long> recordVTags = VersionTag.getTagsById(record, typeManager);
                        vtagsToIndex.retainAll(indexCase.getVersionTags());
                        // Only keep vtags which exist on the record
                        vtagsToIndex.retainAll(recordVTags.keySet());
                        indexer.indexRecord(record.getId(), vtagsToIndex, recordVTags);
                    }
                } catch (Exception e) {
                    if (e instanceof InterruptedException) {
                        Thread.currentThread().interrupt();
                        log.error(String.format("Record %1$s: interrupted while reindexing referrer %2$s, "
                                + "stopped updating denormalized data.", recordId, referrer), e);
                        return;
                    }
                    log.error(String.format("Record %1$s: error reindexing referrer %2$s.",
                            recordId, referrer), e);
                }
            } catch (IndexLockException e) {
                log.error(String.format("Record %1$s: could not obtain the index lock for referrer %2$s, "
                        + "its denormalized data is not updated.", recordId, referrer), e);
            } finally {
                if (lockObtained) {
                    indexLocker.unlockLogFailure(referrer);
                }
            }
        }

    }

    /**
     * Appends, for every given field type, the dereferencing index fields that the indexer
     * configuration returns for the id of that field type.
     *
     * @param fieldTypes  field types to look up
     * @param indexFields list to which the index fields found are added
     */
    private void collectDerefIndexFields(Set<FieldType> fieldTypes, List<IndexField> indexFields) {
        Iterator<FieldType> remaining = fieldTypes.iterator();
        while (remaining.hasNext()) {
            FieldType current = remaining.next();
            indexFields.addAll(indexer.getConf().getDerefIndexFields(current.getId()));
        }
    }

    /**
     * Tells whether the indexer configuration reports at least one of the given field types as an
     * index field dependency.
     *
     * @param fieldTypes field types to check
     * @return {@code true} as soon as one field type is a dependency, {@code false} if none is
     *         (including when the set is empty)
     */
    private boolean atLeastOneUsedInIndex(Set<FieldType> fieldTypes) {
        boolean used = false;
        Iterator<FieldType> candidates = fieldTypes.iterator();
        // Stop at the first hit; later field types are not consulted.
        while (!used && candidates.hasNext()) {
            used = indexer.getConf().isIndexFieldDependency(candidates.next().getId());
        }
        return used;
    }

    /**
     * Resolves the given field ids to their field types and groups them by scope.
     *
     * <p>The returned map has an entry (possibly an empty set) for every value of {@link Scope}.
     * Field ids for which no field type exists are left out.
     *
     * @param fieldIds ids of the fields to resolve
     * @return the resolved field types, grouped by their scope
     * @throws RuntimeException when loading a field type fails for another reason than it not
     *         existing; the original exception is kept as the cause
     */
    private Map<Scope, Set<FieldType>> getFieldTypeAndScope(Set<String> fieldIds) {
        Map<Scope, Set<FieldType>> result = new HashMap<Scope, Set<FieldType>>();
        // Pre-populate every scope so that callers can use get(scope) without null checks.
        for (Scope scope : Scope.values()) {
            result.put(scope, new HashSet<FieldType>());
        }

        for (String fieldId : fieldIds) {
            FieldType fieldType;
            try {
                fieldType = typeManager.getFieldTypeById(fieldId);
            } catch (FieldTypeNotFoundException e) {
                // A field without a known field type cannot be assigned to a scope: leave it out.
                continue;
            } catch (Exception e) {
                // The generic handler also sees interrupts; keep the interrupt status intact.
                if (e instanceof InterruptedException) {
                    Thread.currentThread().interrupt();
                }
                // TODO not sure what to do in these kinds of situations
                throw new RuntimeException("Error loading field type with id " + fieldId, e);
            }
            result.get(fieldType.getScope()).add(fieldType);
        }

        return result;
    }
}