uk.gov.nationalarchives.discovery.taxonomy.common.service.impl.QueryBasedCategoriserServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for uk.gov.nationalarchives.discovery.taxonomy.common.service.impl.QueryBasedCategoriserServiceImpl.java

Source

/** 
 * Copyright (c) 2015, The National Archives
 * http://www.nationalarchives.gov.uk 
 * 
 * This Source Code Form is subject to the terms of the Mozilla Public 
 * License, v. 2.0. If a copy of the MPL was not distributed with this 
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package uk.gov.nationalarchives.discovery.taxonomy.common.service.impl;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Service;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.TaxonomyMapper;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.annotation.Loggable;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.lucene.InformationAssetView;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.lucene.InformationAssetViewFields;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.*;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.service.CategorisationResult;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.IAViewRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.InMemoryIAViewRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.CategoryRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.IAViewUpdateRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.InformationAssetViewMongoRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.solr.SolrTaxonomyIAViewRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.service.CategoriserService;
import uk.gov.nationalarchives.discovery.taxonomy.common.service.async.AsyncQueryBasedServiceTaskManager;

import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

/**
 * Query based implementation of {@link CategoriserService}.<br/>
 * It finds the categories relevant to a document through the in-memory
 * repository, optionally scores them by running each category query (filtered
 * on the document reference) through the {@link AsyncQueryBasedServiceTaskManager},
 * and can persist the outcome through the Mongo repositories.<br/>
 * The bean is registered under the name "categoriserService" and is only
 * created when the {@code lucene.categoriser.useQueryBasedCategoriser}
 * property condition is satisfied.
 */
@Service("categoriserService")
@ConditionalOnProperty(prefix = "lucene.categoriser.", value = "useQueryBasedCategoriser")
public class QueryBasedCategoriserServiceImpl implements CategoriserService<CategorisationResult> {

    private static final Logger logger = LoggerFactory.getLogger(QueryBasedCategoriserServiceImpl.class);

    private final CategoryRepository categoryRepository;

    private final IAViewRepository iaViewRepository;

    private final InMemoryIAViewRepository inMemoryiaViewRepository;

    private final InformationAssetViewMongoRepository informationAssetViewMongoRepository;

    private final IAViewUpdateRepository iaViewUpdateRepository;

    private final AsyncQueryBasedServiceTaskManager asyncTaskManager;

    private final SolrTaxonomyIAViewRepository solrTaxonomyIAViewRepository;

    /**
     * Creates the service with all its collaborators (constructor injection).
     *
     * @param categoryRepository
     *            source of all categories when no cached categories are
     *            provided
     * @param iaViewRepository
     *            used to look documents up by reference and to refresh the
     *            index used for categorisation
     * @param inMemoryiaViewRepository
     *            used to find the categories relevant to a document
     * @param informationAssetViewMongoRepository
     *            stores the categorised document
     * @param iaViewUpdateRepository
     *            stores and queries the categorisation updates
     * @param asyncQueryBasedServiceTaskManager
     *            runs the individual category queries asynchronously
     * @param solrTaxonomyIAViewRepository
     *            committed when the taxonomy index is refreshed
     */
    @Autowired
    public QueryBasedCategoriserServiceImpl(CategoryRepository categoryRepository,
            IAViewRepository iaViewRepository, InMemoryIAViewRepository inMemoryiaViewRepository,
            InformationAssetViewMongoRepository informationAssetViewMongoRepository,
            IAViewUpdateRepository iaViewUpdateRepository,
            AsyncQueryBasedServiceTaskManager asyncQueryBasedServiceTaskManager,
            SolrTaxonomyIAViewRepository solrTaxonomyIAViewRepository) {
        super();
        this.categoryRepository = categoryRepository;
        this.iaViewRepository = iaViewRepository;
        this.inMemoryiaViewRepository = inMemoryiaViewRepository;
        this.informationAssetViewMongoRepository = informationAssetViewMongoRepository;
        this.iaViewUpdateRepository = iaViewUpdateRepository;
        this.asyncTaskManager = asyncQueryBasedServiceTaskManager;
        this.solrTaxonomyIAViewRepository = solrTaxonomyIAViewRepository;
    }

    /**
     * Tests the categorisation of the document with the given reference,
     * retrieving a score for every relevant category. Nothing is persisted.
     *
     * @param docReference
     *            reference of the document to look up and categorise
     * @return the categorisation results, sorted by score in descending order
     */
    @Override
    @Loggable
    public List<CategorisationResult> testCategoriseSingle(String docReference) {
        logger.info(".testCategoriseSingle: docreference:{} ", docReference);
        return testCategoriseSingle((iaViewRepository.searchDocByDocReference(docReference)), true, null);
    }

    /**
     * Low end API to test the categorisation of a document<br/>
     * the use of retrieveScoreForAllRelevantCategories parameter has a huge
     * impact on performances:<br/>
     * should be true when displaying the result of categorisation to a user or
     * used to run a process where performances are not critical<br/>
     * should be false when performances are critical: typically, in batch
     * processes.
     * 
     * @param iaView
     *            the document to categorise
     * @param retrieveScoreForAllRelevantCategories
     *            when true, a query is run for every relevant category and
     *            the results are sorted by score in descending order; when
     *            false, queries are only run for categories whose {@code sc}
     *            value is not 0, the other relevant categories get a result
     *            generated without running any query, and the returned list
     *            is not sorted
     * @param cachedCategories
     *            on batch processes, to avoid retrieving and parsing all
     *            category queries, provide cached categories; when null, all
     *            categories are read from the category repository
     * @return the categorisation results for the document
     */
    public List<CategorisationResult> testCategoriseSingle(InformationAssetView iaView,
            boolean retrieveScoreForAllRelevantCategories, List<CategoryWithLuceneQuery> cachedCategories) {
        List<Category> listOfRelevantCategories;
        if (cachedCategories == null) {
            listOfRelevantCategories = inMemoryiaViewRepository.findRelevantCategoriesForDocument(iaView,
                    categoryRepository.findAll());
        } else {
            listOfRelevantCategories = inMemoryiaViewRepository.findRelevantCategoriesForDocument(iaView,
                    cachedCategories);
        }

        if (retrieveScoreForAllRelevantCategories) {
            List<CategorisationResult> listOfScoredResults = runCategorisationWithFSDirectory(iaView,
                    listOfRelevantCategories);
            sortCategorisationResultsByScoreDesc(listOfScoredResults);
            return listOfScoredResults;
        }

        logger.debug(
                ".testCategoriseSingle: runCategorisationWithFSDirectory only on categories with threshold");
        List<CategorisationResult> listOfCategoryResults = new ArrayList<>();

        // categories without threshold: no query needed, the result is generated directly
        listOfCategoryResults.addAll(getListOfGeneratedResultsForCategoriesWithoutThreshold(
                listOfRelevantCategories));

        // categories with threshold: the query has to be run to obtain a result
        List<Category> listOfMatchingCategoriesWithThreshold = getListOfMatchingCategoriesWithThreshold(
                listOfRelevantCategories);
        listOfCategoryResults
                .addAll(runCategorisationWithFSDirectory(iaView, listOfMatchingCategoriesWithThreshold));

        return listOfCategoryResults;
    }

    /**
     * Keeps the categories whose {@code sc} value is different from 0, which
     * this class treats as "having a threshold".
     *
     * @param listOfRelevantCategories
     *            categories to filter
     * @return a new list containing only the categories with a threshold
     */
    private List<Category> getListOfMatchingCategoriesWithThreshold(List<Category> listOfRelevantCategories) {
        List<Category> listOfMatchingCategoriesWithThreshold = new ArrayList<>();
        for (Category category : listOfRelevantCategories) {
            if (category.getSc() != 0d) {
                listOfMatchingCategoriesWithThreshold.add(category);
            }
        }
        return listOfMatchingCategoriesWithThreshold;
    }

    /**
     * Builds a result, without running any query, for every category whose
     * {@code sc} value equals 0 ("without threshold"). The results are created
     * from the category title and ciaid, with null as third constructor
     * argument (presumably the score - to be confirmed against
     * {@link CategorisationResult}).
     *
     * @param listOfRelevantCategories
     *            categories to inspect
     * @return a new list with one generated result per category without
     *         threshold
     */
    private List<CategorisationResult> getListOfGeneratedResultsForCategoriesWithoutThreshold(
            List<Category> listOfRelevantCategories) {
        List<CategorisationResult> listOfGeneratedResultsForCategoriesWithoutThreshold = new ArrayList<>();
        for (Category category : listOfRelevantCategories) {
            if (category.getSc() == 0d) {
                listOfGeneratedResultsForCategoriesWithoutThreshold
                        .add(new CategorisationResult(category.getTtl(), category.getCiaid(), null));
            }
        }
        return listOfGeneratedResultsForCategoriesWithoutThreshold;
    }

    /**
     * Categorises the document with the given reference and saves the result.
     *
     * @param docReference
     *            reference of the document to look up and categorise
     * @return the categorisation results that were saved
     */
    @Override
    @Loggable
    public List<CategorisationResult> categoriseSingle(String docReference) {
        logger.info(".categoriseSingle: docreference:{} ", docReference);
        return categoriseSingle((iaViewRepository.searchDocByDocReference(docReference)));
    }

    /**
     * Categorises the document with the given reference, using the provided
     * cached categories, and saves the result.
     *
     * @param docReference
     *            reference of the document to look up and categorise
     * @param cachedCategories
     *            categories to use instead of reading them from the category
     *            repository; may be null
     * @return the categorisation results that were saved
     */
    @Override
    public List<CategorisationResult> categoriseSingle(String docReference,
            List<CategoryWithLuceneQuery> cachedCategories) {
        logger.info(".categoriseSingle: docreference:{} ", docReference);
        return categoriseSingle((iaViewRepository.searchDocByDocReference(docReference)), cachedCategories);
    }

    /**
     * Categorises the given document without cached categories and saves the
     * result.
     *
     * @param iaView
     *            the document to categorise
     * @return the categorisation results that were saved
     */
    public List<CategorisationResult> categoriseSingle(InformationAssetView iaView) {
        return categoriseSingle(iaView, null);
    }

    /**
     * Categorises the given document in the performance oriented mode (scores
     * are not retrieved for all relevant categories), then saves the resulting
     * categories both in the Mongo IAView table and as a new record in the
     * IAViewUpdate table, using the same creation date for both records.
     *
     * @param iaView
     *            the document to categorise
     * @param cachedCategories
     *            categories to use instead of reading them from the category
     *            repository; may be null
     * @return the categorisation results that were saved
     */
    public List<CategorisationResult> categoriseSingle(InformationAssetView iaView,
            List<CategoryWithLuceneQuery> cachedCategories) {
        List<CategorisationResult> listOfCategorisationResults = testCategoriseSingle(iaView, false,
                cachedCategories);

        List<CategoryLight> categories = getListOfCategoryLightFromListOfCatResult(listOfCategorisationResults);

        // a single timestamp so that both saved records carry the same creation date
        Date creationDate = new Date();

        saveRecordInMongoIAViewTable(iaView, categories, creationDate);

        saveNewRecordInIAViewUpdateTable(iaView, categories, creationDate);

        return listOfCategorisationResults;
    }

    /**
     * Maps the document to an {@link IAViewUpdate}, attaches the categories
     * and saves it through the IAViewUpdate repository.
     *
     * @param iaView
     *            the categorised document
     * @param categories
     *            categories found for the document
     * @param creationDate
     *            creation date passed to the mapper
     */
    private void saveNewRecordInIAViewUpdateTable(InformationAssetView iaView, List<CategoryLight> categories,
            Date creationDate) {
        IAViewUpdate iaViewUpdateFromLuceneIAView = TaxonomyMapper.getIAViewUpdateFromLuceneIAView(iaView,
                creationDate);
        iaViewUpdateFromLuceneIAView.setCategories(categories);
        iaViewUpdateRepository.save(iaViewUpdateFromLuceneIAView);
    }

    /**
     * Maps the document to a {@link MongoInformationAssetView}, attaches the
     * categories and saves it through the Mongo IAView repository.
     *
     * @param iaView
     *            the categorised document
     * @param categories
     *            categories found for the document
     * @param creationDate
     *            creation date passed to the mapper
     */
    private void saveRecordInMongoIAViewTable(InformationAssetView iaView, List<CategoryLight> categories,
            Date creationDate) {
        MongoInformationAssetView mongoIAViewFromLuceneIAView = TaxonomyMapper
                .getMongoIAViewFromLuceneIAView(iaView, creationDate);
        mongoIAViewFromLuceneIAView.setCategories(categories);
        informationAssetViewMongoRepository.save(mongoIAViewFromLuceneIAView);
    }

    /**
     * Converts categorisation results into {@link CategoryLight} objects built
     * from the ciaid and the name of each result.
     *
     * @param listOfCategorisationResults
     *            results to convert
     * @return a new list with one {@link CategoryLight} per result, in the
     *         same order
     */
    private List<CategoryLight> getListOfCategoryLightFromListOfCatResult(
            List<CategorisationResult> listOfCategorisationResults) {
        List<CategoryLight> categories = new ArrayList<>();
        for (CategorisationResult categorisationResult : listOfCategorisationResults) {
            categories.add(new CategoryLight(categorisationResult.getCiaid(), categorisationResult.getName()));
        }
        return categories;
    }

    /**
     * Submits one asynchronous query per category, each filtered on the
     * document reference of the given document, then waits for all of them and
     * collects the non null results.<br/>
     * A query that fails, or a wait that is interrupted, is logged and its
     * result is skipped so that the other results are still returned. If the
     * thread was interrupted while waiting, its interrupt status is restored
     * before returning so that callers can still observe the interruption.
     *
     * @param iaView
     *            the document the queries are restricted to
     * @param listOfRelevantCategories
     *            categories whose query must be run
     * @return the results that could be retrieved, in submission order
     */
    private List<CategorisationResult> runCategorisationWithFSDirectory(InformationAssetView iaView,
            List<Category> listOfRelevantCategories) {
        List<CategorisationResult> listOfCategoryResults = new ArrayList<>();
        List<Future<CategorisationResult>> listOfFutureCategoryResults = new ArrayList<>();

        // TODO PERF cache filter on current document
        // Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(new
        // TermQuery(new Term(
        // InformationAssetViewFields.DOCREFERENCE.toString(),
        // iaView.getDOCREFERENCE()))));
        Query filter = new TermQuery(
                new Term(InformationAssetViewFields.DOCREFERENCE.toString(), iaView.getDOCREFERENCE()));
        for (Category category : listOfRelevantCategories) {
            listOfFutureCategoryResults.add(asyncTaskManager.runUnitFSCategoryQuery(filter, category));
        }

        // remembered instead of re-interrupting right away, so that the remaining
        // results are still collected exactly as before
        boolean interrupted = false;
        for (Future<CategorisationResult> futureCatResult : listOfFutureCategoryResults) {
            try {
                CategorisationResult categorisationResult = futureCatResult.get();
                if (categorisationResult != null) {
                    listOfCategoryResults.add(categorisationResult);
                }
            } catch (InterruptedException | ExecutionException e) {
                if (e instanceof InterruptedException) {
                    interrupted = true;
                }
                // the exception is passed as last argument so that its stack trace is logged
                logger.error(
                        ".runCategorisationWithFSDirectory: an exception occurred while retrieving the categorisation result: {}",
                        futureCatResult, e);
            }
        }

        if (interrupted) {
            // do not swallow the interruption: give the flag back to the thread
            Thread.currentThread().interrupt();
        }

        return listOfCategoryResults;
    }

    /**
     * Sorts the given list in place by score, highest score first. Results
     * without a score (null) are placed after all scored results instead of
     * making the sort fail.
     *
     * @param categorisationResults
     *            list to sort; it is modified
     */
    private void sortCategorisationResultsByScoreDesc(List<CategorisationResult> categorisationResults) {
        Collections.sort(categorisationResults, new Comparator<CategorisationResult>() {
            @Override
            public int compare(CategorisationResult a, CategorisationResult b) {
                if (a.getScore() == null) {
                    return b.getScore() == null ? 0 : 1;
                }
                if (b.getScore() == null) {
                    return -1;
                }
                // descending order: compare b to a
                return b.getScore().compareTo(a.getScore());
            }
        });
    }

    /**
     * Retrieves the updates created after the given update and before "now
     * minus {@code nbOfSecondsInPast} seconds".
     *
     * @param afterIAViewUpdate
     *            update after which the returned updates were created
     * @param nbOfSecondsInPast
     *            number of seconds subtracted from the current time to get the
     *            upper date limit
     * @param limit
     *            limit passed to the repository query
     * @return the updates returned by the repository
     */
    @Override
    public List<IAViewUpdate> getNewCategorisedDocumentsAfterDocumentAndUpToNSecondsInPast(
            IAViewUpdate afterIAViewUpdate, int nbOfSecondsInPast, int limit) {
        Date nowMinusNseconds = getDateWithNSecondsInPast(nbOfSecondsInPast);

        return iaViewUpdateRepository.findDocumentsCreatedAfterDocumentAndCreatedBeforeDate(afterIAViewUpdate,
                nowMinusNseconds, limit);
    }

    /**
     * Retrieves the updates created from the given date and before "now minus
     * {@code nbOfSecondsInPast} seconds".
     *
     * @param date
     *            lower date limit
     * @param nbOfSecondsInPast
     *            number of seconds subtracted from the current time to get the
     *            upper date limit
     * @param limit
     *            limit passed to the repository query
     * @return the updates returned by the repository
     */
    @Override
    public List<IAViewUpdate> getNewCategorisedDocumentsFromDateToNSecondsInPast(Date date, int nbOfSecondsInPast,
            int limit) {
        Date pastDate = getDateWithNSecondsInPast(nbOfSecondsInPast);

        return iaViewUpdateRepository.findDocumentsCreatedFromDateAndCreatedBeforeDate(date, pastDate, limit);
    }

    /**
     * Computes the current date minus the given number of seconds.
     *
     * @param nbOfSeconds
     *            number of seconds to subtract from the current time
     * @return the resulting date
     */
    private Date getDateWithNSecondsInPast(int nbOfSeconds) {
        Calendar instance = Calendar.getInstance();
        instance.add(Calendar.SECOND, -nbOfSeconds);
        return instance.getTime();
    }

    /**
     * Returns the last update as provided by the IAViewUpdate repository.
     *
     * @return the last {@link IAViewUpdate}
     */
    @Override
    public IAViewUpdate findLastIAViewUpdate() {
        return iaViewUpdateRepository.findLastIAViewUpdate();
    }

    /**
     * Commits the Solr taxonomy repository, then refreshes the index used for
     * categorisation.
     */
    @Override
    public void refreshTaxonomyIndex() {
        solrTaxonomyIAViewRepository.commit();
        iaViewRepository.refreshIndexUsedForCategorisation();
    }

}