uk.gov.nationalarchives.discovery.taxonomy.common.service.impl.EvaluationServiceTest.java Source code

Java tutorial

Introduction

Here is the source code for uk.gov.nationalarchives.discovery.taxonomy.common.service.impl.EvaluationServiceTest.java, a JUnit test class for the evaluation service implementation.

Source

/** 
 * Copyright (c) 2015, The National Archives
 * http://www.nationalarchives.gov.uk 
 * 
 * This Source Code Form is subject to the terms of the Mozilla Public 
 * License, v. 2.0. If a copy of the MPL was not distributed with this 
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package uk.gov.nationalarchives.discovery.taxonomy.common.service.impl;

import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
import uk.gov.nationalarchives.discovery.taxonomy.common.config.ServiceConfigurationTest;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.TaxonomyMapper;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.lucene.InformationAssetView;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.Category;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.CategoryEvaluationResult;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.EvaluationReport;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.TestDocument;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.service.PaginatedList;
import uk.gov.nationalarchives.discovery.taxonomy.common.domain.service.TSetBasedCategorisationResult;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.legacy.LegacySystemRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.IAViewRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.LuceneTestTrainingDataSet;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.tools.LuceneTaxonomyMapper;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.CategoryRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.EvaluationReportRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.MongoTestDataSet;
import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.TestDocumentRepository;
import uk.gov.nationalarchives.discovery.taxonomy.common.service.CategoriserService;

import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mockito;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.SpringApplicationConfiguration;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

/**
 * Integration-style JUnit test for {@link EvaluationServiceImpl}.
 * <p>
 * The service under test is assembled by hand (see {@link #initMocks()}) from
 * the autowired Mongo repositories and from Mockito mocks standing in for the
 * IAView repository, the legacy system repository and, where needed, the
 * categoriser service. The Mongo test database is dropped after every test.
 */
@SuppressWarnings("rawtypes")
@ActiveProfiles("tsetBased")
@RunWith(SpringJUnit4ClassRunner.class)
@SpringApplicationConfiguration(classes = ServiceConfigurationTest.class)
public class EvaluationServiceTest {

    /** Service under test; rebuilt before each test by {@link #initMocks()}. */
    public EvaluationServiceImpl evaluationService;

    @Autowired
    TestDocumentRepository testDocumentRepository;

    @Autowired
    private EvaluationReportRepository evaluationReportRepository;

    @Autowired
    MongoTestDataSet mongoTestDataSet;

    // NOTE(review): not referenced anywhere in this class; kept because
    // removing the injection point may change how the test context is wired
    // - confirm before deleting.
    @Autowired
    private LuceneTestTrainingDataSet luceneTestDataSet;

    @Autowired
    private CategoryRepository categoryRepository;

    /** Legacy categories attached to the single test document used by the categorisation test. */
    private String[] legacyCategories = new String[] { "Construction industries", "Labour" };

    // Category titles used as fixture data by testGetEvaluationReport().
    private static final String CAT_BAD_ACCURACY = "Category with Bad Accuracy";
    // NOTE(review): constant says "medium" while the title says "Bad"; the
    // literal is left untouched because it is fixture data.
    private static final String CAT_MEDIUM_RECALL = "Category with Bad Recall";
    private static final String CAT_GOOD = "Category with Good Accuracy and Recall";
    private static final String CAT_FROM_LEGACY_SYSTEM_NOT_KNOWN = "Category Not known in new System";
    private static final String CAT_FROM_CURRENT_SYSTEM_NOT_FOUND = "Category from current system not Found anywhere";

    /**
     * Builds the service under test with mocked IAView and legacy repositories
     * and no categoriser service ({@code null}); tests that need a categoriser
     * rebuild the service themselves.
     */
    @Before
    public void initMocks() throws IOException {
        evaluationService = new EvaluationServiceImpl(categoryRepository, getIaViewRepositoryMock(),
                testDocumentRepository, evaluationReportRepository, getLegacySystemServiceMock(), null);
    }

    /** Drops the Mongo test database so that tests do not see each other's data. */
    @After
    public void dropDatabse() {
        mongoTestDataSet.dropDatabase();
    }

    /**
     * TEST CREATE TEST DATASET
     */

    /**
     * Creating the evaluation dataset from an empty test-document collection
     * must store exactly one document carrying legacy categories.
     */
    @Test
    public void testCreateTestDataSet() {
        mongoTestDataSet.initCategoryCollectionWith1element();
        assertThatTestDocDbIsEmpty();

        evaluationService.createEvaluationTestDataset(10);

        assertThatTestDocDbContainsDocsWithLegacyCategories();
    }

    /** Asserts that the repository holds one test document with a non-null, non-empty legacy category array. */
    private void assertThatTestDocDbContainsDocsWithLegacyCategories() {
        assertThat(testDocumentRepository.count(), is(equalTo(1L)));
        Iterable<TestDocument> trainingDocuments = testDocumentRepository.findAll();
        assertThat(trainingDocuments, is(notNullValue()));
        TestDocument doc = trainingDocuments.iterator().next();
        assertThat(doc, is(notNullValue()));
        // not(emptyArray()) is also satisfied by null, so rule null out first
        // (same pattern as assertThatTestDocumentContainsCurrentSystemCategories).
        assertThat(doc.getLegacyCategories(), is(notNullValue()));
        assertThat(doc.getLegacyCategories(), is(not(emptyArray())));
    }

    /** Asserts that no test document is stored yet. */
    private void assertThatTestDocDbIsEmpty() {
        assertThat(testDocumentRepository.count(), is(equalTo(0L)));
    }

    /**
     * TEST RUN CATEGORISATION ON TEST DATASET
     */

    /**
     * Running the categorisation on a stored test document must fill in its
     * categories. The service is rebuilt here because this test, unlike the
     * others, needs a (mocked) categoriser service.
     */
    @Test
    public void testRunCategorisationOnTestDataSet() {
        initTestDocumentRepositoryWithOneElementWithLegacyCategories();
        evaluationService = new EvaluationServiceImpl(categoryRepository, getIaViewRepositoryMock(),
                testDocumentRepository, evaluationReportRepository, getLegacySystemServiceMock(),
                getCategoriserServiceMock());

        evaluationService.runCategorisationOnTestDataSet(true);

        assertThatTestDocumentContainsCurrentSystemCategories();
    }

    /** Asserts that the single stored test document has a non-null, non-empty category array. */
    private void assertThatTestDocumentContainsCurrentSystemCategories() {
        assertThat(testDocumentRepository.count(), is(equalTo(1L)));
        TestDocument doc = testDocumentRepository.findAll().iterator().next();
        assertThat(doc, notNullValue());
        assertThat(doc.getCategories(), notNullValue());
        assertThat(doc.getCategories(), is(not(emptyArray())));
    }

    /** Stores one test document, built from the sample IAView, with {@link #legacyCategories} set. */
    private void initTestDocumentRepositoryWithOneElementWithLegacyCategories() {
        TestDocument testDocument = TaxonomyMapper.getTestDocumentFromIAView(MongoTestDataSet.getIAViewSample());
        testDocument.setLegacyCategories(legacyCategories);
        testDocumentRepository.save(testDocument);
    }

    /**
     * TEST EVALUATION REPORT
     */

    /**
     * Builds an evaluation report from four stored categories and three fully
     * populated test documents, then checks the number of results, the number
     * of documents, the averaged recall and accuracy, the persistence of the
     * report and the per-category "found" flags.
     */
    @Test
    public void testGetEvaluationReport() {
        initCategoryCollection();
        initTestDocumentRepositoryWithSeveralCompleteDocs();

        EvaluationReport report = evaluationService.getEvaluationReport("test report");

        assertThat(report, notNullValue());
        assertThat(report.getResults(), is(notNullValue()));
        assertThat(report.getResults(), is(not(empty())));
        // Four categories are saved in the repository and a fifth one appears
        // only in the legacy categories of DOC3.
        assertThat("there should be 5 category evaluation results", report.getResults().size(), is(equalTo(5)));
        assertThat("there should be 3 documents processed", report.getNumberOfDocuments(), is(equalTo(3)));

        // Both sides of the comparison go through the same formatter, so the
        // values are compared rounded to four decimal places.
        DecimalFormat df = new DecimalFormat("#.####");

        assertThat(report.getAvgRecall(), is(notNullValue()));
        assertThat("Global recall should be 8/15", df.format(report.getAvgRecall()),
                is(equalTo(df.format(1.0d * 8 / 15))));
        assertThat(report.getAvgAccuracy(), is(notNullValue()));
        assertThat("Global accuracy should be 7/15", df.format(report.getAvgAccuracy()),
                is(equalTo(df.format(1.0d * 7 / 15))));
        assertThat(evaluationReportRepository.count(), is(equalTo(1L)));

        int numNotFoundInCatRepo = 0;
        int numNotFoundInTDocCat = 0;
        int numNotFoundInTDocLegacyCat = 0;
        for (CategoryEvaluationResult categoryEvaluationResult : report.getResults()) {
            if (!categoryEvaluationResult.isFoundInCatRepo()) {
                numNotFoundInCatRepo++;
            }
            if (!categoryEvaluationResult.isFoundInTDocCat()) {
                numNotFoundInTDocCat++;
            }
            if (!categoryEvaluationResult.isFoundInTDocLegacyCat()) {
                numNotFoundInTDocLegacyCat++;
            }
        }
        // Fixture: only the legacy-only category is never saved in the repository.
        assertThat(numNotFoundInCatRepo, is(equalTo(1)));
        // Fixture: the legacy-only category and the "not found anywhere"
        // category are never assigned as a current category of any document.
        assertThat(numNotFoundInTDocCat, is(equalTo(2)));
        // Fixture: only the "not found anywhere" category is missing from
        // every document's legacy categories.
        assertThat(numNotFoundInTDocLegacyCat, is(equalTo(1)));
    }

    /**
     * Saves the four categories known to the current system. Note that
     * {@link #CAT_FROM_LEGACY_SYSTEM_NOT_KNOWN} is deliberately not saved.
     */
    private void initCategoryCollection() {
        saveCategory(CAT_BAD_ACCURACY);
        saveCategory(CAT_MEDIUM_RECALL);
        saveCategory(CAT_GOOD);
        saveCategory(CAT_FROM_CURRENT_SYSTEM_NOT_FOUND);
    }

    /**
     * Saves an unlocked category with a score of 0.0 whose id and title are
     * both set to the given value.
     *
     * @param categoryTtl value used for both the category id and its title
     */
    private void saveCategory(String categoryTtl) {
        Category category = new Category();
        category.setCiaid(categoryTtl);
        category.setTtl(categoryTtl);
        category.setLck(false);
        category.setSc(0.0);
        categoryRepository.save(category);
    }

    /**
     * Stores three test documents (DOC1..DOC3), each with both current and
     * legacy categories set, as input for the evaluation report.
     */
    private void initTestDocumentRepositoryWithSeveralCompleteDocs() {
        TestDocument testDocument1 = TaxonomyMapper.getTestDocumentFromIAView(MongoTestDataSet.getIAViewSample());
        testDocument1.setDocReference("DOC1");
        testDocument1.setCategories(new String[] { CAT_BAD_ACCURACY, CAT_MEDIUM_RECALL });
        testDocument1.setLegacyCategories(new String[] { CAT_BAD_ACCURACY, CAT_MEDIUM_RECALL });
        testDocumentRepository.save(testDocument1);

        TestDocument testDocument2 = TaxonomyMapper.getTestDocumentFromIAView(MongoTestDataSet.getIAViewSample());
        testDocument2.setDocReference("DOC2");
        testDocument2.setCategories(new String[] { CAT_BAD_ACCURACY, CAT_MEDIUM_RECALL });
        testDocument2.setLegacyCategories(new String[] { CAT_MEDIUM_RECALL });
        testDocumentRepository.save(testDocument2);

        TestDocument testDocument3 = TaxonomyMapper.getTestDocumentFromIAView(MongoTestDataSet.getIAViewSample());
        testDocument3.setDocReference("DOC3");
        testDocument3.setCategories(new String[] { CAT_BAD_ACCURACY, CAT_GOOD });
        testDocument3.setLegacyCategories(
                new String[] { CAT_MEDIUM_RECALL, CAT_GOOD, CAT_FROM_LEGACY_SYSTEM_NOT_KNOWN });
        testDocumentRepository.save(testDocument3);
    }

    /**
     * 
     * MOCKS
     * 
     */

    /**
     * Creates a categoriser mock whose {@code testCategoriseSingle} returns a
     * fixed list of three results ("Labour" once, "Forestry" twice) for any
     * String argument.
     *
     * @return the configured mock
     */
    private CategoriserService getCategoriserServiceMock() {
        CategoriserService categoriserService = Mockito.mock(CategoriserService.class);
        List<TSetBasedCategorisationResult> categorisationResults = new ArrayList<TSetBasedCategorisationResult>(
                Arrays.asList(new TSetBasedCategorisationResult("Labour", 1.12f, 10),
                        new TSetBasedCategorisationResult("Forestry", 1.02f, 15),
                        new TSetBasedCategorisationResult("Forestry", 0.12f, 2)));
        Mockito.when(categoriserService.testCategoriseSingle(Mockito.any(String.class)))
                .thenReturn(categorisationResults);

        return categoriserService;
    }

    /**
     * Creates an IAView repository mock that answers every search with the
     * single sample IAView: {@code performSearch} returns a one-element
     * paginated list, {@code searchIAViewIndexByFieldAndPhrase} returns one
     * hit, and {@code getDoc} returns the Lucene document mapped from the
     * sample.
     *
     * @return the configured mock
     */
    private IAViewRepository getIaViewRepositoryMock() {
        IAViewRepository iaViewRepositoryMock = Mockito.mock(IAViewRepository.class);
        PaginatedList<InformationAssetView> searchResult = new PaginatedList<InformationAssetView>();
        searchResult.setNumberOfResults(1);
        searchResult.setResults(Arrays.asList(MongoTestDataSet.getIAViewSample()));
        Mockito.when(iaViewRepositoryMock.performSearch(Mockito.anyString(), Mockito.anyDouble(), Mockito.anyInt(),
                Mockito.anyInt())).thenReturn(searchResult);

        Mockito.when(iaViewRepositoryMock.searchIAViewIndexByFieldAndPhrase(Mockito.anyString(),
                Mockito.anyString(), Mockito.anyInt()))
                .thenReturn(new TopDocs(1, new ScoreDoc[] { new ScoreDoc(0, 1f) }, 1f));

        Mockito.when(iaViewRepositoryMock.getDoc(Mockito.any(ScoreDoc.class)))
                .thenReturn(LuceneTaxonomyMapper.getLuceneDocumentFromIAView(MongoTestDataSet.getIAViewSample()));
        return iaViewRepositoryMock;
    }

    /**
     * Creates a legacy repository mock whose
     * {@code findLegacyDocumentsByCategory} always returns one document,
     * "C465432", with the legacy categories "Air Force" and "Medals".
     *
     * @return the configured mock
     */
    private LegacySystemRepository getLegacySystemServiceMock() {
        LegacySystemRepository legacySystemServiceMock = Mockito.mock(LegacySystemRepository.class);
        HashMap<String, String[]> mapOfLegacyDocuments = new HashMap<String, String[]>();
        mapOfLegacyDocuments.put("C465432", new String[] { "Air Force", "Medals" });
        Mockito.when(legacySystemServiceMock.findLegacyDocumentsByCategory(Mockito.anyString(), Mockito.anyInt()))
                .thenReturn(mapOfLegacyDocuments);
        return legacySystemServiceMock;
    }

}