Java tutorial
/** * Copyright (c) 2015, The National Archives * http://www.nationalarchives.gov.uk * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ package uk.gov.nationalarchives.discovery.taxonomy.common.service.impl; import static org.hamcrest.MatcherAssert.*; import static org.hamcrest.Matchers.*; import uk.gov.nationalarchives.discovery.taxonomy.common.config.ServiceConfigurationTest; import uk.gov.nationalarchives.discovery.taxonomy.common.domain.TaxonomyMapper; import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.lucene.InformationAssetView; import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.Category; import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.CategoryEvaluationResult; import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.EvaluationReport; import uk.gov.nationalarchives.discovery.taxonomy.common.domain.repository.mongo.TestDocument; import uk.gov.nationalarchives.discovery.taxonomy.common.domain.service.PaginatedList; import uk.gov.nationalarchives.discovery.taxonomy.common.domain.service.TSetBasedCategorisationResult; import uk.gov.nationalarchives.discovery.taxonomy.common.repository.legacy.LegacySystemRepository; import uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.IAViewRepository; import uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.LuceneTestTrainingDataSet; import uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.tools.LuceneTaxonomyMapper; import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.CategoryRepository; import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.EvaluationReportRepository; import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.MongoTestDataSet; import uk.gov.nationalarchives.discovery.taxonomy.common.repository.mongo.TestDocumentRepository; import uk.gov.nationalarchives.discovery.taxonomy.common.service.CategoriserService; import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mockito; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.SpringApplicationConfiguration; import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; @SuppressWarnings("rawtypes") @ActiveProfiles("tsetBased") @RunWith(SpringJUnit4ClassRunner.class) @SpringApplicationConfiguration(classes = ServiceConfigurationTest.class) public class EvaluationServiceTest { // private static final Logger logger = // LoggerFactory.getLogger(EvaluationServiceTest.class); public EvaluationServiceImpl evaluationService; @Autowired TestDocumentRepository testDocumentRepository; @Autowired private EvaluationReportRepository evaluationReportRepository; @Autowired MongoTestDataSet mongoTestDataSet; @Autowired private LuceneTestTrainingDataSet luceneTestDataSet; @Autowired private CategoryRepository categoryRepository; private String[] legacyCategories = new String[] { "Construction industries", "Labour" }; private static final String CAT_BAD_ACCURACY = "Category with Bad Accuracy"; private static final String CAT_MEDIUM_RECALL = "Category with Bad Recall"; private static final String CAT_GOOD = "Category with Good Accuracy and Recall"; private static final String CAT_FROM_LEGACY_SYSTEM_NOT_KNOWN = "Category Not known in new System"; private static final String CAT_FROM_CURRENT_SYSTEM_NOT_FOUND = "Category from current system not Found anywhere"; @Before public void initMocks() throws IOException { evaluationService = new EvaluationServiceImpl(categoryRepository, getIaViewRepositoryMock(), testDocumentRepository, evaluationReportRepository, getLegacySystemServiceMock(), null); } @After public void dropDatabse() { mongoTestDataSet.dropDatabase(); } /** * TEST CREATE TEST DATASET */ @Test public void testCreateTestDataSet() { mongoTestDataSet.initCategoryCollectionWith1element(); assertThatTestDocDbIsEmpty(); evaluationService.createEvaluationTestDataset(10); assertThatTestDocDbContainsDocsWithLegacyCategories(); } private void assertThatTestDocDbContainsDocsWithLegacyCategories() { assertThat(testDocumentRepository.count(), is(equalTo(1l))); Iterable<TestDocument> trainingDocuments = testDocumentRepository.findAll(); assertThat(trainingDocuments, is(notNullValue())); TestDocument doc = trainingDocuments.iterator().next(); assertThat(doc, is(notNullValue())); assertThat(doc.getLegacyCategories(), is(not(emptyArray()))); } private void assertThatTestDocDbIsEmpty() { assertThat(testDocumentRepository.count(), is(equalTo(0l))); } /** * TEST RUN CATEGORISATION ON TEST DATASET */ @Test public void testRunCategorisationOnTestDataSet() { initTestDocumentRepositoryWithOneElementWithLegacyCategories(); evaluationService = new EvaluationServiceImpl(categoryRepository, getIaViewRepositoryMock(), testDocumentRepository, evaluationReportRepository, getLegacySystemServiceMock(), getCategoriserServiceMock()); evaluationService.runCategorisationOnTestDataSet(true); assertThatTestDocumentContainsCurrentSystemCategories(); } private void assertThatTestDocumentContainsCurrentSystemCategories() { assertThat(testDocumentRepository.count(), is(equalTo(1l))); TestDocument doc = testDocumentRepository.findAll().iterator().next(); assertThat(doc, notNullValue()); assertThat(doc.getCategories(), notNullValue()); assertThat(doc.getCategories(), is(not(emptyArray()))); } private void initTestDocumentRepositoryWithOneElementWithLegacyCategories() { TestDocument testDocument = TaxonomyMapper.getTestDocumentFromIAView(MongoTestDataSet.getIAViewSample()); testDocument.setLegacyCategories(legacyCategories); testDocumentRepository.save(testDocument); } /** * TEST EVALUATION REPORT */ @Test public void testGetEvaluationReport() { initCategoryCollection(); initTestDocumentRepositoryWithSeveralCompleteDocs(); EvaluationReport report = evaluationService.getEvaluationReport("test report"); assertThat(report, notNullValue()); assertThat(report.getResults(), is(notNullValue())); assertThat(report.getResults(), is(not(empty()))); assertThat("there should be 4 category evaluation results", report.getResults().size(), is(equalTo(5))); assertThat("there should be 3 documents processed", report.getNumberOfDocuments(), is(equalTo(3))); DecimalFormat df = new DecimalFormat("#.####"); assertThat(report.getAvgRecall(), is(notNullValue())); assertThat("Global recall should be 8/15", df.format(report.getAvgRecall()), is(equalTo(df.format(1.0d * 8 / 15)))); assertThat(report.getAvgAccuracy(), is(notNullValue())); assertThat("Global accuracy should be 7/15", df.format(report.getAvgAccuracy()), is(equalTo(df.format(1.0d * 7 / 15)))); assertThat(evaluationReportRepository.count(), is(equalTo(1l))); int numNotFoundInCatRepo = 0; int numNotFoundInTDocCat = 0; int numNotFoundInTDocLegacyCat = 0; for (CategoryEvaluationResult categoryEvaluationResult : report.getResults()) { if (!categoryEvaluationResult.isFoundInCatRepo()) { numNotFoundInCatRepo++; } if (!categoryEvaluationResult.isFoundInTDocCat()) { numNotFoundInTDocCat++; } if (!categoryEvaluationResult.isFoundInTDocLegacyCat()) { numNotFoundInTDocLegacyCat++; } } assertThat(numNotFoundInCatRepo, is(equalTo(1))); assertThat(numNotFoundInTDocCat, is(equalTo(2))); assertThat(numNotFoundInTDocLegacyCat, is(equalTo(1))); } private void initCategoryCollection() { saveCategory(CAT_BAD_ACCURACY); saveCategory(CAT_MEDIUM_RECALL); saveCategory(CAT_GOOD); saveCategory(CAT_FROM_CURRENT_SYSTEM_NOT_FOUND); } private void saveCategory(String categoryTtl) { Category category = new Category(); category.setCiaid(categoryTtl); category.setTtl(categoryTtl); category.setLck(false); category.setSc(0.0); categoryRepository.save(category); } private void initTestDocumentRepositoryWithSeveralCompleteDocs() { TestDocument testDocument1 = TaxonomyMapper.getTestDocumentFromIAView(MongoTestDataSet.getIAViewSample()); testDocument1.setDocReference("DOC1"); testDocument1.setCategories(new String[] { CAT_BAD_ACCURACY, CAT_MEDIUM_RECALL }); testDocument1.setLegacyCategories(new String[] { CAT_BAD_ACCURACY, CAT_MEDIUM_RECALL }); testDocumentRepository.save(testDocument1); TestDocument testDocument2 = TaxonomyMapper.getTestDocumentFromIAView(MongoTestDataSet.getIAViewSample()); testDocument2.setDocReference("DOC2"); testDocument2.setCategories(new String[] { CAT_BAD_ACCURACY, CAT_MEDIUM_RECALL }); testDocument2.setLegacyCategories(new String[] { CAT_MEDIUM_RECALL }); testDocumentRepository.save(testDocument2); TestDocument testDocument3 = TaxonomyMapper.getTestDocumentFromIAView(MongoTestDataSet.getIAViewSample()); testDocument3.setDocReference("DOC3"); testDocument3.setCategories(new String[] { CAT_BAD_ACCURACY, CAT_GOOD }); testDocument3.setLegacyCategories( new String[] { CAT_MEDIUM_RECALL, CAT_GOOD, CAT_FROM_LEGACY_SYSTEM_NOT_KNOWN }); testDocumentRepository.save(testDocument3); } /** * * MOCKS * */ private CategoriserService getCategoriserServiceMock() { CategoriserService categoriserService = Mockito.mock(CategoriserService.class); List<TSetBasedCategorisationResult> categorisationResults = new ArrayList<TSetBasedCategorisationResult>(); categorisationResults.addAll(Arrays.asList(new TSetBasedCategorisationResult("Labour", 1.12f, 10), new TSetBasedCategorisationResult("Forestry", 1.02f, 15), new TSetBasedCategorisationResult("Forestry", 0.12f, 2))); Mockito.when(categoriserService.testCategoriseSingle(Mockito.any(String.class))) .thenReturn(categorisationResults); return categoriserService; } private IAViewRepository getIaViewRepositoryMock() { IAViewRepository iaViewRepositoryMock = Mockito.mock(IAViewRepository.class); PaginatedList<InformationAssetView> searchResult = new PaginatedList<InformationAssetView>(); searchResult.setNumberOfResults(1); searchResult.setResults(Arrays.asList(MongoTestDataSet.getIAViewSample())); Mockito.when(iaViewRepositoryMock.performSearch(Mockito.anyString(), Mockito.anyDouble(), Mockito.anyInt(), Mockito.anyInt())).thenReturn(searchResult); Mockito.when(iaViewRepositoryMock.searchIAViewIndexByFieldAndPhrase(Mockito.anyString(), Mockito.anyString(), Mockito.anyInt())) .thenReturn(new TopDocs(1, new ScoreDoc[] { new ScoreDoc(0, 1f) }, 1f)); Mockito.when(iaViewRepositoryMock.getDoc(Mockito.any(ScoreDoc.class))) .thenReturn(LuceneTaxonomyMapper.getLuceneDocumentFromIAView(MongoTestDataSet.getIAViewSample())); return iaViewRepositoryMock; } private LegacySystemRepository getLegacySystemServiceMock() { LegacySystemRepository legacySystemServiceMock = Mockito.mock(LegacySystemRepository.class); HashMap<String, String[]> mapOfLegacyDocuments = new HashMap<String, String[]>(); mapOfLegacyDocuments.put("C465432", new String[] { "Air Force", "Medals" }); Mockito.when(legacySystemServiceMock.findLegacyDocumentsByCategory(Mockito.anyString(), Mockito.anyInt())) .thenReturn(mapOfLegacyDocuments); return legacySystemServiceMock; } }