// Java tutorial
/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.test.full.scenarios; import java.util.Collection; import java.util.Date; import junit.framework.TestCase; import org.apache.commons.lang.SerializationUtils; import org.apache.metamodel.util.ObjectComparator; import org.datacleaner.api.AnalyzerResult; import org.datacleaner.api.InputColumn; import org.datacleaner.beans.dategap.DateGapAnalyzer; import org.datacleaner.components.convert.ConvertToStringTransformer; import org.datacleaner.components.maxrows.MaxRowsFilter; import org.datacleaner.configuration.DataCleanerConfiguration; import org.datacleaner.configuration.DataCleanerConfigurationImpl; import org.datacleaner.configuration.DataCleanerEnvironmentImpl; import org.datacleaner.connection.Datastore; import org.datacleaner.descriptors.Descriptors; import org.datacleaner.descriptors.SimpleDescriptorProvider; import org.datacleaner.job.AnalysisJob; import org.datacleaner.job.AnalyzerJob; import org.datacleaner.job.ComponentJob; import org.datacleaner.job.builder.AnalysisJobBuilder; import org.datacleaner.job.builder.AnalyzerComponentBuilder; import org.datacleaner.job.builder.FilterComponentBuilder; import 
org.datacleaner.job.builder.TransformerComponentBuilder;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.job.runner.AnalysisRunnerImpl;
import org.datacleaner.result.AnalysisResult;
import org.datacleaner.result.SimpleAnalysisResult;
import org.datacleaner.test.TestHelper;
import org.datacleaner.util.CollectionUtils2;

/**
 * Full-scenario test that builds a job running a {@link DateGapAnalyzer} on the
 * "orderdb" sample datastore, executes it, and then verifies that the analysis
 * result is identical whether it is read from the live
 * {@link AnalysisResultFuture}, from a {@link SimpleAnalysisResult} copy, or from
 * a serialized-then-deserialized clone of that copy.
 */
public class AnalyzeDateGapsCompareSchemasAndSerializeResultsTest extends TestCase {

    /**
     * Runs the complete scenario: configure, build the job, execute it, round-trip
     * the result through Java serialization and assert on all three result views.
     *
     * @throws Throwable
     *             the first error reported by the analysis run, if any, or any
     *             failure raised while building or verifying the job
     */
    public void testScenario() throws Throwable {
        final DataCleanerConfiguration configuration = createConfiguration();
        final AnalysisJob job = createJob(configuration);

        final AnalysisResultFuture future = new AnalysisRunnerImpl(configuration).run(job);
        if (future.isErrornous()) {
            // surface the underlying failure instead of a generic assertion error
            throw future.getErrors().get(0);
        }
        assertTrue(future.isSuccessful());

        final SimpleAnalysisResult result1 = new SimpleAnalysisResult(future.getResultMap());
        final byte[] bytes = SerializationUtils.serialize(result1);
        final SimpleAnalysisResult result2 = (SimpleAnalysisResult) SerializationUtils.deserialize(bytes);

        performResultAssertions(job, future);
        performResultAssertions(job, result1);
        performResultAssertions(job, result2);
    }

    /**
     * Creates a configuration holding the "orderdb" sample datastore and a
     * descriptor provider that knows only the three components used by this test.
     *
     * @return the configuration used to build and run the job
     */
    private static DataCleanerConfiguration createConfiguration() {
        final SimpleDescriptorProvider descriptorProvider = new SimpleDescriptorProvider();
        descriptorProvider.addAnalyzerBeanDescriptor(Descriptors.ofAnalyzer(DateGapAnalyzer.class));
        descriptorProvider.addFilterBeanDescriptor(Descriptors.ofFilter(MaxRowsFilter.class));
        descriptorProvider
                .addTransformerBeanDescriptor(Descriptors.ofTransformer(ConvertToStringTransformer.class));

        final Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb");

        return new DataCleanerConfigurationImpl().withDatastores(datastore)
                .withEnvironment(new DataCleanerEnvironmentImpl().withDescriptorProvider(descriptorProvider));
    }

    /**
     * Builds the analysis job: three source columns from PUBLIC.ORDERS, a max-rows
     * filter limited to 5 rows as the default requirement, a to-string conversion of
     * the customer number, and a date gap analyzer grouped by that converted column.
     *
     * <p>
     * The job builder is closed in a {@code finally} block so that it is released
     * even when an assertion or a builder call fails part-way through.
     *
     * @param configuration
     *            the configuration to build the job against
     * @return the built job
     * @throws Exception
     *             if building the job or closing the builder fails
     */
    // The column lookups return wildcard-typed columns; the casts below narrow them
    // to the types the sample database is known to contain.
    @SuppressWarnings("unchecked")
    private static AnalysisJob createJob(final DataCleanerConfiguration configuration) throws Exception {
        final AnalysisJobBuilder analysisJobBuilder = new AnalysisJobBuilder(configuration);
        try {
            final Datastore datastore = configuration.getDatastoreCatalog().getDatastore("orderdb");
            analysisJobBuilder.setDatastore(datastore);
            analysisJobBuilder.addSourceColumns("PUBLIC.ORDERS.ORDERDATE", "PUBLIC.ORDERS.SHIPPEDDATE",
                    "PUBLIC.ORDERS.CUSTOMERNUMBER");
            assertEquals(3, analysisJobBuilder.getSourceColumns().size());

            final FilterComponentBuilder<MaxRowsFilter, MaxRowsFilter.Category> maxRows = analysisJobBuilder
                    .addFilter(MaxRowsFilter.class);
            maxRows.getComponentInstance().setMaxRows(5);
            analysisJobBuilder.setDefaultRequirement(maxRows.getFilterOutcome(MaxRowsFilter.Category.VALID));

            final TransformerComponentBuilder<ConvertToStringTransformer> convertToString = analysisJobBuilder
                    .addTransformer(ConvertToStringTransformer.class);
            convertToString.addInputColumn(analysisJobBuilder.getSourceColumnByName("customernumber"));
            final InputColumn<String> customerNumberAsString = (InputColumn<String>) convertToString
                    .getOutputColumns().get(0);

            final AnalyzerComponentBuilder<DateGapAnalyzer> dateGap = analysisJobBuilder
                    .addAnalyzer(DateGapAnalyzer.class);
            dateGap.setName("date gap job");

            final DateGapAnalyzer analyzer = dateGap.getComponentInstance();
            analyzer.setSingleDateOverlaps(true);
            analyzer.setFromColumn((InputColumn<Date>) analysisJobBuilder.getSourceColumnByName("orderdate"));
            analyzer.setToColumn((InputColumn<Date>) analysisJobBuilder.getSourceColumnByName("shippeddate"));
            analyzer.setGroupColumn(customerNumberAsString);

            return analysisJobBuilder.toAnalysisJob();
        } finally {
            analysisJobBuilder.close();
        }
    }

    /**
     * Asserts that the given result holds exactly one entry, keyed by the date gap
     * analyzer job, and that its textual representation matches the expected gaps.
     *
     * @param job
     *            the original job whose analyzer job is used as the lookup key
     * @param result
     *            the result view to verify
     */
    private void performResultAssertions(AnalysisJob job, AnalysisResult result) {
        assertEquals(1, result.getResults().size());

        Collection<ComponentJob> componentJobs = result.getResultMap().keySet();
        componentJobs = CollectionUtils2.sorted(componentJobs, ObjectComparator.getComparator());

        assertEquals("[ImmutableAnalyzerJob[name=date gap job,analyzer=Date gap analyzer]]",
                componentJobs.toString());

        // Using the original component jobs not only asserts that these exist
        // in the result, but also that their deserialized clones are equal
        // (otherwise the results cannot be retrieved from the result map).
        final AnalyzerJob analyzerJob = job.getAnalyzerJobs().iterator().next();
        final AnalyzerResult analyzerResult = result.getResult(analyzerJob);
        assertNotNull(analyzerResult);
        assertEquals("DateGapAnalyzerResult[gaps={121=[], 128=[], 141=[], 181=[], 363=[]}]",
                analyzerResult.toString());
    }
}