/***********************************************************************************************************************
 * Copyright (C) 2014 by Sebastian Kruse
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/
package de.hpi.isg.mdms.benchmark;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;

import org.apache.commons.lang3.ArrayUtils;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.hpi.isg.mdms.domain.RDBMSMetadataStore;
import de.hpi.isg.mdms.domain.constraints.DistinctValueCount;
import de.hpi.isg.mdms.domain.constraints.InclusionDependency;
import de.hpi.isg.mdms.domain.constraints.SingleTargetReference;
import de.hpi.isg.mdms.domain.constraints.UniqueColumnCombination;
import de.hpi.isg.mdms.domain.factories.MetadataStoreFactory;
import de.hpi.isg.mdms.model.MetadataStore;
import de.hpi.isg.mdms.model.constraints.ConstraintCollection;
import de.hpi.isg.mdms.model.location.DefaultLocation;
import de.hpi.isg.mdms.model.targets.Column;
import de.hpi.isg.mdms.model.targets.Schema;
import de.hpi.isg.mdms.model.targets.Table;
import de.hpi.isg.mdms.rdbms.SQLiteInterface;

/**
 * A little test set to quantify the performance of constraint insertions into a metadata store.
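 * <p>
 * Each benchmark builds a schema of 1,000 tables with 100 columns each (100,000 columns in total) and reports two
 * figures: a <i>gross</i> time, which includes creating the constraint collection and the final flush of the store,
 * and a <i>net</i> time, which covers only the bare insertion loop.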
 *
 * @author Sebastian Kruse
 */
public class ConstraintInsertPerfomanceBenchmark {

    private static final Logger LOGGER = LoggerFactory.getLogger(ConstraintInsertPerfomanceBenchmark.class);

    private File createTempFile(String suffix) throws IOException {
        File file = File.createTempFile("metadatastore", suffix);
        file.deleteOnExit();
        LOGGER.debug("Using temporary file {}.", file.getAbsolutePath());
        return file;
    }

    private Schema createSchema(MetadataStore metadataStore, int numTables, int numColumnsPerTable) {
        Schema schema = metadataStore.addSchema("test-schema", null, new DefaultLocation());
        for (int tableNum = 0; tableNum < numTables; tableNum++) {
            Table table = schema.addTable(metadataStore, String.format("test-table-%04d", tableNum), null,
                    new DefaultLocation());
            for (int columnNum = 0; columnNum < numColumnsPerTable; columnNum++) {
                table.addColumn(metadataStore, String.format("test-column-%04d", columnNum), null, columnNum);
            }
        }
        return schema;
    }

    @Test
    public void testInsertDistinctValueCountsIntoDefaultMetadataStore() throws Exception {
        LOGGER.info("Creating Java-serialized metadata store...");
        File metadataStoreFile = createTempFile("ser");
        MetadataStore metadataStore = MetadataStoreFactory.createAndSaveDefaultMetadataStore(metadataStoreFile);

        LOGGER.info("Creating schema...");
        int numTables = 1000;
        int numColumnsPerTable = 100;
        int numColumns = numTables * numColumnsPerTable;
        Schema schema = createSchema(metadataStore, numTables, numColumnsPerTable);
        metadataStore.flush();

        LOGGER.info("Inserting {} distinct value counts...", numColumns);
        long startTimeGross = System.currentTimeMillis();
        ConstraintCollection<DistinctValueCount> constraintCollection = metadataStore
                .createConstraintCollection(null, DistinctValueCount.class);
        long startTimeNet = System.currentTimeMillis();
        for (Table table : schema.getTables()) {
            for (Column column : table.getColumns()) {
                // buildAndAddToCollection already registers the constraint with the collection;
                // a second constraintCollection.add(...) would insert it twice.
                DistinctValueCount.buildAndAddToCollection(new SingleTargetReference(column.getId()),
                        constraintCollection, 100);
            }
        }
        long endTimeNet = System.currentTimeMillis();
        metadataStore.flush();
        long endTimeGross = System.currentTimeMillis();

        double numInsertsPerSecGross = 1000d * numColumns / (endTimeGross - startTimeGross);
        double numInsertsPerSecNet = 1000d * numColumns / (endTimeNet - startTimeNet);
        LOGGER.info("[gross] Inserted in {} ms ({} inserts/s)", endTimeGross - startTimeGross, numInsertsPerSecGross);
        LOGGER.info("[net] Inserted in {} ms ({} inserts/s)", endTimeNet - startTimeNet, numInsertsPerSecNet);
        LOGGER.info("File size: {} MB", metadataStoreFile.length() / (1024 * 1024));
    }
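    // The same workload against the SQLite-backed store. All columns are collected into a list up front
    // so that schema traversal stays out of the timed insertion loop.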
    @Test
    public void testInsertDistinctValueCountsIntoRDBMSMetadataStore() throws Exception {
        LOGGER.info("Creating RDBMS metadata store...");
        File metadataStoreFile = createTempFile("sqlite");
        MetadataStore metadataStore = RDBMSMetadataStore
                .createNewInstance(SQLiteInterface.createForFile(metadataStoreFile));

        LOGGER.info("Creating schema...");
        int numTables = 1000;
        int numColumnsPerTable = 100;
        int numColumns = numTables * numColumnsPerTable;
        Schema schema = createSchema(metadataStore, numTables, numColumnsPerTable);
        metadataStore.flush();

        LOGGER.info("Collecting all columns...");
        List<Column> allColumns = new ArrayList<>();
        for (Table table : schema.getTables()) {
            for (Column column : table.getColumns()) {
                allColumns.add(column);
            }
        }

        LOGGER.info("Inserting {} distinct value counts...", numColumns);
        long startTimeGross = System.currentTimeMillis();
        ConstraintCollection<DistinctValueCount> constraintCollection = metadataStore
                .createConstraintCollection(null, DistinctValueCount.class);
        long startTimeNet = System.currentTimeMillis();
        for (Column column : allColumns) {
            // As above, buildAndAddToCollection registers the constraint itself.
            DistinctValueCount.buildAndAddToCollection(new SingleTargetReference(column.getId()),
                    constraintCollection, 100);
        }
        long endTimeNet = System.currentTimeMillis();
        metadataStore.flush();
        long endTimeGross = System.currentTimeMillis();

        double numInsertsPerSecGross = 1000d * numColumns / (endTimeGross - startTimeGross);
        double numInsertsPerSecNet = 1000d * numColumns / (endTimeNet - startTimeNet);
        LOGGER.info("[gross] Inserted in {} ms ({} inserts/s)", endTimeGross - startTimeGross, numInsertsPerSecGross);
        LOGGER.info("[net] Inserted in {} ms ({} inserts/s)", endTimeNet - startTimeNet, numInsertsPerSecNet);
        LOGGER.info("File size: {} MB", metadataStoreFile.length() / (1024 * 1024));
    }

    @Test
    public void testInsertInclusionDependenciesIntoDefaultMetadataStore() throws Exception {
        LOGGER.info("Creating Java-serialized metadata store...");
        File metadataStoreFile = createTempFile("ser");
        MetadataStore metadataStore = MetadataStoreFactory.createAndSaveDefaultMetadataStore(metadataStoreFile);

        LOGGER.info("Creating schema...");
        int numTables = 1000;
        int numColumnsPerTable = 100;
        int numColumns = numTables * numColumnsPerTable;
        Schema schema = createSchema(metadataStore, numTables, numColumnsPerTable);
        metadataStore.flush();

        LOGGER.info("Generating INDs...");
        int numDesiredInds = 100000;
        double indProbability = numDesiredInds / Math.pow(numColumns, 2);
        // Boost the probability to speed up generation: the square root is far larger than the tiny target
        // probability, so the desired number of INDs is reached (and the loop exited) early.
        indProbability = Math.sqrt(indProbability);
        Collection<Column[]> inclusionDependencies = new LinkedList<>();
        Random random = new Random();
        OuterLoop: for (final Table table1 : schema.getTables()) {
            for (final Table table2 : schema.getTables()) {
                for (final Column column1 : table1.getColumns()) {
                    for (final Column column2 : table2.getColumns()) {
                        if (column1 != column2 && random.nextDouble() <= indProbability) {
                            inclusionDependencies.add(new Column[] { column1, column2 });
                            if (inclusionDependencies.size() >= numDesiredInds) {
                                break OuterLoop;
                            }
                        }
                    }
                }
            }
        }

        LOGGER.info("Inserting the {} generated INDs...", inclusionDependencies.size());
        long startTimeGross = System.currentTimeMillis();
        ConstraintCollection<InclusionDependency> constraintCollection = metadataStore
                .createConstraintCollection(null, InclusionDependency.class);
        long startTimeNet = System.currentTimeMillis();
        for (Column[] columnPair : inclusionDependencies) {
            Collection<Column> dependentColumns = Collections.singletonList(columnPair[0]);
            Collection<Column> referencedColumns = Collections.singletonList(columnPair[1]);
            final InclusionDependency.Reference reference = new InclusionDependency.Reference(
                    dependentColumns.toArray(new Column[dependentColumns.size()]),
                    referencedColumns.toArray(new Column[referencedColumns.size()]));
            InclusionDependency.buildAndAddToCollection(reference, constraintCollection);
        }
        long endTimeNet = System.currentTimeMillis();
        metadataStore.flush();
        long endTimeGross = System.currentTimeMillis();

        double numInsertsPerSecGross = 1000d * inclusionDependencies.size() / (endTimeGross - startTimeGross);
        double numInsertsPerSecNet = 1000d * inclusionDependencies.size() / (endTimeNet - startTimeNet);
        LOGGER.info("[gross] Inserted in {} ms ({} inserts/s)", endTimeGross - startTimeGross, numInsertsPerSecGross);
        LOGGER.info("[net] Inserted in {} ms ({} inserts/s)", endTimeNet - startTimeNet, numInsertsPerSecNet);
        LOGGER.info("File size: {} MB", metadataStoreFile.length() / (1024 * 1024));
    }
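    // The IND workload against the SQLite-backed store. Note that all generated INDs are unary:
    // each reference pairs exactly one dependent with one referenced column.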
    @Test
    public void testInsertInclusionDependenciesIntoRDBMSMetadataStore() throws Exception {
        LOGGER.info("Creating RDBMS metadata store...");
        File metadataStoreFile = createTempFile("sqlite");
        MetadataStore metadataStore = RDBMSMetadataStore
                .createNewInstance(SQLiteInterface.createForFile(metadataStoreFile));

        LOGGER.info("Creating schema...");
        int numTables = 1000;
        int numColumnsPerTable = 100;
        int numColumns = numTables * numColumnsPerTable;
        Schema schema = createSchema(metadataStore, numTables, numColumnsPerTable);
        metadataStore.flush();

        LOGGER.info("Generating INDs...");
        int numDesiredInds = 100000;
        double indProbability = numDesiredInds / Math.pow(numColumns, 2);
        // Boost the probability to speed up generation (see the comment in the previous test).
        indProbability = Math.sqrt(indProbability);
        Collection<Column[]> inclusionDependencies = new LinkedList<>();
        Random random = new Random();
        OuterLoop: for (final Table table1 : schema.getTables()) {
            for (final Table table2 : schema.getTables()) {
                for (final Column column1 : table1.getColumns()) {
                    for (final Column column2 : table2.getColumns()) {
                        if (column1 != column2 && random.nextDouble() <= indProbability) {
                            inclusionDependencies.add(new Column[] { column1, column2 });
                            if (inclusionDependencies.size() >= numDesiredInds) {
                                break OuterLoop;
                            }
                        }
                    }
                }
            }
        }

        LOGGER.info("Inserting the {} generated INDs...", inclusionDependencies.size());
        long startTimeGross = System.currentTimeMillis();
        ConstraintCollection<InclusionDependency> constraintCollection = metadataStore
                .createConstraintCollection(null, InclusionDependency.class);
        long startTimeNet = System.currentTimeMillis();
        for (Column[] columnPair : inclusionDependencies) {
            Collection<Column> dependentColumns = Collections.singletonList(columnPair[0]);
            Collection<Column> referencedColumns = Collections.singletonList(columnPair[1]);
            final InclusionDependency.Reference reference = new InclusionDependency.Reference(
                    dependentColumns.toArray(new Column[dependentColumns.size()]),
                    referencedColumns.toArray(new Column[referencedColumns.size()]));
            InclusionDependency.buildAndAddToCollection(reference, constraintCollection);
        }
        long endTimeNet = System.currentTimeMillis();
        metadataStore.flush();
        long endTimeGross = System.currentTimeMillis();

        double numInsertsPerSecGross = 1000d * inclusionDependencies.size() / (endTimeGross - startTimeGross);
        double numInsertsPerSecNet = 1000d * inclusionDependencies.size() / (endTimeNet - startTimeNet);
        LOGGER.info("[gross] Inserted in {} ms ({} inserts/s)", endTimeGross - startTimeGross, numInsertsPerSecGross);
        LOGGER.info("[net] Inserted in {} ms ({} inserts/s)", endTimeNet - startTimeNet, numInsertsPerSecNet);
        LOGGER.info("File size: {} MB", metadataStoreFile.length() / (1024 * 1024));
    }

    @Test
    public void testInsertUniqueColumnCombinationsIntoDefaultMetadataStore() throws Exception {
        LOGGER.info("Creating Java-serialized metadata store...");
        File metadataStoreFile = createTempFile("ser");
        MetadataStore metadataStore = MetadataStoreFactory.createAndSaveDefaultMetadataStore(metadataStoreFile);

        LOGGER.info("Creating schema...");
        int numTables = 1000;
        int numColumnsPerTable = 100;
        int numColumns = numTables * numColumnsPerTable;
        Schema schema = createSchema(metadataStore, numTables, numColumnsPerTable);
        metadataStore.flush();

        LOGGER.info("Generating UCCs...");
        int numDesiredInds = 100000;
        double indProbability = numDesiredInds / Math.pow(numColumns, 2);
        // Boost the probability to speed up generation.
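        // (The pair generation below is reused from the IND benchmarks, hence the variable names;
        // only the first column of each pair feeds a single-column UCC.)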
        indProbability = Math.sqrt(indProbability);
        Collection<Column[]> inclusionDependencies = new LinkedList<>();
        Random random = new Random();
        OuterLoop: for (final Table table1 : schema.getTables()) {
            for (final Table table2 : schema.getTables()) {
                for (final Column column1 : table1.getColumns()) {
                    for (final Column column2 : table2.getColumns()) {
                        if (column1 != column2 && random.nextDouble() <= indProbability) {
                            inclusionDependencies.add(new Column[] { column1, column2 });
                            if (inclusionDependencies.size() >= numDesiredInds) {
                                break OuterLoop;
                            }
                        }
                    }
                }
            }
        }

        LOGGER.info("Inserting the {} generated UCCs...", inclusionDependencies.size());
        long startTimeGross = System.currentTimeMillis();
        ConstraintCollection<UniqueColumnCombination> constraintCollection = metadataStore
                .createConstraintCollection(null, UniqueColumnCombination.class);
        long startTimeNet = System.currentTimeMillis();
        for (Column[] columnPair : inclusionDependencies) {
            Collection<Column> uniqueColumns = Collections.singletonList(columnPair[0]);
            List<Integer> ids = new ArrayList<>();
            for (Column c : uniqueColumns) {
                ids.add(c.getId());
            }
            int[] intArray = ArrayUtils.toPrimitive(ids.toArray(new Integer[ids.size()]));
            final UniqueColumnCombination.Reference reference = new UniqueColumnCombination.Reference(intArray);
            UniqueColumnCombination.buildAndAddToCollection(reference, constraintCollection);
        }
        long endTimeNet = System.currentTimeMillis();
        metadataStore.flush();
        long endTimeGross = System.currentTimeMillis();

        double numInsertsPerSecGross = 1000d * inclusionDependencies.size() / (endTimeGross - startTimeGross);
        double numInsertsPerSecNet = 1000d * inclusionDependencies.size() / (endTimeNet - startTimeNet);
        LOGGER.info("[gross] Inserted in {} ms ({} inserts/s)", endTimeGross - startTimeGross, numInsertsPerSecGross);
        LOGGER.info("[net] Inserted in {} ms ({} inserts/s)", endTimeNet - startTimeNet, numInsertsPerSecNet);
        LOGGER.info("File size: {} MB", metadataStoreFile.length() / (1024 * 1024));
    }

    @Test
    public void testInsertUniqueColumnCombinationsIntoRDBMSMetadataStore() throws Exception {
        LOGGER.info("Creating RDBMS metadata store...");
        File metadataStoreFile = createTempFile("sqlite");
        MetadataStore metadataStore = RDBMSMetadataStore
                .createNewInstance(SQLiteInterface.createForFile(metadataStoreFile));

        LOGGER.info("Creating schema...");
        int numTables = 1000;
        int numColumnsPerTable = 100;
        int numColumns = numTables * numColumnsPerTable;
        Schema schema = createSchema(metadataStore, numTables, numColumnsPerTable);
        metadataStore.flush();

        LOGGER.info("Generating UCCs...");
        int numDesiredInds = 100000;
        double indProbability = numDesiredInds / Math.pow(numColumns, 2);
        // Boost the probability to speed up generation.
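        // (As in the previous UCC test, the pair generation is borrowed from the IND benchmarks;
        // only columnPair[0] is used for each single-column UCC.)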
        indProbability = Math.sqrt(indProbability);
        Collection<Column[]> inclusionDependencies = new LinkedList<>();
        Random random = new Random();
        OuterLoop: for (final Table table1 : schema.getTables()) {
            for (final Table table2 : schema.getTables()) {
                for (final Column column1 : table1.getColumns()) {
                    for (final Column column2 : table2.getColumns()) {
                        if (column1 != column2 && random.nextDouble() <= indProbability) {
                            inclusionDependencies.add(new Column[] { column1, column2 });
                            if (inclusionDependencies.size() >= numDesiredInds) {
                                break OuterLoop;
                            }
                        }
                    }
                }
            }
        }

        LOGGER.info("Inserting the {} generated UCCs...", inclusionDependencies.size());
        long startTimeGross = System.currentTimeMillis();
        ConstraintCollection<UniqueColumnCombination> constraintCollection = metadataStore
                .createConstraintCollection(null, UniqueColumnCombination.class);
        long startTimeNet = System.currentTimeMillis();
        for (Column[] columnPair : inclusionDependencies) {
            Collection<Column> uniqueColumns = Collections.singletonList(columnPair[0]);
            List<Integer> ids = new ArrayList<>();
            for (Column c : uniqueColumns) {
                ids.add(c.getId());
            }
            int[] intArray = ArrayUtils.toPrimitive(ids.toArray(new Integer[ids.size()]));
            final UniqueColumnCombination.Reference reference = new UniqueColumnCombination.Reference(intArray);
            UniqueColumnCombination.buildAndAddToCollection(reference, constraintCollection);
        }
        long endTimeNet = System.currentTimeMillis();
        metadataStore.flush();
        long endTimeGross = System.currentTimeMillis();

        double numInsertsPerSecGross = 1000d * inclusionDependencies.size() / (endTimeGross - startTimeGross);
        double numInsertsPerSecNet = 1000d * inclusionDependencies.size() / (endTimeNet - startTimeNet);
        LOGGER.info("[gross] Inserted in {} ms ({} inserts/s)", endTimeGross - startTimeGross, numInsertsPerSecGross);
        LOGGER.info("[net] Inserted in {} ms ({} inserts/s)", endTimeNet - startTimeNet, numInsertsPerSecNet);
        LOGGER.info("File size: {} MB", metadataStoreFile.length() / (1024 * 1024));
    }
}
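// A typical way to run one of these benchmarks individually (assuming a standard Maven Surefire setup):
//   mvn test -Dtest=ConstraintInsertPerfomanceBenchmark#testInsertDistinctValueCountsIntoRDBMSMetadataStore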