// Java tutorial
/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.execution.steps; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import org.diqube.data.column.ColumnShard; import org.diqube.data.types.dbl.DoubleColumnShard; import org.diqube.data.types.lng.LongColumnShard; import org.diqube.data.types.str.StringColumnShard; import org.diqube.execution.ColumnVersionManager; import org.diqube.execution.consumers.AbstractThreadedColumnValueConsumer; import org.diqube.execution.consumers.ColumnBuiltConsumer; import org.diqube.execution.consumers.ColumnValueConsumer; import org.diqube.execution.consumers.ColumnVersionBuiltConsumer; import org.diqube.execution.consumers.DoneConsumer; import org.diqube.execution.consumers.GenericConsumer; import org.diqube.executionenv.ExecutionEnvironment; import org.diqube.executionenv.VersionedExecutionEnvironment; import org.diqube.loader.columnshard.ColumnShardBuilderFactory; import org.diqube.loader.columnshard.SparseColumnShardBuilder; import org.diqube.queries.QueryRegistry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
com.google.common.collect.Iterables;

/**
 * Builds a temporary sparse column out of values provided by a {@link ColumnValueConsumer}.
 *
 * <p>
 * It builds a final column shard as soon as the input {@link ColumnValueConsumer} is fully done - if one is interested
 * in more updates, the {@link ColumnVersionBuiltConsumer} should be wired. The latter will receive as many updates as
 * possible with intermediate {@link ExecutionEnvironment}s containing the built column.
 *
 * <p>
 * Input: 1 {@link ColumnValueConsumer}<br>
 * Output: {@link ColumnBuiltConsumer} and {@link ColumnVersionBuiltConsumer}
 *
 * @author Bastian Gloeckle
 */
public class BuildColumnFromValuesStep extends AbstractThreadedExecutablePlanStep {
  private static final Logger logger = LoggerFactory.getLogger(BuildColumnFromValuesStep.class);

  /** Name of the column this step collects values for and builds. */
  private final String colName;

  /** Set to <code>true</code> by the input consumer once all its sources reported that they are done. */
  private final AtomicBoolean sourceIsDone = new AtomicBoolean(false);

  /** Monitor guarding {@link #columnValues} and {@link #updatedRowIds}. Final, so the lock identity never changes. */
  private final Object columnSync = new Object();

  /** All values of the column we're interested in, keyed by rowId. Sync access with {@link #columnSync}. */
  private final Map<Long, Object> columnValues = new HashMap<>();

  /**
   * Those rowIds that have been updated since the last run of {@link #execute()}. Sync access with {@link #columnSync}.
   * Not final: {@link #execute()} hands the current set to the consumers and installs a fresh one.
   */
  private Set<Long> updatedRowIds = new HashSet<>();

  /** <code>true</code> if there was at least one update for our col since the last run of {@link #execute()} */
  private final AtomicBoolean atLeastOneInterestingUpdate = new AtomicBoolean(false);

  /** Input consumer: records the values of {@link #colName} and ignores values of all other columns. */
  private final AbstractThreadedColumnValueConsumer columnValueConsumer =
      new AbstractThreadedColumnValueConsumer(this) {
        @Override
        protected void allSourcesAreDone() {
          BuildColumnFromValuesStep.this.sourceIsDone.set(true);
        }

        @Override
        protected void doConsume(String colName, Map<Long, Object> values) {
          // the parameter shadows the field, therefore the qualified access.
          if (!colName.equals(BuildColumnFromValuesStep.this.colName))
            return;

          synchronized (columnSync) {
            columnValues.putAll(values);
            updatedRowIds.addAll(values.keySet());
            atLeastOneInterestingUpdate.set(true);
          }
        }
      };

  private final ColumnShardBuilderFactory columnShardBuilderFactory;

  /** Environment the finally built column is stored in as temporary column shard. */
  private final ExecutionEnvironment defaultEnv;

  /** Used to create a {@link VersionedExecutionEnvironment} for each column version that is built. */
  private final ColumnVersionManager columnVersionManager;

  /**
   * @param stepId
   *          passed on to the superclass.
   * @param queryRegistry
   *          passed on to the superclass.
   * @param defaultEnv
   *          the environment that receives the final column when the input is fully done.
   * @param colName
   *          name of the column to build; values of other columns are ignored.
   * @param columnShardBuilderFactory
   *          factory for the sparse column shard builder.
   * @param columnVersionManager
   *          creates the versioned environments reported to {@link ColumnVersionBuiltConsumer}s.
   */
  public BuildColumnFromValuesStep(int stepId, QueryRegistry queryRegistry, ExecutionEnvironment defaultEnv,
      String colName, ColumnShardBuilderFactory columnShardBuilderFactory, ColumnVersionManager columnVersionManager) {
    super(stepId, queryRegistry);
    this.defaultEnv = defaultEnv;
    this.colName = colName;
    this.columnShardBuilderFactory = columnShardBuilderFactory;
    this.columnVersionManager = columnVersionManager;
  }

  /**
   * Accepts {@link DoneConsumer}, {@link ColumnBuiltConsumer} and {@link ColumnVersionBuiltConsumer}.
   *
   * @throws IllegalArgumentException
   *           if the consumer is of none of these types.
   */
  @Override
  protected void validateOutputConsumer(GenericConsumer consumer) throws IllegalArgumentException {
    boolean supported = consumer instanceof DoneConsumer || consumer instanceof ColumnBuiltConsumer
        || consumer instanceof ColumnVersionBuiltConsumer;
    if (!supported)
      throw new IllegalArgumentException(
          "Only DoneConsumer, ColumnBuiltConsumer and ColumnVersionBuiltConsumer supported.");
  }

  /**
   * Builds a column from a snapshot of the values received so far.
   *
   * <p>
   * As long as the source is not done, a column is only built if a {@link ColumnVersionBuiltConsumer} is wired and
   * there was an update since the last run; the result is reported as new column version. When the source is done, the
   * column is additionally stored in the default environment and the {@link ColumnBuiltConsumer}s are informed, before
   * this step reports that it is done. If the source is done without having delivered any value, no column is built.
   */
  @Override
  protected void execute() {
    // this is the last run of this execute method if the input source is fully done.
    boolean intermediateRun = !sourceIsDone.get();

    if (intermediateRun && !existsOutputConsumerOfType(ColumnVersionBuiltConsumer.class))
      // if this is NOT the last run (= there are more values to be provided), but there is no-one who'd listen to
      // intermediary updates, do not calculate them.
      return;

    if (intermediateRun && !atLeastOneInterestingUpdate.get())
      return;

    Map<Long, Object> values;
    Set<Long> curUpdatedRowIds;
    synchronized (columnSync) {
      atLeastOneInterestingUpdate.set(false);

      if (columnValues.isEmpty()) {
        if (!intermediateRun)
          // source is done but we did not receive any data. Do not build column, just report "done".
          informSourceIsDoneAndFinish();
        return;
      }

      // work on a snapshot, as the consumer may add more values while the column is being built.
      values = new HashMap<>(columnValues);
      curUpdatedRowIds = updatedRowIds;
      updatedRowIds = new HashSet<>();
    }

    // values is guaranteed to be non-empty here, so a maximum rowId exists.
    long numberOfRows = values.keySet().stream().mapToLong(Long::longValue).max().getAsLong() + 1;

    SparseColumnShardBuilder<Object> columnShardBuilder =
        columnShardBuilderFactory.createSparseColumnShardBuilder(colName);
    columnShardBuilder.withValues(values);
    columnShardBuilder.withNumberOfRows(numberOfRows);
    ColumnShard newColumn = columnShardBuilder.build();

    // inform ColumnVersionBuiltConsumers
    if (existsOutputConsumerOfType(ColumnVersionBuiltConsumer.class)) {
      logger.trace("Building new column version for {} after adjusting rows (limt) {}", colName,
          Iterables.limit(curUpdatedRowIds, 500));
      VersionedExecutionEnvironment newEnv = columnVersionManager.createNewVersion(newColumn);

      forEachOutputConsumerOfType(ColumnVersionBuiltConsumer.class,
          c -> c.columnVersionBuilt(newEnv, colName, curUpdatedRowIds));
    }

    // if done, inform other consumers.
    if (!intermediateRun) {
      storeAsTemporaryColumnInDefaultEnv(newColumn);

      logger.trace("Built column {} from values received from a ColumnValueConsumer.", colName);
      forEachOutputConsumerOfType(ColumnBuiltConsumer.class, c -> c.columnBuilt(colName));
      informSourceIsDoneAndFinish();
    }
  }

  /** Stores the given column in {@link #defaultEnv} using the store method matching its column type. */
  private void storeAsTemporaryColumnInDefaultEnv(ColumnShard newColumn) {
    switch (newColumn.getColumnType()) {
    case STRING:
      defaultEnv.storeTemporaryStringColumnShard((StringColumnShard) newColumn);
      break;
    case LONG:
      defaultEnv.storeTemporaryLongColumnShard((LongColumnShard) newColumn);
      break;
    case DOUBLE:
      defaultEnv.storeTemporaryDoubleColumnShard((DoubleColumnShard) newColumn);
      break;
    }
  }

  /** Tells all output consumers that this source is done and marks this step as done processing. */
  private void informSourceIsDoneAndFinish() {
    forEachOutputConsumerOfType(GenericConsumer.class, c -> c.sourceIsDone());
    doneProcessing();
  }

  /** @return a new, mutable list containing the single input consumer of this step. */
  @Override
  protected List<GenericConsumer> inputConsumers() {
    return new ArrayList<>(Arrays.<GenericConsumer> asList(columnValueConsumer));
  }

  @Override
  protected String getAdditionalToStringDetails() {
    return "colName=" + colName;
  }
}