org.diqube.execution.steps.GroupIdAdjustingStep.java Source code

Java tutorial

Introduction

Here is the source code for org.diqube.execution.steps.GroupIdAdjustingStep.java

Source

/**
 * diqube: Distributed Query Base.
 *
 * Copyright (C) 2015 Bastian Gloeckle
 *
 * This file is part of diqube.
 *
 * diqube is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.diqube.execution.steps;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicBoolean;

import org.diqube.execution.consumers.AbstractThreadedColumnValueConsumer;
import org.diqube.execution.consumers.AbstractThreadedGroupIntermediaryAggregationConsumer;
import org.diqube.execution.consumers.ColumnValueConsumer;
import org.diqube.execution.consumers.DoneConsumer;
import org.diqube.execution.consumers.GenericConsumer;
import org.diqube.execution.consumers.GroupIntermediaryAggregationConsumer;
import org.diqube.execution.consumers.RowIdConsumer;
import org.diqube.function.IntermediaryResult;
import org.diqube.queries.QueryRegistry;
import org.diqube.util.Triple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

/**
 * As Group IDs are valid for one TableShard only, they need to be mapped to the group IDs of equal groups from other
 * cluster nodes when receiving updates on the query master. This step does that and provides the cleaned list of
 * groupIds (= row IDs) as {@link RowIdConsumer} output.
 * 
 * <p>
 * Input: {@link ColumnValueConsumer}, {@link GroupIntermediaryAggregationConsumer}<br>
 * Output: {@link GroupIntermediaryAggregationConsumer}, {@link RowIdConsumer}
 *
 * @author Bastian Gloeckle
 */
public class GroupIdAdjustingStep extends AbstractThreadedExecutablePlanStep {

    private static final Logger logger = LoggerFactory.getLogger(GroupIdAdjustingStep.class);

    private volatile ConcurrentMap<Long, Map<String, Object>> incomingGroupIdToValues = new ConcurrentHashMap<>();
    private AtomicBoolean columnValueSourceIsDone = new AtomicBoolean(false);

    private AbstractThreadedColumnValueConsumer columnValueConsumer = new AbstractThreadedColumnValueConsumer(
            this) {
        @Override
        protected void allSourcesAreDone() {
            GroupIdAdjustingStep.this.columnValueSourceIsDone.set(true);
        }

        @Override
        protected void doConsume(String colName, Map<Long, Object> values) {
            for (Entry<Long, Object> valueEntry : values.entrySet()) {
                Map<String, Object> valueMap = incomingGroupIdToValues.computeIfAbsent(valueEntry.getKey(),
                        l -> new ConcurrentHashMap<String, Object>());

                valueMap.put(colName, valueEntry.getValue());
            }
        }
    };

    /** sync additions/removals by value of {@link #incomingGroupIntermediariesSync}. */
    private volatile ConcurrentMap<Long, Deque<Triple<String, IntermediaryResult, IntermediaryResult>>> incomingGroupIntermediaries = new ConcurrentHashMap<>();

    private AtomicBoolean groupInputIsDone = new AtomicBoolean(false);

    private ConcurrentMap<Long, Object> incomingGroupIntermediariesSync = new ConcurrentHashMap<>();

    private AbstractThreadedGroupIntermediaryAggregationConsumer groupIntermediateAggregateConsumer = new AbstractThreadedGroupIntermediaryAggregationConsumer(
            this) {
        @Override
        protected void allSourcesAreDone() {
            GroupIdAdjustingStep.this.groupInputIsDone.set(true);
        }

        @Override
        protected void doConsumeIntermediaryAggregationResult(long groupId, String colName,
                IntermediaryResult oldIntermediaryResult, IntermediaryResult newIntermediaryResult) {
            incomingGroupIntermediariesSync.putIfAbsent(groupId, new Object());

            synchronized (incomingGroupIntermediariesSync.get(groupId)) {
                incomingGroupIntermediaries.compute(groupId, (key, value) -> {
                    if (value == null)
                        value = new ConcurrentLinkedDeque<Triple<String, IntermediaryResult, IntermediaryResult>>();
                    value.addLast(new Triple<>(colName, oldIntermediaryResult, newIntermediaryResult));
                    return value;
                });
            }
        }
    };

    private Set<String> groupedColumnNames;
    private Map<Long, Long> groupIdMap = new HashMap<>();
    private Map<Map<String, Object>, Long> valuesToGroupId = new HashMap<>();
    private Set<Long> allKnownGroupIds = new HashSet<>();

    public GroupIdAdjustingStep(int stepId, QueryRegistry queryRegistry, Set<String> groupedColumnNames) {
        super(stepId, queryRegistry);
        this.groupedColumnNames = groupedColumnNames;
    }

    @Override
    protected void validateOutputConsumer(GenericConsumer consumer) throws IllegalArgumentException {
        if (!(consumer instanceof DoneConsumer) && !(consumer instanceof GroupIntermediaryAggregationConsumer)
                && !(consumer instanceof RowIdConsumer))
            throw new IllegalArgumentException(
                    "Only GroupIntermediaryAggregationConsumer and RowIdConsumer supported.");
    }

    @Override
    protected void execute() {
        execute(true);
    }

    private void execute(boolean checkIfDone) {
        if (!incomingGroupIdToValues.isEmpty()) {
            incomingGroupIdToValues.keySet().removeAll(allKnownGroupIds);
            List<Long> newGroupIds = new ArrayList<>();

            List<Long> incomingGroupIds = new ArrayList<Long>(
                    Sets.difference(incomingGroupIdToValues.keySet(), allKnownGroupIds));
            incomingGroupIdToValues.keySet().removeAll(allKnownGroupIds);
            List<Long> groupIdsWorkedOn = new ArrayList<Long>();
            for (Long groupId : incomingGroupIds) {
                Map<String, Object> values = incomingGroupIdToValues.get(groupId);
                if (Sets.difference(groupedColumnNames, values.keySet()).isEmpty()) {
                    values = Maps.filterKeys(new HashMap<String, Object>(values),
                            colName -> groupedColumnNames.contains(colName));
                    if (valuesToGroupId.containsKey(values)) {
                        // we found a new groupId mapping!
                        long availableGroupId = valuesToGroupId.get(values);
                        groupIdMap.put(groupId, availableGroupId);
                        logger.trace("Mapping new group ID {} to group ID {}", groupId, availableGroupId);
                    } else {
                        // new group found
                        valuesToGroupId.put(values, groupId);
                        groupIdMap.put(groupId, groupId);
                        newGroupIds.add(groupId);
                        logger.trace("Found new group ID {}", groupId);
                    }
                    groupIdsWorkedOn.add(groupId);
                }
            }
            for (Long groupIdDone : groupIdsWorkedOn) {
                incomingGroupIdToValues.remove(groupIdDone);
                allKnownGroupIds.add(groupIdDone);
            }

            if (!newGroupIds.isEmpty())
                forEachOutputConsumerOfType(RowIdConsumer.class,
                        c -> c.consume(newGroupIds.stream().toArray(l -> new Long[l])));
        }

        processIncomingGroupIntermediaries();

        if (checkIfDone) {
            if ((groupInputIsDone.get() && isEmpty(incomingGroupIntermediaries)) || // all groups processed.
            // all inputs done, we though might not have processed everything yet.
                    (groupInputIsDone.get() && columnValueSourceIsDone.get())) {

                if (groupInputIsDone.get() && columnValueSourceIsDone.get())
                    // make sure we have processed everything, so lets execute one additional time.
                    execute(false);

                forEachOutputConsumerOfType(GenericConsumer.class, c -> c.sourceIsDone());
                doneProcessing();
            }
        }
    }

    private void processIncomingGroupIntermediaries() {
        if (!isEmpty(incomingGroupIntermediaries)) {
            List<Long> activeGroupIds = new ArrayList<>(
                    Sets.intersection(groupIdMap.keySet(), incomingGroupIntermediaries.keySet()));
            for (Long inputGroupId : activeGroupIds) {
                long newGroupId = groupIdMap.get(inputGroupId);

                Deque<Triple<String, IntermediaryResult, IntermediaryResult>> intermediaries = incomingGroupIntermediaries
                        .get(inputGroupId);

                if (intermediaries.isEmpty()) {
                    synchronized (incomingGroupIntermediariesSync.get(inputGroupId)) {
                        // double-checked locking since there might have been something added to the deque in the meantime.
                        if (intermediaries.isEmpty()) {
                            incomingGroupIntermediaries.remove(inputGroupId);
                            continue;
                        }
                    }
                }

                logger.trace("Processing collected changes for group {}", newGroupId);
                List<String> colNamesProcessed = new ArrayList<>();
                while (!intermediaries.isEmpty()) {
                    Triple<String, IntermediaryResult, IntermediaryResult> update = intermediaries.poll();

                    colNamesProcessed.add(update.getLeft());

                    forEachOutputConsumerOfType(GroupIntermediaryAggregationConsumer.class,
                            c -> c.consumeIntermediaryAggregationResult(newGroupId, update.getLeft(),
                                    update.getMiddle(), update.getRight()));
                }
                logger.trace("Processed collected changes for group {}, there were updates for cols {}", newGroupId,
                        colNamesProcessed);
            }
        }
    }

    private boolean isEmpty(Map<Long, Deque<Triple<String, IntermediaryResult, IntermediaryResult>>> map) {
        for (Deque<Triple<String, IntermediaryResult, IntermediaryResult>> deque : map.values()) {
            if (!deque.isEmpty())
                return false;
        }
        return true;
    }

    @Override
    protected List<GenericConsumer> inputConsumers() {
        return new ArrayList<>(
                Arrays.asList(new GenericConsumer[] { columnValueConsumer, groupIntermediateAggregateConsumer }));
    }

    @Override
    protected String getAdditionalToStringDetails() {
        return null;
    }

}