no.ssb.vtl.script.operations.FoldOperation.java Source code

Java tutorial

Introduction

Here is the source code for no.ssb.vtl.script.operations.FoldOperation.java

Source

package no.ssb.vtl.script.operations;

/*-
 * ========================LICENSE_START=================================
 * Java VTL
 * %%
 * Copyright (C) 2016 - 2017 Hadrien Kohl
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =========================LICENSE_END==================================
 */

import com.google.common.base.MoreObjects;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import no.ssb.vtl.model.AbstractUnaryDatasetOperation;
import no.ssb.vtl.model.DataPoint;
import no.ssb.vtl.model.DataStructure;
import no.ssb.vtl.model.Dataset;
import no.ssb.vtl.model.VTLObject;

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Stream;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static no.ssb.vtl.model.Component.Role;

/**
 * Fold clause.
 * <p>
 * Turns a set of columns (the <em>elements</em>) of the child dataset into rows. For every
 * data point of the child, one data point is produced per element whose value is not null.
 * Each produced data point carries the columns that are not elements, the name of the
 * element in the {@code dimension} column and the value of the element in the
 * {@code measure} column.
 */
public class FoldOperation extends AbstractUnaryDatasetOperation {

    /** Name of the identifier column that receives the element names. */
    private final String dimension;
    /** Name of the measure column that receives the element values. */
    private final String measure;
    /** Names of the columns to fold. */
    private final ImmutableSet<String> elements;

    // The fields below are (re)computed by computeIndices(DataStructure), which is
    // called from computeDataStructure().

    /** Positions, in a child data point, of the columns copied over unchanged. */
    private int[] copyIndices;
    /** Positions, in a child data point, of the folded elements. */
    private int[] elementIndices;
    /** Position of the measure column in a produced data point. */
    private int measureIndex;
    /** Position of the dimension column in a produced data point. */
    private int dimensionIndex;
    /** Number of columns of a produced data point. */
    private int size;
    /** Names of the elements, in the iteration order of {@link #elements}. */
    private String[] elementNames;

    /**
     * Creates a fold operation.
     *
     * @param dataset   the dataset to fold.
     * @param dimension name of the new identifier column holding the element names.
     * @param measure   name of the new measure column holding the element values.
     * @param elements  names of the columns to fold.
     * @throws NullPointerException     if dimension, measure or elements is null.
     * @throws IllegalArgumentException if dimension, measure or elements is empty.
     */
    public FoldOperation(Dataset dataset, String dimension, String measure, Set<String> elements) {
        super(dataset);

        this.elements = ImmutableSet.copyOf(checkNotNull(elements, "elements cannot be null"));
        checkArgument(!this.elements.isEmpty(), "elements cannot be empty");

        this.dimension = checkNotNull(dimension, "dimension cannot be null");
        checkArgument(!dimension.isEmpty(), "dimension cannot be empty");

        this.measure = checkNotNull(measure, "measure cannot be null");
        checkArgument(!measure.isEmpty(), "measure cannot be empty");
    }

    /**
     * Check that all of the given columns are of the same type
     *
     * @param structure the structure the columns are looked up in.
     * @param columns   the names of the columns to check.
     * @return the type of all the columns.
     * @throws IllegalArgumentException if columns are not of the same type.
     */
    private static Class<?> checkColumnType(DataStructure structure, ImmutableSet<String> columns) {
        // Group the column names by type; exactly one group means a single common type.
        ListMultimap<Class<?>, String> classes = ArrayListMultimap.create();
        for (String column : columns) {
            classes.put(structure.get(column).getType(), column);
        }
        checkArgument(classes.keySet().size() == 1, "all element(s) [%s] are not of the same type [%s]", columns,
                classes);
        return classes.keySet().iterator().next();
    }

    /**
     * Checks that all of the given columns are present in the structure.
     *
     * @param structure the structure the columns are looked up in.
     * @param columns   the names of the columns to check.
     * @throws IllegalArgumentException if one or more columns are missing.
     */
    private static void checkContainsColumns(DataStructure structure, ImmutableSet<String> columns) {
        Set<String> structureColumns = ImmutableSet.copyOf(structure.keySet());
        checkArgument(structureColumns.containsAll(columns), "the element(s) [%s] were not found",
                Sets.difference(columns, structureColumns));
    }

    /**
     * Checks that none of the given columns have the identifier role.
     *
     * @param structure the structure the columns are looked up in.
     * @param columns   the names of the columns to check.
     * @throws IllegalArgumentException if one or more columns have the identifier role.
     */
    private static void checkNoIdentifiers(DataStructure structure, ImmutableSet<String> columns) {
        // Group the column names by role so that the offending names can be reported.
        ListMultimap<Role, String> roles = ArrayListMultimap.create();
        for (String column : columns) {
            roles.put(structure.get(column).getRole(), column);
        }
        checkArgument(!roles.containsKey(Role.IDENTIFIER), "cannot fold identifier(s) [%s]",
                roles.get(Role.IDENTIFIER));
    }

    /**
     * Validates the elements against the child structure and builds the folded structure:
     * the child columns that are not elements, followed by the dimension (identifier,
     * {@code String}) and the measure (typed like the elements).
     * <p>
     * As a side effect, the indices used by {@link #fold(DataPoint)} are recomputed.
     *
     * @return the structure of the folded dataset.
     * @throws IllegalArgumentException if an element is missing, if the elements are not all
     *                                  of the same type, or if an element is an identifier.
     */
    @Override
    public DataStructure computeDataStructure() {

        // In its normal form, all elements should be present in a fold operation.
        // This should be picked up by the parser but we check once anyways.
        DataStructure childStructure = getChild().getDataStructure();

        checkContainsColumns(childStructure, elements);

        // All the elements should be of the same type.
        Class<?> newType = checkColumnType(childStructure, elements);

        // Cannot remove identifiers without grouping.
        checkNoIdentifiers(childStructure, elements);

        // Copy each columns that is not in elements.
        // Putting them first optimizes the fold operation: fold() can then write the
        // i-th copied value at position i of the new data point.
        DataStructure.Builder structureBuilder = DataStructure.builder();
        for (String columnName : Sets.difference(childStructure.keySet(), elements)) {
            structureBuilder.put(columnName, childStructure.get(columnName));
        }

        structureBuilder.put(dimension, Role.IDENTIFIER, String.class);
        structureBuilder.put(measure, Role.MEASURE, newType);
        DataStructure structure = structureBuilder.build();

        computeIndices(structure);

        return structure;
    }

    /**
     * Create indices used by the fold operation.
     *
     * @param structure the new structure.
     */
    private void computeIndices(DataStructure structure) {
        ImmutableSet<String> originalColumns = ImmutableSet.copyOf(getChild().getDataStructure().keySet());
        ImmutableSet<String> columns = ImmutableSet.copyOf(structure.keySet());

        // List views used for position lookups.
        List<String> originalPositions = originalColumns.asList();
        List<String> newPositions = columns.asList();

        // Indices of the common columns.
        copyIndices = Sets.intersection(columns, originalColumns).stream()
                .mapToInt(originalPositions::indexOf).toArray();

        // Sets.intersection iterates in the order of its first argument, so, as long as
        // every element exists in the child (checked in computeDataStructure), the
        // entries of elementIndices line up with those of elementNames.
        elementIndices = Sets.intersection(elements, originalColumns).stream()
                .mapToInt(originalPositions::indexOf).toArray();

        elementNames = elements.toArray(new String[0]);

        measureIndex = newPositions.indexOf(measure);
        dimensionIndex = newPositions.indexOf(dimension);
        size = columns.size();
    }

    /**
     * Fold a single datapoint.
     * <p>
     * Relies on the indices computed by {@link #computeIndices(DataStructure)}.
     *
     * @param dataPoint a data point of the child dataset.
     * @return one data point per element of {@code dataPoint} that has a non-null value.
     */
    private Stream<DataPoint> fold(DataPoint dataPoint) {

        // Create a new datapoint holding the values shared by all the folded datapoints.
        DataPoint baseDatapoint = DataPoint.create(size);
        for (int i = 0; i < copyIndices.length; i++) {
            baseDatapoint.set(i, dataPoint.get(copyIndices[i]));
        }

        List<DataPoint> foldedDatapoints = Lists.newArrayListWithCapacity(elements.size());
        for (int i = 0; i < elementIndices.length; i++) {

            // Elements without value do not produce any datapoint.
            VTLObject elementValue = dataPoint.get(elementIndices[i]);
            if (VTLObject.NULL == elementValue || elementValue == null || elementValue.get() == null) {
                continue;
            }

            // TODO: Still unclear from benchmark.
            //DataPoint clone = DataPoint.create(baseDatapoint);
            DataPoint clone = (DataPoint) baseDatapoint.clone();

            clone.set(dimensionIndex, VTLObject.of(elementNames[i]));
            clone.set(measureIndex, elementValue);

            foldedDatapoints.add(clone);
        }

        return foldedDatapoints.stream();

    }

    /**
     * Streams the folded data points of the child dataset.
     *
     * @return the data points of the child, each one folded with {@link #fold(DataPoint)}.
     */
    @Override
    public Stream<DataPoint> getData() {
        // To initialize the indices.
        getDataStructure();

        return getChild().getData().flatMap(this::fold);
    }

    /**
     * Returns the distinct values count reported by the child, unchanged.
     * <p>
     * NOTE(review): the child's map is forwarded as is although the folded structure no
     * longer contains the elements and has two new columns - confirm this is intended.
     */
    @Override
    public Optional<Map<String, Integer>> getDistinctValuesCount() {
        return getChild().getDistinctValuesCount();
    }

    /**
     * Returns the size of the child, if known, multiplied by the number of elements.
     * <p>
     * Since {@link #fold(DataPoint)} skips the elements that have no value, the number of
     * data points actually produced can be lower than the returned value.
     */
    @Override
    public Optional<Long> getSize() {
        return getChild().getSize().map(childSize -> childSize * elements.size());
    }

    @Override
    public String toString() {
        MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this);
        helper.addValue(elements);
        helper.add("identifier", dimension);
        helper.add("measure", measure);
        return helper.omitNullValues().toString();
    }
}