de.tu_berlin.dima.oligos.profiler.PseudoColumnProfiler.java Source code

Java tutorial

Introduction

Here is the source code for de.tu_berlin.dima.oligos.profiler.PseudoColumnProfiler.java

Source

/*******************************************************************************
 * Copyright 2013 DIMA Research Group, TU Berlin (http://www.dima.tu-berlin.de)
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tu_berlin.dima.oligos.profiler;

import java.sql.SQLException;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.google.common.collect.Maps;

import de.tu_berlin.dima.oligos.db.ColumnConnector;
import de.tu_berlin.dima.oligos.stat.Column;
import de.tu_berlin.dima.oligos.stat.distribution.histogram.Histogram;
import de.tu_berlin.dima.oligos.stat.distribution.histogram.QuantileHistogram;
import de.tu_berlin.dima.oligos.stat.distribution.histogram.StringHistogram;
import de.tu_berlin.dima.oligos.type.util.Constraint;
import de.tu_berlin.dima.oligos.type.util.TypeInfo;
import de.tu_berlin.dima.oligos.type.util.parser.StringParser;

public class PseudoColumnProfiler extends ColumnProfiler<String> {

    public PseudoColumnProfiler(String schema, String table, String column, TypeInfo type, boolean isEnum,
            ColumnConnector<String> connector) {
        super(schema, table, column, type, isEnum, connector, null, new StringParser());
    }

    @Override
    public QuantileHistogram<String> getQuantileHistogram() {
        throw new UnsupportedOperationException();
    }

    @Override
    public Column<String> profile() {
        try {
            Map<String, Long> quantileHistogram = connector.getHistogram();
            Map<String, Long> mostFrequentValues = connector.getMostFrequentValues();
            Map<String, Long> exactValues = Maps.newTreeMap();
            for (Entry<String, Long> e : mostFrequentValues.entrySet()) {
                exactValues.put(e.getKey(), e.getValue());
            }

            if (!isEnum) {
                for (Entry<String, Long> e : quantileHistogram.entrySet()) {
                    String key = e.getKey();
                    long value = e.getValue();
                    if (!exactValues.containsKey(key)) {
                        exactValues.put(key, value);
                    }
                }
            }
            Set<Constraint> constraints = connector.getConstraints();
            String min = connector.getMin();
            String max = connector.getMax();
            long cardinality = exactValues.size();
            long numNulls = connector.getNumNulls();
            Histogram<String> distribution = new StringHistogram(exactValues);
            return new Column<String>(schema, table, column, type, constraints, min, max, cardinality, numNulls,
                    distribution, parser);
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }
}