madgik.exareme.master.queryProcessor.analyzer.stat.Stat.java Source code

Java tutorial

Introduction

Here is the source code for madgik.exareme.master.queryProcessor.analyzer.stat.Stat.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package madgik.exareme.master.queryProcessor.analyzer.stat;

import madgik.exareme.master.queryProcessor.analyzer.fanalyzer.OptiqueAnalyzer;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.log4j.Logger;

import java.sql.*;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * @author jim
 */
public class Stat implements StatExtractor {
    // public static final int LIMIT = 10000;
    // public static final double LIMIT_FACTOR = 0.3;
    private static final int BLOB_SIZE = 1000000;
    private static final int NUM_SIZE = 8;
    private static final int MAX_STRING_SAMPLE = 20;
    // public static final String SAMPLE = "_sample";
    private static final Logger log = Logger.getLogger(StatExtractor.class);

    private final Connection con;
    private String sch;

    public Stat(Connection con) {
        sch = "";
        this.con = con;
    }

    // schema map
    private Map<String, Table> schema = new HashMap<String, Table>();

    @Override
    public Map<String, Table> extractStats() throws Exception {

        DatabaseMetaData dbmd = con.getMetaData(); // dtabase metadata object

        // listing tables and columns
        String catalog = null;
        String schemaPattern = null;
        String tableNamePattern = null;
        String[] types = null;
        String columnNamePattern = null;

        ResultSet resultTables = dbmd.getTables(catalog, schemaPattern, tableNamePattern, types);
        log.debug("Starting extracting stats");
        while (resultTables.next()) {
            Map<String, Column> columnMap = new HashMap<String, Column>();
            String tableName = StringEscapeUtils.escapeJava(resultTables.getString(3));
            log.debug("Analyzing table " + tableName);

            int columnCount = resultTables.getMetaData().getColumnCount();
            int toupleSize = 0; // in bytes

            tableNamePattern = tableName;
            ResultSet resultColumns = dbmd.getColumns(catalog, schemaPattern, tableNamePattern, columnNamePattern);

            int count = OptiqueAnalyzer.getCountFor(tableName, sch);

            if (count == 0) {
                log.debug("Empty table");
                continue;
            }

            while (resultColumns.next()) {

                String columnName = StringEscapeUtils.escapeJava(resultColumns.getString(4));
                int columnType = resultColumns.getInt(5);

                // computing column's size in bytes
                int columnSize = computeColumnSize(columnName, columnType, tableName);
                toupleSize += columnSize;

                // execute queries for numberOfDiffValues, minVal, maxVal
                Map<String, Integer> diffValFreqMap = new HashMap<String, Integer>();

                // computing column's min and max values
                MinMax mm = computeMinMax(tableName, columnName);
                String minVal = mm.getMin();
                String maxVal = mm.getMax();

                // /
                List<ValFreq> freqs = computeDistinctValuesFrequency(tableName, columnName);

                for (ValFreq k : freqs) {
                    diffValFreqMap.put(k.getVal(), k.getFreq());

                }

                // /add min max diff vals in the sampling values
                int minOcc = computeValOccurences(tableName, columnName, minVal);
                if (!diffValFreqMap.containsKey(minVal))
                    diffValFreqMap.put(minVal, minOcc);
                int maxOcc = computeValOccurences(tableName, columnName, maxVal);
                if (!diffValFreqMap.containsKey(maxVal))
                    diffValFreqMap.put(maxVal, maxOcc);

                int diffVals = diffValFreqMap.size();

                Column c = new Column(columnName, columnType, columnSize, diffVals, minVal, maxVal, diffValFreqMap);
                columnMap.put(columnName, c);

            }

            ResultSet pkrs = dbmd.getExportedKeys("", "", tableName);
            String pkey = "DEFAULT_KEY";

            while (pkrs.next()) {
                pkey = pkrs.getString("PKCOLUMN_NAME");
                break;
            }

            Table t = new Table(tableName, columnCount, toupleSize, columnMap, count, pkey);
            schema.put(tableName, t);

        }

        return schema;

    }

    /* private-helper methods */
    private int computeColumnSize(String columnName, int columnType, String table_sample) throws Exception {
        int columnSize = 0;
        if (columnType == Types.INTEGER || columnType == Types.REAL || columnType == Types.DOUBLE
                || columnType == Types.DECIMAL || columnType == Types.FLOAT || columnType == Types.NUMERIC) {
            columnSize = NUM_SIZE;
        } else if (columnType == Types.VARCHAR) {
            String query0 = "select max(length(`" + columnName + "`)) as length from (select `" + columnName
                    + "` from `" + table_sample + "`)" + " where `" + columnName + "` is not null limit "
                    + MAX_STRING_SAMPLE;

            Statement stmt0 = con.createStatement();
            ResultSet rs0 = stmt0.executeQuery(query0);

            while (rs0.next()) {
                columnSize = rs0.getInt("length");
            }
            rs0.close();
            stmt0.close();

        } else if (columnType == Types.BLOB)
            columnSize = BLOB_SIZE;

        return columnSize;
    }

    private MinMax computeMinMax(String tableName, String columnName) throws Exception {
        String query1 = "select min(`" + columnName + "`) as minVal, max(`" + columnName + "`) "
                + "as maxVal  from `" + tableName + "` where `" + columnName + "` is not null";

        String minVal = "", maxVal = "";

        Statement stmt1 = con.createStatement();
        ResultSet rs1 = stmt1.executeQuery(query1);
        while (rs1.next()) {
            minVal = rs1.getString("minVal");
            maxVal = rs1.getString("maxVal");
        }
        rs1.close();
        stmt1.close();

        return new MinMax(minVal, maxVal);
    }

    private int computeValOccurences(String tableName, String columnName, String value) throws Exception {
        String queryDf = "select count(*) as valCount " + "from `" + tableName + "` where `" + columnName
                + "` is not null and  `" + columnName + "` = \"" + value + "\"";
        Statement stmt = con.createStatement();
        ResultSet rs = stmt.executeQuery(queryDf);
        int diffValCount = 0;
        while (rs.next()) {
            diffValCount = rs.getInt("valCount");
        }
        rs.close();
        stmt.close();

        return diffValCount;
    }

    private List<ValFreq> computeDistinctValuesFrequency(String table_sample, String columnName) throws Exception {
        List<ValFreq> freqs = new LinkedList<ValFreq>();

        String query = "select `" + columnName + "` as val, count(*) as freq from `" + table_sample + "` where `"
                + columnName + "` is not null group by `" + columnName + "`";

        Statement stmt = con.createStatement();
        ResultSet rs = stmt.executeQuery(query);

        while (rs.next()) {
            freqs.add(new ValFreq(rs.getString("val"), rs.getInt("freq")));
        }

        rs.close();
        stmt.close();

        return freqs;
    }

    /* inner - helper classes */
    private final class MinMax {
        private final String min;
        private final String max;

        public MinMax(String min, String max) {
            this.min = min;
            this.max = max;
        }

        public String getMin() {
            return min;
        }

        public String getMax() {
            return max;
        }

        @Override
        public String toString() {
            return "MinMax{" + "min=" + min + ", max=" + max + '}';
        }

    }

    private final class ValFreq {
        private final String val;
        private final int freq;

        public ValFreq(String val, int freq) {
            this.val = val;
            this.freq = freq;
        }

        public String getVal() {
            return val;
        }

        public int getFreq() {
            return freq;
        }

    }

    public void setSch(String s) {
        this.sch = s;
    }
}