adams.ml.Dataset.java Source code

Java tutorial

Introduction

Here is the source code for adams.ml.Dataset.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Dataset.java
 * Copyright (C) 2012-2014 University of Waikato, Hamilton, New Zealand
 */
package adams.ml;

import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.Collections;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Vector;

import org.apache.commons.math.stat.StatUtils;

/**
 * Use {@link adams.ml.data.Dataset} instead.
 *
 * @author  dale (dale at waikato dot ac dot nz)
 * @version $Revision: 9565 $
 */
@Deprecated
public class Dataset implements Serializable, Cloneable {

    public class Mapping {
        public Mapping(BaseData.Type typ) {
            m_type = typ;
        }

        public Mapping() {
        }

        protected BaseData.Type m_type;
        protected BaseData.Type m_basetype; // for arrays
        protected int m_size = -1; //for arrays
    }

    protected Hashtable<String, Mapping> m_Mapping;
    protected Hashtable<Integer, Vector<String>> m_errors = new Hashtable<>(); // row, err column

    /**
     *
     */
    private static final long serialVersionUID = 4458447431240251541L;

    public class DataRowIntegerComparable implements Comparator<DataRow> {

        public DataRowIntegerComparable(String f) {
            m_field = f;
        }

        protected String m_field;

        @Override
        public int compare(DataRow o1, DataRow o2) {
            return ((Integer) o1.get(m_field).getData() > (Integer) o2.get(m_field).getData() ? 1
                    : (o1 == o2 ? 0 : -1));
        }
    }

    /** optional name of this dataset. */
    protected String m_name = "";

    protected Vector<DataRow> m_rows;

    public boolean sortIntegerField(String field) {
        Collections.sort(m_rows, new DataRowIntegerComparable(field));
        return (true);
    }

    public Mapping getType(String columnname) {
        return (m_Mapping.get(columnname));
    }

    public Dataset getWithStringValue(String columnname, String val) {
        Dataset ret = new Dataset();

        for (int row = 0; row < count(); row++) {
            if (m_errors.get(row) != null) {
                continue;
            }
            DataRow dr = get(row);
            BaseData cell = dr.get(columnname);
            if (cell == null) {
                continue;
            }
            if (val.equals(cell.getData().toString())) {
                ret.add(dr.copy());
            }
        }
        return (ret);
    }

    protected String summariseNumericBy(String columnname, String by) {
        StringBuffer ret = new StringBuffer();
        Vector<String> distinct = getDistinctStrings(by);
        for (String s : distinct) {
            Dataset ds = getWithStringValue(by, s);
            ret.append(s + "\n");
            ret.append("----------------\n");
            ret.append(ds.summariseNumeric(columnname));
        }
        return (ret.toString());
    }

    protected String summariseNumeric(String columnname) {
        Vector<Float> vf = new Vector<>();
        Hashtable<Float, Boolean> ind = new Hashtable<>();
        int numErrors = 0;
        int missing = 0;
        int ignored = 0;
        for (int i = 0; i < count(); i++) {
            DataRow dr = get(i);
            if (dr.get(columnname) == null) {
                missing++;
                continue;
            }
            dr = getSafe(i);
            if (dr == null) {
                ignored++;
                continue;
            }
            try {
                Number val = (Number) dr.get(columnname).getData();
                vf.add(val.floatValue());
                if (ind.get(val.floatValue()) == null) {
                    ind.put(val.floatValue(), true);
                }
            } catch (Exception e) {
                try {
                    Float pi = Float.parseFloat(dr.getAsString(columnname));
                    vf.add(pi);
                } catch (Exception e1) {
                    numErrors++;
                    continue;
                }
            }
        }
        double[] f = new double[vf.size()];
        for (int i = 0; i < f.length; i++) {
            f[i] = vf.get(i);
        }
        StringBuffer ret = new StringBuffer();
        ret.append("Total OK: " + vf.size() + "\n");
        ret.append("Ignored Rows: " + ignored + "\n");
        ret.append("Missing: " + missing + "\n");
        ret.append("Errors: " + numErrors + "\n");
        ret.append("Different: " + ind.size() + "\n");
        double min = StatUtils.min(f);
        double max = StatUtils.max(f);
        double mean = StatUtils.mean(f);
        double stdev = Math.sqrt(StatUtils.variance(f));
        double median = StatUtils.percentile(f, 50);
        ret.append("Min,Max: " + min + "," + max + "\n");
        ret.append("Mean: " + mean + "\n");
        ret.append("Standard Deviation: " + stdev + "\n");
        ret.append("Median: " + median + "\n");
        return (ret.toString());
    }

    public Vector<String> getDistinctStrings(String columnname) {
        Hashtable<String, Boolean> ht = new Hashtable<>();
        Vector<String> ret = new Vector<>();
        for (int i = 0; i < count(); i++) {
            DataRow dr = getSafe(i);
            if (dr == null) {
                continue;
            }
            String cll = dr.getAsString(columnname);
            if (cll != null) {
                if (!ht.containsKey(cll)) {
                    ht.put(cll, true);
                }
            }
        }
        for (String s : ht.keySet()) {
            ret.add(s);
        }
        return (ret);
    }

    public DataRow getSafe(int i) {
        if (m_errors.get(i) != null) {
            return (null);
        }
        return (m_rows.get(i));
    }

    /* protected String summariseString(String columnname){
       for (DataRow dr:m_rows){
         addWithoutCheck(dr.copy());
       }
     }
        
     public String summarise(String columnname){
       BaseData.Type typ;
       Mapping m=getType(columnname);
       if (m == null){
         // assume String
         typ=BaseData.Type.STRING;
       } else {
         typ=m.m_type;
       }
        
        
     }
        
    */
    public Dataset(String name) {
        m_name = name;
        m_Mapping = new Hashtable<>();
        m_rows = new Vector<>();
    }

    public Dataset() {
        m_Mapping = new Hashtable<>();
        m_rows = new Vector<>();
    }

    public void setType(String key, BaseData.Type type) {
        Mapping m = new Mapping();
        m.m_type = type;
        m_Mapping.put(key, m);
    }

    public void setType(String key, BaseData.Type basetype, int size) {
        Mapping m = new Mapping();
        m.m_type = BaseData.Type.ARRAY;
        m.m_basetype = basetype;
        m.m_size = size;
        m_Mapping.put(key, m);
    }

    public void setMappingFromBaseData(String key, BaseData bd) {

        if (bd != null) {

            if (bd.isNumeric()) {
                setType(key, BaseData.Type.NUMERIC);
            } else if (bd.isDate()) {
                setType(key, BaseData.Type.DATE);
            } else if (bd.isTime()) {
                setType(key, BaseData.Type.TIME);
            } else if (bd.isArray()) {
                if (Array.getLength(bd.getData()) > 0) {
                    Object ao = Array.get(bd.getData(), 0);
                    if (BaseData.isNumeric(ao)) {
                        setType(key, BaseData.Type.NUMERIC, Array.getLength(bd.getData()));
                    } else {
                        setType(key, BaseData.Type.STRING, Array.getLength(bd.getData()));
                    }
                }
            } else {
                setType(key, BaseData.Type.STRING);
            }
        }
    }

    public BaseData.Type getArrayType(Object bd) {

        if (bd != null) {
            if (BaseData.isNumeric(bd)) {
                return (BaseData.Type.NUMERIC);
            } else if (BaseData.isArray(bd)) {
                System.err.println("can't handle Multidim array");
                return (null);
            } else {
                return (BaseData.Type.STRING);
            }
        } else {
            return (null);
        }
    }

    public void setMappingFromDataRow(DataRow dr) {
        for (String key : dr.getKeys()) {
            BaseData bd = dr.get(key);
            if (bd != null) {
                if (bd.isNumeric()) {
                    setType(key, BaseData.Type.NUMERIC);
                } else if (bd.isArray()) {
                    if (Array.getLength(bd.getData()) > 0) {
                        Object ao = Array.get(bd.getData(), 0);
                        if (BaseData.isNumeric(ao)) {
                            setType(key, BaseData.Type.NUMERIC, Array.getLength(bd.getData()));
                        } else {
                            setType(key, BaseData.Type.STRING, Array.getLength(bd.getData()));
                        }
                    }
                } else {
                    setType(key, BaseData.Type.STRING);
                }
            }
        }
    }

    public Dataset copy() {
        Dataset ret = new Dataset(m_name);
        for (DataRow dr : m_rows) {
            ret.add(dr.copy());
        }
        //ret.m_rows=(Vector<DataRow>)m_rows.clone();
        return (ret);
    }

    public void empty() {
        m_rows = new Vector<>();
    }

    public void useWithoutCheck(Dataset d) {
        m_rows = new Vector<>();
        for (DataRow dr : d.m_rows) {
            addWithoutCheck(dr.copy());
        }
    }

    public void use(Dataset d) {
        m_rows = new Vector<>();
        for (DataRow dr : d.m_rows) {
            add(dr.copy());
        }
    }

    public String getName() {
        return (m_name);
    }

    public void setName(String name) {
        m_name = name;
    }

    public boolean add(DataRow dr) {
        for (String s : dr.getKeys()) {
            Mapping m = m_Mapping.get(s);
            if (m == null) {
                setMappingFromBaseData(s, dr.get(s));
            } else {
                boolean eq = BaseData.typeEquals(dr.getObject(s), m.m_type);
                if (!eq) {
                    System.err.println("Fails insert:" + s + "," + m.m_type.toString() + ","
                            + BaseData.getType(dr.getObject(s)));
                    return (false);
                }
            }
        }
        m_rows.add(dr);
        return (true);
    }

    public void addWithoutCheck(DataRow dr) {
        m_rows.add(dr);
    }

    public int count() {
        return (m_rows.size());
    }

    public DataRow get(int i) {
        return (m_rows.get(i));
    }

    public void autoID(String key) {
        int i = 1;
        for (DataRow dr : m_rows) {
            dr.set(key, new BaseData(i++));
        }
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < count(); i++) {
            DataRow dr = get(i);
            for (String s : dr.getKeys()) {
                Object o = dr.getObject(s);
                if (o != null && o.getClass().isArray()) {
                    sb.append("(" + s + ":");
                    for (int j = 0; j < Array.getLength(o); j++) {
                        Object p = Array.get(o, j);
                        sb.append(p.toString());
                        if (j < Array.getLength(o) - 1) {
                            sb.append(",");
                        }
                    }
                    sb.append(")");
                } else if (o != null) {
                    sb.append("(" + s + ":" + o.toString() + ")");
                }
            }
            sb.append("\n");
        }
        return (sb.toString());
    }

}