Example usage for the weka.core.Instances constructor Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find usage examples for the Instances constructor of the weka.core.Instances class.

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Source Link

Document

Creates an empty set of instances.

Usage

From source file:cn.ict.zyq.bestConf.bestConf.sampler.DDSSampler.java

License:Open Source License

/**
 * Produces one sample set over the given attributes; at the current version all attributes are
 * assumed to be numeric attributes with bounds.
 *
 * <p>While {@code sets} is still {@code null}, the candidate permutation sets are generated once and
 * stored in {@code sets} together with their minimum pairwise distances in {@code dists}. Each call
 * then turns the candidate at {@code sets[sampleSetToGet]} into concrete attribute values.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize number of samples to generate, which is also the number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a dataset named "SamplesByLHS" containing {@code sampleSetSize} instances
 */
public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {
    final int dims = atts.size();

    // the candidate sets are built only once
    if (sets == null) {
        // upper limit on the number of distinct candidate sets, further capped by the number of rounds
        double candidateCap;
        if (dims > 2) {
            candidateCap = Math.pow(sampleSetSize, dims - 1);
        } else if (dims > 1) {
            candidateCap = sampleSetSize;
        } else {
            candidateCap = 1;
        }
        int candidateCount = (int) Math.min(rounds, candidateCap);

        // start from an empty cache
        dists = new long[candidateCount];
        sets = new ArrayList[candidateCount][];
        for (int slot = 0; slot < candidateCount; slot++) {
            dists[slot] = -1;
            sets[slot] = null;
        }

        int bestSlot = -1;
        long bestMinDist = -1;
        for (int slot = 0; slot < candidateCount; slot++) {
            // keep regenerating until the candidate differs from every cached one
            ArrayList<Integer>[] candidate;
            do {
                candidate = generateOneSampleSet(sampleSetSize, dims);
            } while (inAlready(sets, candidate));
            sets[slot] = candidate;

            // remember the candidate whose closest pair of samples is farthest apart
            dists[slot] = minDistForSet(candidate);
            if (dists[slot] > bestMinDist) {
                bestSlot = slot;
                bestMinDist = dists[slot];
            }
        }
        // make the candidate with the maximum minimum distance the first one
        positionSwitch(sets, dists, 0, bestSlot);
    }
    ArrayList<Integer>[] chosen = sets[sampleSetToGet];

    // step 1: split every attribute's domain into sampleSetSize subdomains
    // (sampleSetSize + 1 boundaries, lower and upper bound included)
    double[][] bounds = new double[dims][sampleSetSize + 1];
    boolean[] roundToInt = new boolean[dims];
    for (int d = 0; d < dims; d++) {
        uniBoundsGeneration(bounds[d], atts.get(d), sampleSetSize);
        //flexibleBoundsGeneration(bounds[d], atts.get(d), sampleSetSize);

        // values are truncated to integers when the range is wider than the number of samples
        roundToInt[d] = bounds[d][sampleSetSize] - bounds[d][0] > sampleSetSize;
    }

    // step 2: turn the chosen permutation into concrete values
    Instances data = new Instances("SamplesByLHS", atts, sampleSetSize);
    for (int s = 0; s < sampleSetSize; s++) {
        double[] vals = new double[dims];
        for (int d = 0; d < dims; d++) {
            int cell = chosen[d].get(s);
            double lower = bounds[d][cell];
            double upper = bounds[d][cell + 1];
            double value;
            if (useMid) {
                value = (lower + upper) / 2;
            } else {
                value = lower + ((upper - lower) * uniRand.nextDouble());
            }
            vals[d] = roundToInt[d] ? (int) value : value;
        }
        data.add(new DenseInstance(1.0, vals));
    }

    // step 3: hand back the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java

License:Open Source License

/**
 * Generates a sample set over numeric and nominal attributes.
 *
 * <p>Assumptions: (1) numeric attributes are continuous and have lower/upper bounds;
 * (2) nominal attributes have permutable domains.
 *
 * <p>Several candidate permutation sets are generated and the one with the largest minimum
 * pairwise distance is used. The first candidate is always accepted, so a set is selected even
 * when no candidate reaches a strictly positive minimum distance.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize number of samples to generate, which is also the number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a dataset named "InitialSetByLHS" containing {@code sampleSetSize} instances
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist; the first candidate is always taken so that
        //the selection can never stay null when all minimum distances are <= 0
        if (setWithMaxMinDist == null || crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    double pace;
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();

        if (crntAttr.isNumeric()) {
            //numeric: bounds[i] holds the subdomain boundaries, spaced by a constant pace
            bounds[i][0] = crntAttr.getLowerNumericBound();
            bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
            pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize;
            for (int j = 1; j < sampleSetSize; j++) {
                bounds[i][j] = bounds[i][j - 1] + pace;
            }
        } else {//crntAttr.isNominal()
            //nominal: bounds[i] holds positions of nominal values instead of boundaries
            if (crntAttr.numValues() >= sampleSetSize) {
                //randomly select among the set
                for (int j = 0; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
            } else {
                //first round-robin
                int lastPart = sampleSetSize % crntAttr.numValues();
                for (int j = 0; j < sampleSetSize - lastPart; j++)
                    bounds[i][j] = j % crntAttr.numValues();
                //then randomly select
                for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
            }
        } //nominal attribute
    } //get all subdomains

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//subdomain assigned to sample i for attribute j
            if (atts.get(j).isNumeric()) {
                vals[j] = useMid
                        ? (bounds[j][cell] + bounds[j][cell + 1]) / 2
                        : bounds[j][cell] + ((bounds[j][cell + 1] - bounds[j][cell]) * uniRand.nextDouble());
            } else {//isNominal()
                vals[j] = bounds[j][cell];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java

License:Open Source License

/**
 * Generates a sample set over the given attributes; at the current version all attributes are
 * assumed to be numeric attributes with bounds.
 *
 * <p>Several candidate permutation sets are generated and the one with the largest minimum
 * pairwise distance is used. The first candidate is always accepted, so a set is selected even
 * when no candidate reaches a strictly positive minimum distance.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize number of samples to generate, which is also the number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a dataset named "SamplesByLHS" containing {@code sampleSetSize} instances
 */
public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist; the first candidate is always taken so that
        //the selection can never stay null when all minimum distances are <= 0
        if (setWithMaxMinDist == null || crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    boolean[] roundToInt = new boolean[atts.size()];
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();
        uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
        //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize);

        //truncate values to integers when the range is wider than the number of samples
        if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize)
            roundToInt[i] = true;
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("SamplesByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//subdomain assigned to sample i for attribute j
            vals[j] = useMid
                    ? (bounds[j][cell] + bounds[j][cell + 1]) / 2
                    : bounds[j][cell] + ((bounds[j][cell + 1] - bounds[j][cell]) * uniRand.nextDouble());
            if (roundToInt[j])
                vals[j] = (int) vals[j];
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java

License:Open Source License

/**
 * Generates a sample set over the given attributes; at the current version all attributes are
 * assumed to be numeric attributes with bounds.
 *
 * <p>Let PACE be upper-lower DIVided by the sampleSetSize: every attribute's range is cut into
 * {@code sampleSetSize} subdomains of that width.
 *
 * <p>Several candidate permutation sets are generated and the one with the largest minimum
 * pairwise distance is used. The first candidate is always accepted, so a set is selected even
 * when no candidate reaches a strictly positive minimum distance.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize number of samples to generate, which is also the number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a dataset named "InitialSetByLHS" containing {@code sampleSetSize} instances
 */
private static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize,
        boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist; the first candidate is always taken so that
        //the selection can never stay null when all minimum distances are <= 0
        if (setWithMaxMinDist == null || crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    double pace;
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();

        bounds[i][0] = crntAttr.getLowerNumericBound();
        bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
        pace = (bounds[i][sampleSetSize] - bounds[i][0]) / sampleSetSize;
        for (int j = 1; j < sampleSetSize; j++) {
            bounds[i][j] = bounds[i][j - 1] + pace;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//subdomain assigned to sample i for attribute j
            vals[j] = useMid
                    ? (bounds[j][cell] + bounds[j][cell + 1]) / 2
                    : bounds[j][cell] + ((bounds[j][cell + 1] - bounds[j][cell]) * uniRand.nextDouble());
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java

License:Open Source License

/**
 * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 * /*from   ww  w  .  j a  v a2  s.  c o  m*/
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    double pace;
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();

        if (crntAttr.isNumeric()) {
            bounds[i][0] = crntAttr.getLowerNumericBound();
            bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
            pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize;
            for (int j = 1; j < sampleSetSize; j++) {
                bounds[i][j] = bounds[i][j - 1] + pace;
            }
        } else {//crntAttr.isNominal()
            if (crntAttr.numValues() >= sampleSetSize) {
                //randomly select among the set
                for (int j = 0; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
            } else {
                //first round-robin
                int lastPart = sampleSetSize % crntAttr.numValues();
                for (int j = 0; j < sampleSetSize - lastPart; j++)
                    bounds[i][j] = j % crntAttr.numValues();
                //then randomly select
                for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
            }
        } //nominal attribute
    } //get all subdomains

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            if (atts.get(j).isNumeric()) {
                vals[j] = useMid
                        ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1])
                                / 2
                        : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1]
                                - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble());
            } else {//isNominal()
                vals[j] = bounds[j][setWithMaxMinDist[j].get(i)];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java

License:Open Source License

/**
 * At current version, we assume all attributes are numeric attributes with bounds
 * /*  w w  w.j  ava 2 s  .  com*/
 * Let PACE be log10(upper/lower)
 * 
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
public static Instances getMultiDimContinuousLog(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    int step, crntStep;
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();

        bounds[i][0] = crntAttr.getLowerNumericBound();
        bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
        crntStep = (int) Math.log10(bounds[i][sampleSetSize] - bounds[i][0]);
        step = sampleSetSize / crntStep;//num of points drawn after the multiplication of 10
        int left = sampleSetSize % crntStep;
        if (bounds[i][0] == 0)
            bounds[i][0] = uniRand.nextInt(10);
        crntStep = 1;
        double theBound = bounds[i][sampleSetSize] / 10;
        for (int j = 1; j < sampleSetSize; j++) {
            if (crntStep >= step && bounds[i][j - 1] <= theBound)
                crntStep = 0;

            if (crntStep == 0)
                bounds[i][j] = bounds[i][j - step] * 10;
            else if (crntStep < step)
                bounds[i][j] = bounds[i][j - crntStep] * ((double) crntStep * 10. / ((double) step + 1.));
            else if (crntStep >= step)
                bounds[i][j] = bounds[i][j - crntStep] * ((double) crntStep * 10. / (double) (left + step + 1));

            if (bounds[i][j] >= bounds[i][sampleSetSize])
                System.err.println("be careful!!!!");
            crntStep++;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            vals[j] = useMid
                    ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2
                    : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1]
                            - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble());
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java

License:Open Source License

/**
 * At current version, we assume all attributes are numeric attributes with bounds
 * /*  www . jav  a  2 s . co m*/
 * Let PACE be log10(upper/lower)
 * 
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
public static Instances getMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    boolean[] roundToInt = new boolean[atts.size()];
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();
        uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
        //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize);

        if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize)
            roundToInt[i] = true;
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            vals[j] = useMid
                    ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2
                    : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1]
                            - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble());
            if (roundToInt[j])
                vals[j] = (int) vals[j];
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java

License:Open Source License

/**
 * At current version, we assume all attributes are numeric attributes with bounds
 * /*from   www  .  j a  va  2 s .c  om*/
 * Let PACE be upper-lower DIVided by the sampleSetSize
 * 
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
public static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    double pace;
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();

        bounds[i][0] = crntAttr.getLowerNumericBound();
        bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
        pace = (bounds[i][sampleSetSize] - bounds[i][0]) / sampleSetSize;
        for (int j = 1; j < sampleSetSize; j++) {
            bounds[i][j] = bounds[i][j - 1] + pace;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            vals[j] = useMid
                    ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2
                    : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1]
                            - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble());
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:com.actelion.research.orbit.imageAnalysis.imaging.TMAPoints.java

License:Open Source License

/**
 * returns x/y pairs for each input point
 *
 * @param pList/* w  w w. j a v a2  s. c  om*/
 * @return
 */
private HashMap<Point, Point> clusterLines(List<Point> pList) {
    ArrayList<Attribute> attrListX = new ArrayList<Attribute>(2);
    attrListX.add(new Attribute("xvalue"));
    ArrayList<Attribute> attrListY = new ArrayList<Attribute>(2);
    attrListY.add(new Attribute("yvalue"));
    //attrList.add(new Attribute("class"));
    Instances xInst = new Instances("xlines", attrListX, pList.size());
    Instances yInst = new Instances("ylines", attrListY, pList.size());
    //instances.setClassIndex(1);
    for (Point p : pList) {
        //Instance inst = new DenseInstance(1d, new double[]{p.x,Double.NaN});
        Instance instX = new DenseInstance(1d, new double[] { p.x });
        instX.setDataset(xInst);
        //inst.setClassMissing();
        xInst.add(instX);

        Instance instY = new DenseInstance(1d, new double[] { p.y });
        instY.setDataset(yInst);
        yInst.add(instY);
    }
    try {
        EM colClusterer = new EM();
        int numCols = guessNumClusters(colClusterer, xInst, 1, 20);
        colClusterer.setNumClusters(numCols);
        colClusterer.buildClusterer(xInst);
        logger.debug("NumCols: " + colClusterer.getNumClusters());

        EM rowClusterer = new EM();
        int numRows = guessNumClusters(rowClusterer, yInst, 1, 20);
        rowClusterer.setNumClusters(numRows);
        rowClusterer.buildClusterer(yInst);
        logger.debug("NumRows: " + rowClusterer.getNumClusters());

        logger.trace("ColClusterer:");
        HashMap<Integer, Integer> colHash = sortAndpPrintCluster(colClusterer);

        logger.trace("RowClusterer:");
        HashMap<Integer, Integer> rowHash = sortAndpPrintCluster(rowClusterer);

        if (logger.isTraceEnabled()) {
            logger.trace("ColHash:");
            for (Integer i : colHash.keySet()) {
                logger.trace("cluster " + i + ": " + colHash.get(i));
            }
            logger.trace("RowHash:");
            for (Integer i : rowHash.keySet()) {
                logger.trace("cluster " + i + ": " + rowHash.get(i));
            }
        }

        // classify points
        HashMap<Point, Point> pMap = new HashMap<Point, Point>();
        for (Point p : pList) {
            Instance instX = new DenseInstance(1d, new double[] { p.x });
            instX.setDataset(xInst);
            Instance instY = new DenseInstance(1d, new double[] { p.y });
            instY.setDataset(yInst);
            int x = colClusterer.clusterInstance(instX);
            int y = rowClusterer.clusterInstance(instY);
            x = colHash.get(x);
            y = rowHash.get(y);
            logger.trace(p + ": " + x + "/" + y);
            pMap.put(p, new Point(x, y));
        }
        return pMap;

    } catch (Exception e) {
        e.printStackTrace();
        logger.error("error while clustering points", e);
        return null;
    }

}

From source file:com.actelion.research.orbit.imageAnalysis.models.OrbitModel.java

License:Open Source License

/**
 * convert models from old weka version/*from ww w  . j ava  2 s.  c  om*/
 *
 * @param model
 */
public static void fixOldModelVersion(final OrbitModel model) {
    if (model == null)
        return; // nothing to fix
    boolean oldWekaVersion = false;
    try {
        model.getStructure().classAttribute().numValues();
    } catch (NullPointerException ne) {
        oldWekaVersion = true;
    }

    // apply old model fix?
    if (oldWekaVersion) {
        logger.info("model from old weka version (< 3.7.11) detected, trying to apply fixes");
        int numClasses = model.getClassShapes().size();
        TissueFeatures tf = new TissueFeatures(model.getFeatureDescription(), null);
        int numFeatures = tf.getFeaturesPerSample() * model.getFeatureDescription().getSampleSize() + 1;
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(numFeatures);
        for (int a = 0; a < numFeatures - 1; a++) {
            Attribute attr = new Attribute("a" + a);
            attrInfo.add(attr);
        }
        List<String> classValues = new ArrayList<String>(numClasses);
        for (int i = 0; i < numClasses; i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        Attribute classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);

        Instances structure = new Instances("trainSet pattern classes", attrInfo, 0);
        structure.setClassIndex(numFeatures - 1);
        model.setStructure(structure);

        try {
            if (model.getClassifier() != null && model.getClassifier().getClassifier() != null
                    && model.getClassifier().getClassifier() instanceof SMO) {
                SMO smo = ((SMO) model.getClassifier().getClassifier());

                Field field = smo.getClass().getDeclaredField("m_classAttribute");
                field.setAccessible(true);
                field.set(smo, classAttr);

                // missing values
                ReplaceMissingValues rmv = new ReplaceMissingValues();
                rmv.setInputFormat(structure);

                Field missing = smo.getClass().getDeclaredField("m_Missing");
                missing.setAccessible(true);
                missing.set(smo, rmv);

                // filter
                Field filter = smo.getClass().getDeclaredField("m_Filter");
                filter.setAccessible(true);
                Filter normalize = (Filter) filter.get(smo);

                RelationalLocator relLoc = new RelationalLocator(structure);
                StringLocator strLoc = new StringLocator(structure);

                Field outputRelAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputRelAtts");
                outputRelAtts.setAccessible(true);
                outputRelAtts.set(normalize, relLoc);

                Field inputRelAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_InputRelAtts");
                inputRelAtts.setAccessible(true);
                inputRelAtts.set(normalize, relLoc);

                Field outputStrAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputStringAtts");
                outputStrAtts.setAccessible(true);
                outputStrAtts.set(normalize, strLoc);

                Field inputStrAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_InputStringAtts");
                inputStrAtts.setAccessible(true);
                inputStrAtts.set(normalize, strLoc);

                Field outputFormat = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputFormat");
                outputFormat.setAccessible(true);
                outputFormat.set(normalize, structure);

                logger.info("fixes applied, the model should work with a weka version >= 3.7.11 now");
            } // else: good luck...
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("new weka version fixes could not be applied: " + e.getMessage());
        }
    } // old weka version
    fixOldModelVersion(model.getSegmentationModel()); // fixOldModelVersion can handle null
    fixOldModelVersion(model.getSecondarySegmentationModel()); // fixOldModelVersion can handle null
    fixOldModelVersion(model.getExclusionModel()); // fixOldModelVersion can handle null
}