List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Invokes filter to transform last parameter into a Nominal * /* ww w . j a v a 2 s . co m*/ * @param data * @return * @throws Exception */ public static Instances makeLastNominal(Instances data) throws Exception { Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } return newData; }
From source file:com.sliit.normalize.NormalizeDataset.java
/**
 * Normalizes the current dataset (reduced-dimension file, temp file, or the
 * original CSV — whichever exists, in that order), binarizes the class labels
 * ("normal." becomes "0", anything else "1") and writes the result out via
 * {@code writeToNewFile}.
 *
 * @return the expected path of the normalized output file, or "" on error
 */
public String normalizeDataset() {
    System.out.println("start normalizing data");
    String filePathOut = "";
    try {
        // Load from the most recently produced intermediate file, if any.
        CSVLoader loader = new CSVLoader();
        if (reducedDiemensionFile != null) {
            loader.setSource(reducedDiemensionFile);
        } else if (tempFIle != null && tempFIle.exists()) {
            loader.setSource(tempFIle);
        } else {
            loader.setSource(csvFile);
        }
        Instances dataInstance = loader.getDataSet();
        Normalize normalize = new Normalize();
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);
        normalize.setInputFormat(dataInstance);

        String directory = csvFile.getParent();
        outputFile = new File(directory + "/" + "normalized" + csvFile.getName());
        if (!outputFile.exists()) {
            outputFile.createNewFile();
        }
        CSVSaver saver = new CSVSaver();
        saver.setFile(outputFile);

        // BUGFIX: both loops originally started at i = 1 and silently dropped
        // the first instance from the normalized output. If the first row was
        // being skipped deliberately (e.g. a duplicated header), revert to 1.
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            normalize.input(dataInstance.instance(i));
        }
        normalize.batchFinished();
        Instances outPut = new Instances(dataInstance, 0);
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            outPut.add(normalize.output());
        }

        // Binarize the class attribute: "normal." -> "0", everything else -> "1".
        Attribute attribute = dataInstance.attribute(outPut.numAttributes() - 1);
        for (int j = 0; j < attribute.numValues(); j++) {
            if (attribute.value(j).equals("normal.")) {
                outPut.renameAttributeValue(attribute, attribute.value(j), "0");
            } else {
                outPut.renameAttributeValue(attribute, attribute.value(j), "1");
            }
        }
        saver.setInstances(outPut);
        saver.writeBatch();
        writeToNewFile(directory);
        // NOTE(review): no '/' separator and a different prefix than outputFile
        // ("norm" vs "normalized") — confirm this matches what writeToNewFile
        // actually produces.
        filePathOut = directory + "norm" + csvFile.getName();

        // Clean up intermediate files.
        if (tempFIle != null) {
            tempFIle.delete();
        }
        if (reducedDiemensionFile != null) {
            reducedDiemensionFile.delete();
        }
        outputFile.delete();
    } catch (IOException e) {
        // Include the throwable so the stack trace is not lost.
        log.error("Error occurred:" + e.getMessage(), e);
    } catch (Exception e) {
        log.error("Error occurred:" + e.getMessage(), e);
    }
    return filePathOut;
}
From source file:com.sliit.normalize.NormalizeDataset.java
/**
 * Renames every value of the attributes selected by {@code values} to its
 * numeric position ("0", "1", ...) and writes the modified dataset to a
 * temporary CSV file next to the source file.
 *
 * @param values map whose keys select which indices to convert
 *               (NOTE(review): keys look like attribute indices, but the loop
 *               below runs over instances — confirm the intended bound)
 * @return true if the temp file was written successfully, false otherwise
 */
public boolean updateStringValues(Map<Integer, String> values) {
    System.out.println("updating String Values");
    boolean status = false;
    try {
        csv.setSource(csvFile);
        Instances dataInstance = csv.getDataSet();
        // NOTE(review): iterating up to numInstances() while indexing
        // ATTRIBUTES is suspicious; numAttributes() is probably meant.
        // Kept as-is to preserve behavior — confirm with callers.
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            if (values.containsKey(i)) {
                Attribute attribute = dataInstance.attribute(i);
                for (int j = 0; j < attribute.numValues(); j++) {
                    dataInstance.renameAttributeValue(attribute, attribute.value(j), j + "");
                }
            }
        }
        tempFIle = new File(csvFile.getParent() + "/temp.csv");
        CSVSaver saver = new CSVSaver();
        saver.setInstances(dataInstance);
        saver.setFile(tempFIle);
        saver.writeBatch();
        // BUGFIX: the original never set status, so this method always
        // returned false even on success.
        status = true;
    } catch (IOException e) {
        log.error("Error occurred:" + e.getMessage(), e);
    }
    return status;
}
From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java
License:Open Source License
/**
 * Returns the class probability distribution for the given instance.
 *
 * <p>Delegates to the ZeroR fallback model when one was built; otherwise maps
 * the instance into the clusterer's header space and converts the cluster
 * assignment (or cluster distribution) into class probabilities.
 *
 * @param instance the instance to be classified
 * @return the class probabilities (all zeros means "unclassified")
 * @throws Exception if an error occurred during the prediction
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    if (m_ZeroR != null) {
        // Degenerate training data: fall back to the ZeroR model.
        return m_ZeroR.distributionForInstance(instance);
    } else {
        double[] result = new double[instance.numClasses()];
        if (m_ActualClusterer != null) {
            // Build a new instance matching the clustering header (which has
            // no class attribute), copying all non-class attribute values.
            Instances tempData = m_ClusteringHeader.stringFreeStructure();
            double[] values = new double[tempData.numAttributes()];
            int n = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == instance.classIndex()) {
                    continue; // the clusterer never sees the class attribute
                }
                if (instance.attribute(i).isString()) {
                    // String values must be registered in the target header.
                    values[n] = tempData.attribute(n).addStringValue(instance.stringValue(i));
                } else if (instance.attribute(i).isRelationValued()) {
                    // Likewise for relational values.
                    values[n] = tempData.attribute(n).addRelation(instance.relationalValue(i));
                } else {
                    values[n] = instance.value(i);
                }
                n++;
            }
            Instance newInst = new DenseInstance(instance.weight(), values);
            newInst.setDataset(tempData);

            if (!getLabelAllClusters()) {
                // Hard assignment: map the winning cluster to its class.
                double r = m_ClustersToClasses[m_ActualClusterer.clusterInstance(newInst)];
                if (r == -1) {
                    return result; // Unclassified — cluster has no class label
                } else {
                    result[(int) r] = 1.0;
                    return result;
                }
            } else {
                // Soft assignment: weight each cluster's class-probability
                // table by the cluster membership probability.
                double[] classProbs = new double[instance.numClasses()];
                double[] dist = m_ActualClusterer.distributionForInstance(newInst);
                for (int i = 0; i < dist.length; i++) {
                    for (int j = 0; j < instance.numClasses(); j++) {
                        classProbs[j] += dist[i] * m_ClusterClassProbs[i][j];
                    }
                }
                Utils.normalize(classProbs);
                return classProbs;
            }
        } else {
            return result; // Unclassified — no clusterer was built
        }
    }
}
From source file:com.tum.classifiertest.DataCache.java
License:Open Source License
/**
 * Creates a DataCache by copying data from a weka.core.Instances object.
 *
 * <p>Values are stored attribute-major ({@code vals[attribute][instance]}) to
 * speed up access during random-forest splitting, and per-attribute sorted
 * instance indices are precomputed for the whole dataset.
 *
 * @param origData the dataset to copy
 * @throws Exception if the dataset contains attributes that are neither
 *                   numeric nor nominal
 */
public DataCache(Instances origData) throws Exception {
    classIndex = origData.classIndex();
    numAttributes = origData.numAttributes();
    numClasses = origData.numClasses();
    numInstances = origData.numInstances();

    // attNumVals[i] == 0 marks a numeric attribute; otherwise it holds the
    // number of nominal values. Any other attribute type is rejected.
    attNumVals = new int[origData.numAttributes()];
    for (int i = 0; i < attNumVals.length; i++) {
        if (origData.attribute(i).isNumeric()) {
            attNumVals[i] = 0;
        } else if (origData.attribute(i).isNominal()) {
            attNumVals[i] = origData.attribute(i).numValues();
        } else {
            throw new Exception("Only numeric and nominal attributes are supported.");
        }
    }

    /* Array is indexed by attribute first, to speed access in RF splitting. */
    vals = new float[numAttributes][numInstances];
    for (int a = 0; a < numAttributes; a++) {
        for (int i = 0; i < numInstances; i++) {
            if (origData.instance(i).isMissing(a)) {
                vals[a][i] = Float.MAX_VALUE; // missing values sort to the end
            } else {
                vals[a][i] = (float) origData.instance(i).value(a); // deep copy
            }
        }
    }

    instWeights = new double[numInstances];
    instClassValues = new int[numInstances];
    for (int i = 0; i < numInstances; i++) {
        instWeights[i] = origData.instance(i).weight();
        instClassValues[i] = (int) origData.instance(i).classValue();
    }

    /* Precompute sorted instance indices per attribute for the whole dataset.
       As of FastRF 0.99 nominal attributes are sorted exactly like numeric
       ones (missing values coded as Float.MAX_VALUE go to the end), so both
       kinds share one code path. The original had two identical branches plus
       a redundant throwaway int[] allocation and a block of dead commented-out
       code, all removed here. */
    sortedIndices = new int[numAttributes][];
    for (int a = 0; a < numAttributes; a++) {
        if (a == classIndex) {
            continue; // the class column is never split on
        }
        sortedIndices[a] = FastRfUtils.sort(vals[a]);
    }
}
From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java
License:Apache License
/**
 * Builds an empty SAMOA {@code Instances} object mirroring the header of the
 * given Weka dataset: same attributes (converted one by one), same relation
 * name, and the same class index.
 *
 * @param instances the Weka dataset whose header is converted
 * @return a SAMOA dataset with identical structure and zero instances
 */
public Instances samoaInstancesInformation(weka.core.Instances instances) {
    final int attributeCount = instances.numAttributes();
    List<Attribute> attributeList = new ArrayList<Attribute>(attributeCount);
    for (int index = 0; index < attributeCount; index++) {
        attributeList.add(samoaAttribute(index, instances.attribute(index)));
    }
    Instances converted = new Instances(instances.relationName(), attributeList, 0);
    converted.setClassIndex(instances.classIndex());
    return converted;
}
From source file:com.yimei.core.Discretizer.java
public static Instances discretize(Instances data) { Instances discData = new Instances(data); for (int i = 0; i < data.numAttributes(); i++) { if (data.attribute(i).isNumeric()) { double max = Double.MIN_VALUE; double min = Double.MAX_VALUE; for (int j = 0; j < data.size(); j++) { double value = data.instance(j).value(i); if (value > max) { max = value;// www . j a v a 2s.c om } if (value < min) { min = value; } } double interval = (max - min) / numOfIntervals; for (int j = 0; j < data.size(); j++) { long discValue = Math.round((data.instance(j).value(i) - min) / interval); discData.instance(j).setValue(i, discValue); } } } return discData; }
From source file:com.zazhu.BlueHub.BlueHub.java
License:Apache License
/** * receives the last reads from the sensors and creates the features we use * only the acc x,y,z (either from internal or external sensor) * /*from w ww . ja v a2s.co m*/ * @param sensorQueue * @throws Exception */ private Instance processingSenseData(Queue<String> sensorQueue, char whatSensor) throws Exception { BufferedReader reader; Instances format; Instance newInstance = null; Log.d(TAG, "Queue size = " + mQueueSize); if (sensorQueue.size() <= 0) throw new Exception("Queue empty"); // create the arrays that will contain the accelerometer data // s.x s.y s.z double[] sx = new double[sensorQueue.size()]; double[] sy = new double[sensorQueue.size()]; double[] sz = new double[sensorQueue.size()]; String rawReading; StringTokenizer st; int index; if (D) Log.e(TAG, "+++ COMPUTING FEATURES +++"); // 1. collect raw data. what kind of sensing data? external vs. internal switch (whatSensor) { case EXTERNAL: index = 0; while ((rawReading = sensorQueue.poll()) != null) { // FORMAT: // "Time_SensorName_SensorNumber_Counter_Xacc_Yacc_Zacc_Xgyro_Ygyro_checksum" // position of the values needed: s.x = 4, s.y = 5, s.z = 6 st = new StringTokenizer(rawReading, FIELD_SEP); // not needed data for (int i = 0; i < 4; i++) st.nextToken(); // s.x, s.y, s.z sx[index] = Double.valueOf(st.nextToken()); sy[index] = Double.valueOf(st.nextToken()); sz[index] = Double.valueOf(st.nextToken()); index += 1; } // 2. 
process raw data // 2.1 read the input format for the instance (TODO must be changed to // use weka classes) reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_extern))); try { format = new Instances(reader); if (format.classIndex() == -1) format.setClassIndex(format.numAttributes() - 1); // 2.2 create a new instance newInstance = new DenseInstance(7); newInstance.setDataset(format); // set attributes newInstance.setValue(format.attribute(0), Feature.getStd(sx)); newInstance.setValue(format.attribute(1), Feature.getStd(sy)); newInstance.setValue(format.attribute(2), Feature.getStd(sz)); newInstance.setValue(format.attribute(3), Feature.getMean(sx)); newInstance.setValue(format.attribute(4), Feature.getMean(sy)); newInstance.setValue(format.attribute(5), Feature.getMean(sz)); // set unknown class newInstance.setMissing(format.attribute(6)); } catch (IOException e) { e.printStackTrace(); } break; case INTERNAL: index = 0; while ((rawReading = sensorQueue.poll()) != null) { // FORMAT "Xacc_Yacc_Zacc" // position of the values needed: s.x = 0, s.y = 1, s.z = 2 st = new StringTokenizer(rawReading, FIELD_SEP); // s.x, s.y, s.z sx[index] = Double.valueOf(st.nextToken()); sy[index] = Double.valueOf(st.nextToken()); sz[index] = Double.valueOf(st.nextToken()); index += 1; } // 2. 
process raw data // 2.1 read the input format for the instance (TODO must be changed to // use weka classes) reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_intern))); try { format = new Instances(reader); if (format.classIndex() == -1) format.setClassIndex(format.numAttributes() - 1); // 2.2 create a new instance newInstance = new DenseInstance(7); newInstance.setDataset(format); // set attributes newInstance.setValue(format.attribute(0), Feature.getStd(sx)); newInstance.setValue(format.attribute(1), Feature.getStd(sy)); newInstance.setValue(format.attribute(2), Feature.getStd(sz)); newInstance.setValue(format.attribute(3), Feature.getMean(sx)); newInstance.setValue(format.attribute(4), Feature.getMean(sy)); newInstance.setValue(format.attribute(5), Feature.getMean(sz)); // set unknown class newInstance.setMissing(format.attribute(6)); } catch (IOException e) { e.printStackTrace(); } break; default: if (D) Log.e(TAG, "+++ COMPUTING FEATURES: NO VALUE FOR THE SENSOR READING +++"); break; } return newInstance; }
From source file:com.zooclassifier.Model.FileLoader.java
/**
 * Loads an ARFF dataset from {@code filename}, treating the last attribute as
 * the label: fills {@code attributes} (per-instance attribute strings),
 * {@code labels}, and the legal-value tables for both.
 *
 * @param filename path to the ARFF file
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException           if reading the ARFF data fails
 */
public FileLoader(String filename) throws FileNotFoundException, IOException {
    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
        ArffLoader.ArffReader arff = new ArffLoader.ArffReader(reader);
        Instances data = arff.getData();
        data.setClassIndex(data.numAttributes() - 1);

        // Per-instance string values: all attributes except the class, plus
        // the class value itself in labels[].
        attributes = new String[data.numInstances()][data.numAttributes() - 1];
        labels = new String[data.numInstances()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance instance = data.instance(i);
            for (int j = 0; j < instance.numAttributes() - 1; j++) {
                attributes[i][j] = instance.stringValue(j);
            }
            labels[i] = instance.stringValue(instance.numAttributes() - 1);
        }

        // Legal (declared) values per attribute and for the label attribute.
        attributesLegalValues = new String[data.numAttributes() - 1][];
        for (int i = 0; i < data.numAttributes() - 1; i++) {
            attributesLegalValues[i] = (String[]) Collections.list(data.attribute(i).enumerateValues())
                    .toArray(new String[data.attribute(i).numValues()]);
        }
        labelsLegalValues = (String[]) Collections
                .list(data.attribute(data.numAttributes() - 1).enumerateValues())
                .toArray(new String[data.attribute(data.numAttributes() - 1).numValues()]);
    } finally {
        reader.close(); // BUGFIX: the original never closed the file reader
    }
}
From source file:Controller.CtlDataMining.java
public String definirEncabezado(Instances data) { /*Se define el encabezado del mensaje, teniendo en cuanta el atributo clase*/ String descripcion = "<b>El atributo clase seleccionado es " + data.attribute(data.numAttributes() - 1).name() + "</b>"; descripcion += " <b>con posibles valores:</b> "; /*Se recorren los posibles valores del atributo clase*/ for (int z = 0; z < data.attribute(data.numAttributes() - 1).numValues(); z++) { descripcion += "<b>" + data.attribute(data.numAttributes() - 1).value(z) + "</b> "; }//from w w w . ja va 2 s . com return descripcion; }