List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
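A minimal, self-contained sketch of the basic pattern before the project examples below (assuming the Weka 3.7+ API, where DenseInstance is the concrete Instance implementation): define the attributes, create an empty Instances container, then append rows with add().

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddExample {
    public static void main(String[] args) {
        // Define the schema: a single numeric attribute.
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(new Attribute("value"));

        // Create an empty dataset with initial capacity 0.
        Instances dataset = new Instances("example", attributes, 0);

        // Build a row, fill its values, and append it to the dataset.
        Instance row = new DenseInstance(dataset.numAttributes());
        row.setValue(dataset.attribute(0), 42.0);
        dataset.add(row);

        System.out.println(dataset);
    }
}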
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
@Override
protected Instances process(Instances instances) throws Exception {
    Instances result = new Instances(getOutputFormat(), 0);

    // Convert all instances w/o normalization
    ArrayList<Instance> converted = new ArrayList<Instance>();
    ArrayList<Double> docLengths = new ArrayList<Double>();
    if (!isFirstBatchDone()) {
        m_AvgDocLength = 0;
    }
    for (int i = 0; i < instances.size(); i++) {
        double docLength = convertInstancewoDocNorm(instances.instance(i), converted);

        // Need to compute average document length if necessary
        if (m_filterType != FILTER_NONE) {
            if (!isFirstBatchDone()) {
                m_AvgDocLength += docLength;
            }
            docLengths.add(docLength);
        }
    }

    if (m_filterType != FILTER_NONE) {
        if (!isFirstBatchDone()) {
            m_AvgDocLength /= instances.size();
        }

        // Perform normalization if necessary.
        if (isFirstBatchDone() || (!isFirstBatchDone() && m_filterType == FILTER_NORMALIZE_ALL)) {
            for (int i = 0; i < converted.size(); i++) {
                normalizeInstance(converted.get(i), docLengths.get(i));
            }
        }
    }

    // Push all instances into the output queue
    for (int i = 0; i < converted.size(); i++) {
        result.add(converted.get(i));
    }

    return result;
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) {
    // Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());
        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java
License:Apache License
/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 *
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {
        Instance inst = new DenseInstance(5);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        instances.add(inst);
    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    System.out.println("Instances: " + instances.toSummaryString());
    System.out.println("Max Clusters: " + n);

    // Create the AddCluster filter of Weka and set up the hierarchical
    // clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);
        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);
    } else {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);
    }

    return newInst;
}
From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java
License:Apache License
/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 *
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");
    Attribute duration = new Attribute("duration");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);
    attr.add(duration);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {
        Instance inst = new DenseInstance(6);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        inst.setValue(duration, event.getEndMinute() - event.getStartMinute());
        instances.add(inst);
    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    log.info("Instances: " + instances.toSummaryString());
    log.info("Max Clusters: " + n);

    // Create the AddCluster filter of Weka and set up the hierarchical
    // clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);
        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);
    } else {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);
    }

    return newInst;
}
From source file:eu.cassandra.server.mongo.csn.MongoCluster.java
License:Apache License
/**
 * @param clusterBasedOn
 * @param graph_id
 * @return
 */
private Instances getInstances(String clusterBasedOn, String graph_id) {
    FastVector attributes = new FastVector();
    if (clusterBasedOn.equalsIgnoreCase("hoursP") || clusterBasedOn.equalsIgnoreCase("hoursQ")
            || clusterBasedOn.equalsIgnoreCase("hoursE")) {
        for (int i = 0; i < 24; i++) {
            attributes.addElement(new Attribute("att" + i));
        }
    } else {
        attributes.addElement(new Attribute("att0"));
    }
    Instances instances = new Instances("data", attributes, 0);

    // Get all nodes
    DBCursor nodes = DBConn.getConn().getCollection(MongoGraphs.COL_CSN_NODES)
            .find(new BasicDBObject("graph_id", graph_id));
    while (nodes.hasNext()) {
        double[] values = null;
        DBObject installationDBObj = nodes.next();
        nodeIDs.add(installationDBObj.get("_id").toString());

        // If the graph was built based on Person or Installation Type, do nothing
        if (clusterBasedOn.equalsIgnoreCase(MongoEdges.PersonType)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.InstallationType)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.TransformerID)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.TopologicalDistance)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.Location)
                || clusterBasedOn.equalsIgnoreCase(MongoEdges.SocialDistance)) {
            continue;
        } else {
            Object vS = installationDBObj.get(CSNTypes.getCsnTypes(clusterBasedOn));
            if (vS != null) {
                if (clusterBasedOn.equalsIgnoreCase("hoursP") || clusterBasedOn.equalsIgnoreCase("hoursQ")
                        || clusterBasedOn.equalsIgnoreCase("hoursE")) {
                    if (vS instanceof BasicDBList) {
                        BasicDBList v = (BasicDBList) vS;
                        values = new double[v.size()];
                        for (int i = 0; i < v.size(); i++) {
                            Object d = v.get(i);
                            if (d instanceof Double) {
                                values[i] = (Double) d;
                            }
                        }
                    }
                } else {
                    Double v = Double.parseDouble(vS.toString());
                    values = new double[1];
                    values[0] = v;
                }
            }
        }

        if (values != null) {
            Instance instance = new Instance(1, values);
            instances.add(instance);
        }
    }
    nodes.close();

    return instances;
}
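Note that this example uses the older Weka 3.6 API, where Instance is a concrete class (new Instance(weight, values)) and attribute lists are built with FastVector. In Weka 3.7 and later, Instance is an interface, DenseInstance is the usual concrete class, and FastVector is deprecated in favor of ArrayList. The convertToArff examples further down use the same older API.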
From source file:eu.cassandra.utils.Utils.java
License:Apache License
/**
 * This function is used in order to create clusters of points of interest
 * based on the active power difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @return The newly created clusters with the points that are comprising
 *         them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimating the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each point of interest is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {
        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));
        instances.add(inst);
    }
    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create the AddCluster filter of Weka and set up the k-means clusterer.
    AddCluster addcluster = new AddCluster();
    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster data set
    newInst = Filter.useFilter(instances, addcluster);
    // System.out.println(newInst.toString());

    // Parse through the dataset to see where each point is placed in the
    // clusters.
    for (int i = 0; i < newInst.size(); i++) {
        String cluster = newInst.get(i).stringValue(newInst.attribute(2));
        cluster = cluster.replace("cluster", "");
        log.debug("Point of Interest: " + i + " Cluster: " + cluster);
        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sorting each cluster's points by their minutes.
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sorting all clusters by their active power.
    Collections.sort(result, Constants.comp5);

    return result;
}
From source file:examples.Pair.java
License:Open Source License
public static Pair<Instances, Instances> seprateTestAndTrainingSets(Instances instances, double probability) {
    Instances trainingSet = new Instances(instances, 0, 0);
    Instances testSet = new Instances(instances, 0, 0);
    Random rand = new Random();
    rand.setSeed(1L);

    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        if (rand.nextDouble() > probability) {
            testSet.add(instance);
        } else {
            trainingSet.add(instance);
        }
    }
    return new Pair<Instances, Instances>(trainingSet, testSet);
}
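This pattern works because Instances.add() shallow-copies the instance before appending it, so routing each source instance into one of the two sets does not leave the datasets sharing mutable state (note, however, that string and relational attribute values are not transferred by the copy).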
From source file:examples.TrainerFrame.java
private void jButtonTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonTrainActionPerformed
    // This is a temporary fix to make it appear like it's finished
    pBar.setMaximum(7);
    pBar.setValue(0);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Target Features");

    // Generate target features
    String featuresTarget = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresTarget = GlobalData.getFeatures(jTextFieldCallDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(1);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Other Features");

    // Generate non-target features
    String featuresOther = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresOther = GlobalData.getFeatures(jTextFieldOtherSoundDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(2);
    pBar.repaint();
    jLabelTrainerStatus.setText("Parsing Features");

    // Load target ARFF file
    BufferedReader readerTarget;
    Instances dataTarget = null;
    try {
        readerTarget = new BufferedReader(new FileReader(featuresTarget));
        dataTarget = new Instances(readerTarget);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(3);
    pBar.repaint();

    // Load other ARFF file
    BufferedReader readerOther;
    Instances dataOther = null;
    try {
        readerOther = new BufferedReader(new FileReader(featuresOther));
        dataOther = new Instances(readerOther);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(4);
    pBar.repaint();
    jLabelTrainerStatus.setText("Training Classifier");

    // Merge the two datasets under a new nominal class attribute
    Instances newData = new Instances(dataTarget);
    FastVector typeList = new FastVector() {
    };
    typeList.add("target");
    typeList.add("other");

    newData.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            newData.numAttributes());
    for (Instance instance : newData) {
        instance.setValue(newData.numAttributes() - 1, "target");
    }

    dataOther.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            dataOther.numAttributes());
    for (Instance instance : dataOther) {
        instance.setValue(newData.numAttributes() - 1, "other");
        newData.add(instance);
    }

    newData.setClassIndex(newData.numAttributes() - 1);
    pBar.setValue(5);
    pBar.repaint();

    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    try {
        saver.setFile(new File("AnimalCallTrainingFile.arff"));
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(6);
    pBar.repaint();

    // Train an unpruned J48 decision tree
    String[] options = new String[1];
    options[0] = "-U";
    J48 tree = new J48();
    try {
        tree.setOptions(options);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    Debug.saveToFile("Classifiers/" + jTextFieldClassifierName.getText(), tree);
    System.out.println("classifier saved");
    MyClassifier tempClass = new MyClassifier(jTextFieldClassifierName.getText());
    GlobalData.classifierList.addElement(tempClass.name);

    pBar.setValue(7);
    pBar.repaint();
    jLabelTrainerStatus.setText("Finished");
}
From source file:facebookpostpuller.PostModel.java
public static void convertToArff(File file) throws Exception {
    FastVector atts;
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));

    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();
        vals = new double[data.numAttributes()];

        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();

        Preprocess pre = new Preprocess();
        message = pre.emoticons(message);
        message = pre.emoji(message);
        message = pre.url(message);
        // StringFilter filter = new StringFilter(message);

        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        int age = calculateAge(user.getBirthdayAsDate());
        if (age >= 13 && age <= 17) {
            vals[2] = attVals.indexOf("13-17");
        } else if (age >= 18 && age <= 24) {
            vals[2] = attVals.indexOf("18-24");
        } else if (age >= 25 && age <= 34) {
            vals[2] = attVals.indexOf("25-34");
        } else if (age >= 35 && age <= 44) {
            vals[2] = attVals.indexOf("35-44");
        } else if (age >= 45) { // Modified 6/11/2014
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));
        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}
From source file:facebookpostpuller.PostModelBACKUP.java
public static void convertToArff(File file) throws Exception {
    FastVector atts;
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));

    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();
        vals = new double[data.numAttributes()];

        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();
        // StringFilter filter = new StringFilter(message);

        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        int age = calculateAge(user.getBirthdayAsDate());
        if (age >= 13 && age <= 17) {
            vals[2] = attVals.indexOf("13-17");
        } else if (age >= 18 && age <= 24) {
            vals[2] = attVals.indexOf("18-24");
        } else if (age >= 25 && age <= 34) {
            vals[2] = attVals.indexOf("25-34");
        } else if (age >= 35 && age <= 44) {
            vals[2] = attVals.indexOf("35-44");
        } else if (age >= 45 && age <= 54) {
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));
        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}