Example usage for weka.core Instances setClass

List of usage examples for weka.core Instances setClass

Introduction

On this page you can find example usages of weka.core Instances.setClass.

Prototype

public void setClass(Attribute att) 

Source Link

Document

Sets the class attribute.

Usage

From source file:de.ugoe.cs.cpdp.loader.DecentDataLoader.java

License:Apache License

/**
 * Loads the given DECENT file and transforms it decent -> arffx -> arff.
 * If a previously generated ARFF file already exists next to the input
 * file, it is loaded directly and the transformation is skipped.
 *
 * @param file the DECENT model file to load
 * @return Instances in WEKA format
 */
@Override
public Instances load(File file) {

    // Set attributeFilter
    setAttributeFilter();

    // Register MetaModels
    try {
        registerMetaModels();
    } catch (Exception e1) {
        Console.printerrln("Metamodels cannot be registered!");
        e1.printStackTrace();
    }

    // Derive the locations of the DECENT, ARFFx, log, and ARFF models
    // from the folder that contains the input file.
    String decentModelLocation = file.getAbsolutePath();
    String pathToDecentModelFolder = decentModelLocation.substring(0,
            decentModelLocation.lastIndexOf(File.separator));
    String arffxModelLocation = pathToDecentModelFolder + "/model.arffx";
    String logModelLocation = pathToDecentModelFolder + "/model.log";
    String arffLocation = pathToDecentModelFolder + "/model.arff";

    // If arff File exists, load from it!
    if (new File(arffLocation).exists()) {
        System.out.println("Loading arff File...");
        Instances data;
        // try-with-resources: the reader is closed even if parsing fails
        try (BufferedReader reader = new BufferedReader(new FileReader(arffLocation))) {
            data = new Instances(reader);
        } catch (FileNotFoundException e) {
            Console.printerrln("File with path: " + arffLocation + " was not found.");
            throw new RuntimeException(e);
        } catch (IOException e) {
            Console.printerrln("File with path: " + arffLocation + " cannot be read.");
            throw new RuntimeException(e);
        }

        // Set class attribute if not set
        if (data.classIndex() == -1) {
            Attribute classAttribute = data.attribute(classAttributeName);
            if (classAttribute == null) {
                // fail with a clear message instead of an NPE inside setClass
                throw new RuntimeException(
                        "Class attribute " + classAttributeName + " not found in " + arffLocation);
            }
            data.setClass(classAttribute);
        }

        return data;
    }

    // Location of EOL Scripts
    String preprocess = "./decent/epsilon/query/preprocess.eol";
    String arffxToArffSource = "./decent/epsilon/query/addLabels.eol";

    // Set Log Properties
    System.setProperty("epsilon.logLevel", logLevel);
    System.setProperty("epsilon.logToFile", logToFile);
    System.setProperty("epsilon.logFileAvailable", "false");

    // Set decent2arffx Properties
    System.setProperty("epsilon.transformation.decent2arffx.skipSource", "false");
    System.setProperty("epsilon.transformation.decent2arffx.type", "code");

    // Preprocess data: transform decent -> arffx
    try {
        IEolExecutableModule preProcessModule = loadModule(preprocess);
        IModel preProcessDecentModel = modelHandler.getDECENTModel(decentModelLocation, true, true);
        IModel preProcessArffxModel = modelHandler.getARFFxModel(arffxModelLocation, false, true);
        preProcessModule.getContext().getModelRepository().addModel(preProcessDecentModel);
        preProcessModule.getContext().getModelRepository().addModel(preProcessArffxModel);
        execute(preProcessModule, logModelLocation);
        preProcessDecentModel.dispose();
        preProcessArffxModel.dispose();
        preProcessModule.reset();
    } catch (URISyntaxException e) {
        Console.printerrln("URI Syntax for decent or arffx model is wrong.");
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Transform to arff, for label and confidence attributes
    try {
        IEolExecutableModule arffxToArffModule = loadModule(arffxToArffSource);
        IModel arffxToArffArffxModel = modelHandler.getARFFxModel(arffxModelLocation, true, true);
        arffxToArffModule.getContext().getModelRepository().addModel(arffxToArffArffxModel);
        execute(arffxToArffModule, logModelLocation);
        arffxToArffArffxModel.dispose();
        // can be stored and retained alternatively
        arffxToArffModule.reset();
    } catch (URISyntaxException e) {
        Console.printerrln("URI Syntax for arffx model is wrong.");
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Unregister MetaModels, otherwise the cast below will fail.
    // Copy the registry entries first so we do not remove from the
    // registry while iterating over its key set.
    HashMap<String, Object> metaModelCache = new HashMap<>();
    for (String key : EPackage.Registry.INSTANCE.keySet()) {
        metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key));
    }

    for (String key : metaModelCache.keySet()) {
        EPackage.Registry.INSTANCE.remove(key);
    }

    // Workaround to generate a usable URI. An absolute path is not
    // possible, therefore we need to construct a relative path

    URL location = DecentDataLoader.class.getProtectionDomain().getCodeSource().getLocation();
    String basePath = location.getFile();

    // Location is the bin folder, so we need to delete the last 4 characters
    basePath = basePath.substring(0, basePath.length() - 4);
    String relativePath = new File(basePath).toURI().relativize(new File(arffxModelLocation).toURI()).getPath();

    // Load the arffx file and create WEKA Instances
    ARFFxResourceTool tool = new ARFFxResourceTool();
    Resource resource = tool.loadResourceFromXMI(relativePath, "arffx");

    Instances dataSet = null;
    for (EObject o : resource.getContents()) {
        Model m = (Model) o;
        dataSet = createWekaDataFormat(m);

        for (Instance i : m.getData()) {
            createWekaInstance(dataSet, i);
        }
    }
    if (dataSet == null) {
        // resource had no contents; fail with context instead of an NPE below
        throw new RuntimeException("ARFFx model " + arffxModelLocation + " contains no data.");
    }

    // Set class attribute
    Attribute classAttribute = dataSet.attribute(classAttributeName);
    dataSet.setClass(classAttribute);

    // Save as ARFF so subsequent loads can take the fast path above
    save(dataSet, arffLocation);

    return dataSet;

}

From source file:de.upb.timok.utils.DatasetTransformationUtils.java

License:Open Source License

/**
 * Converts a training set of raw feature vectors into a WEKA {@link Instances}
 * object. One numeric attribute is created per feature dimension, plus a
 * nominal class attribute ("class") with the single value "normal" appended as
 * the last column; every sample is labeled with that class (index 0).
 *
 * @param trainingSet the feature vectors; must be non-empty, all vectors are
 *                    expected to have the same length as the first one
 * @return the WEKA instances with the class attribute set to the last column
 * @throws IllegalArgumentException if the training set is null or empty
 */
public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    if (trainingSet == null || trainingSet.isEmpty()) {
        // fail fast with a clear message instead of an IndexOutOfBoundsException
        throw new IllegalArgumentException("trainingSet must contain at least one sample");
    }
    final double[] sample = trainingSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }

    // the class attribute is nominal with the single value "normal"
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute classAttribute = new Attribute("class", classStrings);

    // Declare the feature vector
    fvWekaAttributes.add(classAttribute);
    final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size());
    result.setClass(classAttribute);
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : trainingSet) {
        // append the class value (0 == "normal") as the last column
        final double[] newInstance = Arrays.copyOf(instance, instance.length + 1);
        newInstance[newInstance.length - 1] = 0;
        final Instance wekaInstance = new DenseInstance(1, newInstance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the polarity detection training functionalities.
 *
 * @throws IOException/*from  w ww  .  ja  va2s. c o  m*/
 * input output exception if problems with corpora
 */
public final void trainATP(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    String classes = parsedArguments.getString("classnum");
    int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);
    System.err.println("trainATP : Corpus read, creating features");
    Features atpTrain = new Features(reader, paramFile, classes);
    Instances traindata;
    if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
        traindata = atpTrain.loadInstancesTAB(true, "atp");
    } else if (corpusFormat.equalsIgnoreCase("tabNotagged") && lang.equalsIgnoreCase("eu")) {
        traindata = atpTrain.loadInstancesConll(true, "atp");
    } else {
        traindata = atpTrain.loadInstances(true, "atp");
    }

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)
    traindata.setClass(traindata.attribute("polarityCat"));
    WekaWrapper classify;
    try {
        Properties params = new Properties();
        params.load(new FileInputStream(paramFile));
        String modelPath = params.getProperty("fVectorDir");
        classify = new WekaWrapper(traindata, true);
        classify.saveModel(modelPath + File.separator + "elixa-atp_" + lang + ".model");
        switch (validation) {
        case "cross":
            classify.crossValidate(foldNum);
            break;
        case "trainTest":
            classify.trainTest();
            break;
        case "both":
            classify.crossValidate(foldNum);
            classify.trainTest();
            break;
        default:
            System.out.println("train-atp: wrong validation option. Model saved but not tested");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the polarity tagging evaluation functionalities (target based
 * polarity). Reads a corpus, predicts a polarity label for every opinion —
 * either with a word-count/lexicon rule-based tagger or with a previously
 * trained ML model — prints one line per opinion (review id, gold label,
 * predicted label, sentence) and stores the predicted polarity back into the
 * corpus reader.
 *
 * @param inputStream corpus to evaluate
 * @throws IOException if there are problems reading the corpora or the parameter file
 * @throws JDOMException if the corpus XML cannot be parsed
 */
public final void evalATP(final InputStream inputStream) throws IOException, JDOMException {

    // evaluation configuration taken from the parsed command-line arguments
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String model = parsedArguments.getString("model");
    String lang = parsedArguments.getString("language");
    String classnum = parsedArguments.getString("classnum");
    boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier");
    boolean printPreds = parsedArguments.getBoolean("outputPredictions");

    //Read corpus sentences
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);

    //Rule-based Classifier.
    if (ruleBased) {
        Properties params = new Properties();
        params.load(new FileInputStream(new File(paramFile)));

        String posModelPath = params.getProperty("pos-model");
        String lemmaModelPath = params.getProperty("lemma-model");
        String kafDir = params.getProperty("kafDir");

        /* polarity lexicon. Domain specific polarity lexicon is given priority.
         * If no domain lexicon is found it reverts to general polarity lexicon.
         * If no general polarity lexicon is found program exits with error message.
        */
        String lex = params.getProperty("polarLexiconDomain", "none");
        if (lex.equalsIgnoreCase("none")) {
            lex = params.getProperty("polarLexiconGeneral", "none");
            if (lex.equalsIgnoreCase("none")) {
                System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity"
                        + " lexicon has been specified. Either specify one or choose ML classifier");
                System.exit(1);
            }
        }
        File lexFile = new File(lex);
        Evaluator evalDoc = new Evaluator(lexFile, "lemma");

        for (String oId : reader.getOpinions().keySet()) {
            // sentence posTagging
            String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir, posModelPath,
                    lemmaModelPath);
            //process the postagged sentence with the word count based polarity tagger
            Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName());
            String lblStr = results.get("polarity");
            // gold label; "?" when the corpus carries no polarity annotation
            String actual = "?";
            if (reader.getOpinion(oId).getPolarity() != null) {
                actual = reader.getOpinion(oId).getPolarity();
            }
            // review id = sentence id without the trailing "_g" suffix
            String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
            System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId));
            // store the prediction back into the corpus structure
            reader.getOpinion(oId).setPolarity(lblStr);
        }
    }
    //ML Classifier (default)
    else {
        Features atpTest = new Features(reader, paramFile, classnum, model);
        Instances testdata;
        if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
            testdata = atpTest.loadInstancesTAB(true, "atp");
        } else {
            testdata = atpTest.loadInstances(true, "atp");
        }
        //   setting class attribute (entCat|attCat|entAttCat|polarityCat)
        testdata.setClass(testdata.attribute("polarityCat"));

        WekaWrapper classify;
        try {
            classify = new WekaWrapper(model);

            System.err.println("evalAtp : going to test the model");
            //sort according to the instanceId
            //traindata.sort(atpTrain.getAttIndexes().get("instanceId"));
            //Instances testdata = new Instances(traindata);
            //testdata.deleteAttributeAt(0);
            //classify.setTestdata(testdata);
            classify.setTestdata(testdata);
            classify.testModel(model);

            if (printPreds) {
                for (String oId : reader.getOpinions().keySet()) {
                    // NOTE(review): instance ids appear to be 1-based (iId - 1
                    // indexes testdata) — confirm against Features.getOpinInst()
                    int iId = atpTest.getOpinInst().get(oId);
                    Instance i = testdata.get(iId - 1);
                    double label = classify.getMLclass().classifyInstance(i);
                    String lblStr = i.classAttribute().value((int) label);
                    String actual = "?";
                    if (reader.getOpinion(oId).getPolarity() != null) {
                        actual = reader.getOpinion(oId).getPolarity();
                    }
                    String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
                    String oSent = reader.getOpinionSentence(oId);
                    // tab formats store one token per line; keep only the first
                    // column of each line and join with spaces for printing
                    if (corpusFormat.startsWith("tab")) {
                        StringBuilder sb = new StringBuilder();
                        for (String kk : oSent.split("\n")) {
                            sb.append(kk.split("\\t")[0]);
                            sb.append(" ");
                        }
                        oSent = sb.toString();
                    }

                    System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t"
                            + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::"));
                    reader.getOpinion(oId).setPolarity(lblStr);
                }
            }
            //reader.print2Semeval2015format(model+"tagATP.xml");
            //reader.print2conll(model+"tagAtp.conll");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the polarity tagging functionalities (target based polarity).
 * Reads a corpus and tags every opinion with a predicted polarity, either with
 * a word-count/lexicon rule-based tagger or with a previously trained ML
 * model. Predictions are printed (review id, gold label, predicted label,
 * sentence) and stored back into the corpus reader.
 *
 * @param inputStream corpus to tag
 * @throws IOException if there are problems reading the corpora or the parameter file
 * @throws JDOMException if the corpus XML cannot be parsed
 */
public final void tagATP(final InputStream inputStream) throws IOException, JDOMException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String model = parsedArguments.getString("model");
    String lang = parsedArguments.getString("language");
    String classnum = parsedArguments.getString("classnum");
    boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier");

    //Read corpus sentences
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);

    //Rule-based Classifier.
    if (ruleBased) {
        Properties params = new Properties();
        params.load(new FileInputStream(new File(paramFile)));

        String posModelPath = params.getProperty("pos-model");
        String lemmaModelPath = params.getProperty("lemma-model");
        String kafDir = params.getProperty("kafDir");

        /* polarity lexicon. Domain specific polarity lexicon is given priority.
         * If no domain lexicon is found it reverts to general polarity lexicon.
         * If no general polarity lexicon is found program exits with error message.
        */
        String lex = params.getProperty("polarLexiconDomain", "none");
        if (lex.equalsIgnoreCase("none")) {
            lex = params.getProperty("polarLexiconGeneral", "none");
            if (lex.equalsIgnoreCase("none")) {
                System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity"
                        + " lexicon has been specified. Either specify one or choose ML classifier");
                System.exit(1);
            }
        }
        File lexFile = new File(lex);
        Evaluator evalDoc = new Evaluator(lexFile, "lemma");

        for (String oId : reader.getOpinions().keySet()) {
            // sentence posTagging
            String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir, posModelPath,
                    lemmaModelPath);
            //process the postagged sentence with the word count based polarity tagger
            Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName());
            String lblStr = results.get("polarity");
            // gold label; "?" when the corpus carries no polarity annotation
            String actual = "?";
            if (reader.getOpinion(oId).getPolarity() != null) {
                actual = reader.getOpinion(oId).getPolarity();
            }
            // review id = sentence id without the trailing "_g" suffix
            String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
            System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId));
            // store the prediction back into the corpus structure
            reader.getOpinion(oId).setPolarity(lblStr);
        }
    } else {
        // ML classifier (default): build features and load instances matching
        // the corpus format (conll loader for untagged Basque corpora)
        Features atpTrain = new Features(reader, paramFile, classnum, model);
        Instances traindata;
        if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
            traindata = atpTrain.loadInstancesTAB(true, "atp");
        } else if (lang.equalsIgnoreCase("eu")
                && (corpusFormat.equalsIgnoreCase("tabNotagged") || corpusFormat.equalsIgnoreCase("ireom"))) {
            traindata = atpTrain.loadInstancesConll(true, "atp");
        } else {
            traindata = atpTrain.loadInstances(true, "atp");
        }

        //   setting class attribute (entCat|attCat|entAttCat|polarityCat)
        traindata.setClass(traindata.attribute("polarityCat"));

        WekaWrapper classify;
        try {
            classify = new WekaWrapper(model);

            System.err.println();
            //sort according to the instanceId
            //traindata.sort(atpTrain.getAttIndexes().get("instanceId"));
            //Instances testdata = new Instances(traindata);
            //testdata.deleteAttributeAt(0);
            //classify.setTestdata(testdata);
            classify.setTestdata(traindata);
            classify.loadModel(model);

            for (String oId : reader.getOpinions().keySet()) {
                // NOTE(review): instance ids appear to be 1-based (iId - 1
                // indexes traindata) — confirm against Features.getOpinInst()
                int iId = atpTrain.getOpinInst().get(oId);
                Instance i = traindata.get(iId - 1);
                double label = classify.getMLclass().classifyInstance(i);
                String lblStr = i.classAttribute().value((int) label);
                String actual = "?";
                if (reader.getOpinion(oId).getPolarity() != null) {
                    actual = reader.getOpinion(oId).getPolarity();
                }
                String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
                String oSent = reader.getOpinionSentence(oId);
                // tab formats store one token per line; keep only the first
                // column of each line and join with spaces for printing
                if (corpusFormat.startsWith("tab")) {
                    StringBuilder sb = new StringBuilder();
                    for (String kk : oSent.split("\n")) {
                        sb.append(kk.split("\\t")[0]);
                        sb.append(" ");
                    }
                    oSent = sb.toString();
                }

                System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t"
                        + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::"));
                reader.getOpinion(oId).setPolarity(lblStr);
            }

            //reader.print2Semeval2015format(model+"tagATP.xml");
            //reader.print2conll(model+"tagAtp.conll");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the train-atc functionalities. Trains ATC using a double
 * one-vs-all classifier (first over entity categories E, then over attribute
 * categories A) to produce E#A aspect-category opinions, which are finally
 * written out in SemEval-2015 format.
 *
 * @param inputStream corpus used for training (or testing, with --testOnly)
 * @throws IOException if there are problems reading the corpora
 */
public final void trainATC2(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String testFile = parsedArguments.getString("testset");
    String paramFile2 = parsedArguments.getString("params2");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    //String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    boolean onlyTest = parsedArguments.getBoolean("testOnly");
    // minimum one-vs-all score for a class to be accepted (first / second pass)
    double threshold = 0.5;
    double threshold2 = 0.5;
    // NOTE(review): hard-coded absolute path — should come from configuration
    String modelsPath = "/home/inaki/elixa-atp/ovsaModels";

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");

    // in test-only mode replace the training instances with the test set
    if (onlyTest) {
        if (FileUtilsElh.checkFile(testFile)) {
            System.err.println("read from test file");
            reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat,
                    nullSentenceOpinions, lang);
            atcTrain.setCorpus(reader);
            traindata = atcTrain.loadInstances(true, "atc");
        }
    }

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)

    //HashMap<String, Integer> opInst = atcTrain.getOpinInst();      
    //WekaWrapper classifyAtts;
    WekaWrapper onevsall;
    try {

        //classify.printMultilabelPredictions(classify.multiLabelPrediction());      */   

        // FIRST PASS: one-vs-all over entity categories ("entCat"); the other
        // category attributes are removed so they cannot leak into training
        Instances entdata = new Instances(traindata);
        entdata.deleteAttributeAt(entdata.attribute("attCat").index());
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        entdata.setClassIndex(entdata.attribute("entCat").index());
        onevsall = new WekaWrapper(entdata, true);

        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "entCat");
            System.out.println("trainATC: one vs all models ready");
        }
        onevsall.setTestdata(entdata);
        // per-instance map of class -> one-vs-all score
        HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath,
                paramFile + "entCat");
        System.out.println("trainATC: one vs all predictions ready");
        // reverse mapping: instance id -> opinion id
        HashMap<Integer, String> instOps = new HashMap<Integer, String>();
        for (String oId : atcTrain.getOpinInst().keySet()) {
            instOps.put(atcTrain.getOpinInst().get(oId), oId);
        }

        // rebuild the features for the second pass (attribute categories)
        atcTrain = new Features(reader, paramFile2, "3");
        entdata = atcTrain.loadInstances(true, "attTrain2_data");
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        //entdata.setClassIndex(entdata.attribute("entCat").index());

        Attribute insAtt = entdata.attribute("instanceId");
        // largest instanceId currently in the data; new instances added below
        // get ids above this value
        double maxInstId = entdata.kthSmallestValue(insAtt, entdata.numDistinctValues(insAtt) - 1);
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins" + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold) {
                    //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));                  
                    // additional accepted classes: clone the instance with a
                    // fresh instanceId so each predicted entity gets its own row
                    if (oSubId >= 1) {
                        Instance newIns = new SparseInstance(currentInst);
                        newIns.setDataset(entdata);
                        entdata.add(newIns);
                        newIns.setValue(insAtt, maxInstId + oSubId);
                        newIns.setClassValue(cl);
                        instOps.put((int) maxInstId + oSubId, oId);

                    }
                    // first accepted class: relabel the current instance in place
                    else {
                        currentInst.setClassValue(cl);
                        //create and add opinion to the structure
                        //   trgt, offsetFrom, offsetTo, polarity, cat, sId);
                        //Opinion op = new Opinion(instOps.get(i)+"_"+oSubId, "", 0, 0, "", cl, sId);
                        //reader.addOpinion(op);
                    }
                    oSubId++;
                }
            } //finished updating instances data                                    
        }

        // SECOND PASS: one-vs-all over attribute categories ("attCat")
        entdata.setClass(entdata.attribute("attCat"));
        onevsall = new WekaWrapper(entdata, true);

        /**
         *  Second classifier (attribute categories)
         * 
         * */
        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "attCat");
            System.out.println("trainATC: one vs all attcat models ready");
        }

        // NOTE(review): trained with suffix "attCat" but predicted with
        // "entAttCat" — likely a copy-paste mistake; confirm intended suffix
        ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat");

        insAtt = entdata.attribute("instanceId");
        // NOTE(review): computed differently from the first pass
        // (insAtt.numValues() vs numDistinctValues(insAtt) - 1) — for a numeric
        // attribute numValues() may not yield the largest id; confirm
        maxInstId = entdata.kthSmallestValue(insAtt, insAtt.numValues());
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins: " + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold2) {
                    ///System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                    // NOTE(review): inner check repeats the outer one with
                    // `threshold` instead of `threshold2`; both are 0.5 so it is
                    // currently redundant — confirm which threshold is intended
                    if (ovsaRes.get(i).get(cl) > threshold) {
                        //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));                  
                        // additional accepted classes: add extra E#A opinions
                        if (oSubId >= 1) {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            //create and add opinion to the structure
                            //   trgt, offsetFrom, offsetTo, polarity, cat, sId);                     
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        // first accepted class: replace the original opinion
                        else {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            //create and add opinion to the structure
                            //   trgt, offsetFrom, offsetTo, polarity, cat, sId);
                            reader.removeOpinion(oId);
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        oSubId++;
                    }
                } //finished updating instances data                                    
            }
        }
        reader.print2Semeval2015format(paramFile + "entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }

    //traindata.setClass(traindata.attribute("entAttCat"));
    System.err.println("DONE CLI train-atc2 (oneVsAll)");
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the train functionalities.
 * @throws Exception /*  w w  w  .ja v a2 s. c om*/
 */
public final void tagATC(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    //String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)

    //HashMap<String, Integer> opInst = atcTrain.getOpinInst();
    WekaWrapper classify;
    try {
        //train first classifier (entities)
        traindata.setClass(traindata.attribute("entCat"));
        classify = new WekaWrapper(traindata, true);
        classify.crossValidate(foldNum);
        //Classifier entityCl = classify.getMLclass().;

        //train second classifier (attributtes)
        traindata.setClass(traindata.attribute("attCat"));
        classify.setTraindata(traindata);
        classify.crossValidate(foldNum);
        //Classifier attCl = classify.getMLclass();

        classify.printMultilabelPredictions(classify.multiLabelPrediction());

    } catch (Exception e) {
        e.printStackTrace();
    }

    traindata.setClass(traindata.attribute("entAttCat"));
    System.err.println("DONE CLI train-atc");
}

From source file:epsi.i5.datamining.Weka.java

/**
 * Loads the ARFF data set named by {@code fileOne} and evaluates several
 * classifiers on it with 10-fold cross-validation, printing a confusion
 * matrix and accuracy for each nominal classifier (first attribute as class)
 * and a correlation coefficient for each regression scheme (second attribute
 * as class).
 *
 * @throws FileNotFoundException if the ARFF file does not exist
 * @throws IOException           if the ARFF file cannot be read
 * @throws Exception             if a classifier fails to build or evaluate
 */
public void excutionAlgo() throws FileNotFoundException, IOException, Exception {
    final Instances data;
    // try-with-resources: the original leaked the reader if parsing threw.
    try (BufferedReader reader = new BufferedReader(
            new FileReader("src/epsi/i5/data/" + fileOne + ".arff"))) {
        data = new Instances(reader);
    }

    // Nominal task: first attribute is the class.
    data.setClass(data.attribute(0));
    evaluateNominal(new NaiveBayes(), data, "******** Naive Bayes ********");
    System.out.println("");
    evaluateNominal(new J48(), data, "************ J48 ************");
    System.out.println("");
    evaluateNominal(new DecisionTable(), data, "******* DecisionTable *******");
    System.out.println("");
    evaluateNominal(new OneR(), data, "************ OneR ***********");

    // Polarity (regression task): second attribute is the class.
    data.setClass(data.attribute(1));
    System.out.println("");
    evaluateNumeric(new M5Rules(), data, "********** M5Rules **********");
    System.out.println("");
    evaluateNumeric(new LinearRegression(), data, "********** linearR **********");
}

/**
 * Cross-validates a nominal-class classifier and prints its confusion matrix
 * and percentage of correctly classified instances.
 *
 * @param cls    classifier to build and evaluate
 * @param data   data set whose class attribute is already set
 * @param header banner line naming the classifier in the printed report
 * @throws Exception if building or evaluating the classifier fails
 */
private void evaluateNominal(weka.classifiers.Classifier cls, Instances data, String header)
        throws Exception {
    cls.buildClassifier(data);
    Evaluation eval = new Evaluation(data);
    // Seeded Random(1) keeps the folds (and thus the output) reproducible.
    eval.crossValidateModel(cls, data, 10, new Random(1));
    eval.evaluateModel(cls, data);
    System.out.println("*****************************");
    System.out.println(header);
    System.out.println(eval.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(eval.pctCorrect());
}

/**
 * Cross-validates a numeric-class (regression) scheme and prints its
 * correlation coefficient.
 *
 * @param cls    regression scheme to build and evaluate
 * @param data   data set whose class attribute is already set
 * @param header banner line naming the scheme in the printed report
 * @throws Exception if building or evaluating the scheme fails
 */
private void evaluateNumeric(weka.classifiers.Classifier cls, Instances data, String header)
        throws Exception {
    cls.buildClassifier(data);
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(cls, data, 10, new Random(1));
    eval.evaluateModel(cls, data);
    System.out.println("*****************************");
    System.out.println(header);
    System.out.println(eval.correlationCoefficient());
}

From source file:gate.plugin.learningframework.data.CorpusRepresentationWeka.java

/**
 * Create a Weka dataset from just the meta-information of the Mallet representation.
 * This creates an empty Instances object that has all the attributes constructed from 
 * the information we have in the Mallet representation. 
 * The dataset will always have a class attribute defined: if there is a mallet target alphabet,
 * a nominal (class) attribute, otherwise a numeric (regression) attribute.
 */
public static Instances emptyDatasetFromMallet(CorpusRepresentationMallet cr) {
    if (!(cr instanceof CorpusRepresentationMalletTarget)) {
        throw new GateRuntimeException("Conversion to weka not implemented yet: " + cr.getClass());
    }
    InstanceList malletInstances = cr.getRepresentationMallet();
    Alphabet dataAlph = malletInstances.getDataAlphabet();
    Pipe pipe = malletInstances.getPipe();
    // The pipe should always be an instance of LFPipe, but we allow this to be used for
    // instance lists which have been created in a different way and contain some other type
    // of Pipe. If we do have an LFPipe, we create a map that can be used to figure out which
    // of the mallet features are either boolean or nominal with a numeric coding. Otherwise,
    // we regard all features as numeric.

    // Maps from the mallet feature name to the alphabet of a nominal feature we have
    // stored in our attribute, or to a placeholder alphabet containing true/false for
    // a boolean feature.
    Alphabet booleanAlph = new Alphabet();
    booleanAlph.lookupIndex("false");
    booleanAlph.lookupIndex("true");
    Map<String, Alphabet> name2lfalph = new HashMap<String, Alphabet>();

    if (pipe instanceof LFPipe) {
        LFPipe lfpipe = (LFPipe) pipe;
        // Go through all the entries in the instances data alphabet and try to figure out
        // which of the features are either boolean or nominals coded as numbers.
        for (int i = 0; i < dataAlph.size(); i++) {
            String malletFeatureName = (String) dataAlph.lookupObject(i);
            gate.plugin.learningframework.features.Attribute lfatt = FeatureExtraction
                    .lookupAttributeForFeatureName(lfpipe.getFeatureInfo().getAttributes(), malletFeatureName);
            Alphabet alphToUse = null;
            // Cast once into a typed local instead of re-casting on every access.
            if (lfatt instanceof gate.plugin.learningframework.features.AttributeList) {
                gate.plugin.learningframework.features.AttributeList attList =
                        (gate.plugin.learningframework.features.AttributeList) lfatt;
                if (attList.datatype == Datatype.bool) {
                    alphToUse = booleanAlph;
                } else if (attList.datatype == Datatype.nominal && attList.codeas == CodeAs.number) {
                    alphToUse = attList.alphabet;
                }
            } else if (lfatt instanceof gate.plugin.learningframework.features.SimpleAttribute) {
                gate.plugin.learningframework.features.SimpleAttribute simpleAtt =
                        (gate.plugin.learningframework.features.SimpleAttribute) lfatt;
                if (simpleAtt.datatype == Datatype.bool) {
                    alphToUse = booleanAlph;
                } else if (simpleAtt.datatype == Datatype.nominal && simpleAtt.codeas == CodeAs.number) {
                    alphToUse = simpleAtt.alphabet;
                }
            }
            // If we found an alphabet for this feature, remember it.
            if (alphToUse != null) {
                name2lfalph.put(malletFeatureName, alphToUse);
            }
        }
    }
    // This is the information weka needs about the attributes.
    ArrayList<Attribute> wekaAttributes = new ArrayList<Attribute>();
    // Go through the data alphabet again and add one weka attribute to the attributes list
    // for each mallet feature. If we know an alphabet for the mallet feature, create the
    // weka attribute as a nominal, otherwise as a numeric weka attribute.
    for (int i = 0; i < pipe.getDataAlphabet().size(); i++) {
        String malletFeatureName = (String) pipe.getDataAlphabet().lookupObject(i);
        Alphabet lfalph = name2lfalph.get(malletFeatureName);
        if (lfalph == null) {
            wekaAttributes.add(new Attribute(malletFeatureName));
        } else {
            List<String> nomVals = new ArrayList<String>(lfalph.size());
            for (int j = 0; j < lfalph.size(); j++) {
                nomVals.add((String) lfalph.lookupObject(j));
            }
            wekaAttributes.add(new Attribute(malletFeatureName, nomVals));
        }
    }
    // Now add the class attribute: if there is a target alphabet, the class must be nominal,
    // so create a nominal weka attribute, otherwise create a numeric one (regression).
    weka.core.Attribute targetAttr = null;
    if (pipe.getTargetAlphabet() != null) {
        Alphabet talph = pipe.getTargetAlphabet();
        // Create the values for the target from the target alphabet.
        List<String> classVals = new ArrayList<String>();
        for (int i = 0; i < talph.size(); i++) {
            classVals.add((String) talph.lookupObject(i));
        }
        targetAttr = new Attribute("class", classVals);
        wekaAttributes.add(targetAttr);
        System.err.println("LF: created an empty weka dataset for classification");
    } else {
        targetAttr = new Attribute("target");
        wekaAttributes.add(targetAttr);
        System.err.println("LF: created an empty weka dataset for regression");
    }
    // Create the (empty) weka dataset and mark the class attribute.
    Instances insts = new weka.core.Instances("GATELearningFramework", wekaAttributes, malletInstances.size());
    insts.setClass(targetAttr);
    return insts;
}

From source file:lu.lippmann.cdb.common.gui.dataset.InstancesLoaderDialogFactory.java

License:Open Source License

/**
 * Opens a file chooser and loads the selected ARFF/CSV file as a WEKA dataset,
 * optionally asking the user which attribute to use as the class.
 * The last-used directory is persisted in the user preferences.
 *
 * @param parent   component the dialogs are parented to
 * @param setClass if true, prompt the user to pick the class attribute;
 *                 otherwise the previous class (or the first attribute) is used
 * @return the loaded dataset, or null if the user cancelled either dialog
 * @throws Exception if the chooser approved but returned no file,
 *                   or if the file cannot be loaded
 */
private static Instances showDialog(final Component parent, final boolean setClass) throws Exception {
    final Preferences prefs = Preferences.userRoot().node("CadralDecisionBuild");
    final String path = prefs.get(REG_KEY, WekaDataAccessUtil.DEFAULT_SAMPLE_DIR);

    final JFileChooser fc = new JFileChooser();
    fc.setCurrentDirectory(new File(path));
    final int returnVal = fc.showOpenDialog(parent);
    // Guard clauses replace the original's deeply nested if/else.
    if (returnVal != JFileChooser.APPROVE_OPTION) {
        return null; // user cancelled the file chooser
    }
    final File file = fc.getSelectedFile();
    if (file == null) {
        // Approved but no file: previously thrown as a bare, messageless Exception.
        throw new Exception("File chooser approved but no file was selected");
    }

    // Remember this directory for the next invocation.
    prefs.put(REG_KEY, file.getPath());
    final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(file);
    // Fall back to the first attribute when the loader did not set a class.
    final Attribute defaultClassAttr = ds.classIndex() >= 0 ? ds.classAttribute() : ds.attribute(0);
    ds.setClassIndex(-1);
    ds.setRelationName(file.getPath());

    final List<String> attributesNames = new ArrayList<String>();
    final Enumeration<?> e = ds.enumerateAttributes();
    while (e.hasMoreElements()) {
        final Attribute attr = (Attribute) e.nextElement();
        attributesNames.add(attr.name());
    }

    if (!setClass) {
        ds.setClass(defaultClassAttr);
        return ds;
    }

    final String s = (String) JOptionPane.showInputDialog(parent,
            "Select the class attribute for '" + file.getName() + "' (default:'"
                    + defaultClassAttr.name() + "'): ",
            "Class selection", JOptionPane.QUESTION_MESSAGE, null, // icon
            attributesNames.toArray(), attributesNames.get(attributesNames.size() - 1));
    if (s == null) {
        // User cancelled the class-selection dialog: still set the default class
        // (the loader caches the class attribute, so a cancel + retry would
        // otherwise leave no class index defined), but report no dataset.
        ds.setClass(defaultClassAttr);
        return null;
    }
    ds.setClass(ds.attribute(s));
    return ds;
}