Java tutorial: the EliXa ABSA command-line interface (elh.eus.absa.CLI)
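The full source of the CLI class follows. Its core structure is an argparse4j parser with one sub-parser per sub-command (tag-ate, train-gp, eval-gp, tag-gp, train-atc, train-atc2, slot2, tagSentences, tag-naf); parseCLI then dispatches on args[0] to the matching method. The minimal sketch below shows that pattern in isolation; it is not part of EliXa, the program name "demo" and the SubcommandDemo class are illustrative only, and it assumes argparse4j is on the classpath.

import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.inf.ArgumentParser;
import net.sourceforge.argparse4j.inf.ArgumentParserException;
import net.sourceforge.argparse4j.inf.Namespace;
import net.sourceforge.argparse4j.inf.Subparser;
import net.sourceforge.argparse4j.inf.Subparsers;

public class SubcommandDemo {
    public static void main(String[] args) {
        // Top-level parser; EliXa names its parser after the jar ("elixa-<version>.jar").
        ArgumentParser parser = ArgumentParsers.newArgumentParser("demo")
                .description("Minimal sub-command sketch (hypothetical, not part of EliXa).");
        Subparsers subParsers = parser.addSubparsers().help("sub-command help");
        // One sub-parser per sub-command, each carrying its own options.
        Subparser tagNaf = subParsers.addParser("tag-naf").help("Predict polarity of a text");
        tagNaf.addArgument("-l", "--lexicon").required(true)
                .help("Path to the polarity lexicon file.");
        try {
            Namespace parsed = parser.parseArgs(args);
            System.err.println("CLI options: " + parsed);
        } catch (ArgumentParserException e) {
            parser.handleError(e);
            System.exit(1);
        }
    }
}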
/* * Copyright 2014 Elhuyar Fundazioa This file is part of EliXa. EliXa is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. EliXa is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with EliXa. If not, see <http://www.gnu.org/licenses/>. */ package elh.eus.absa; import ixa.kaflib.KAFDocument; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.nio.file.Files; import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; import java.util.Properties; import net.sourceforge.argparse4j.ArgumentParsers; import net.sourceforge.argparse4j.impl.Arguments; import net.sourceforge.argparse4j.inf.ArgumentParser; import net.sourceforge.argparse4j.inf.ArgumentParserException; import net.sourceforge.argparse4j.inf.Namespace; import net.sourceforge.argparse4j.inf.Subparser; import net.sourceforge.argparse4j.inf.Subparsers; import org.jdom2.JDOMException; import weka.core.Attribute; import weka.core.Instance; import weka.core.Instances; import weka.core.SparseInstance; //import elh.eus.absa.TrainerSVMlight; /** * Main class of elh-eus-absa-atp, the elhuyar absa ATP modules * tagger. * * @author isanvi * @version 2014-12-13 * */ public class CLI { /** * Get dynamically the version of elh-eus-absa-atp by looking at the MANIFEST * file. */ private final String version = CLI.class.getPackage().getImplementationVersion(); /** * Name space of the arguments provided at the CLI. */ private Namespace parsedArguments = null; /** * Argument parser instance. */ private ArgumentParser argParser = ArgumentParsers.newArgumentParser("elixa-" + version + ".jar").description( "elixa-" + version + " is a multilingual ABSA module developed by the Elhuyar Foundation R&D Unit.\n"); /** * Sub parser instance. */ private Subparsers subParsers = argParser.addSubparsers().help("sub-command help"); /** * The parser that manages the tagging sub-command. */ private Subparser annotateParser; /** * The parser that manages the ATP (global polarity) training sub-command. */ private Subparser trainATPParser; /** * The parser that manages the ATP (global polarity) evaluation sub-command. */ private Subparser evalATPParser; /** * The parser that manages the ATP (global polarity) tagging sub-command. */ private Subparser tagATPParser; /** * The parser that manages the ATC (target category classification) training sub-command. */ private Subparser trainATCParser; private Subparser trainATC2Parser; /** * The parser that manages the slot2 (OTE) tagging sub-command. */ private Subparser slot2Parser; /** * The parser that manages the evaluation sub-command. */ private Subparser tagSentParser; /** * Parser that manages the polarity tagging and estimation of a text (kaf format for the moment). */ private Subparser predictParser; /** * Construct a CLI object with the three sub-parsers to manage the command * line parameters. 
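* (One sub-parser is registered per sub-command: tag-ate, train-gp, eval-gp, tag-gp, train-atc, train-atc2, slot2, tagSentences and tag-naf; each load*Parameters() call below adds that sub-command's options.)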
*/ public CLI() { annotateParser = subParsers.addParser("tag-ate").help("Tagging CLI"); loadAnnotateParameters(); trainATPParser = subParsers.addParser("train-gp").help("ATP training CLI"); loadATPTrainingParameters(); evalATPParser = subParsers.addParser("eval-gp").help("ATP evaluation CLI"); loadATPevalParameters(); tagATPParser = subParsers.addParser("tag-gp").help("ATP Tagging CLI"); loadATPtagParameters(); trainATCParser = subParsers.addParser("train-atc").help("ATC training CLI (single classifier)"); loadATCTrainingParameters(); trainATC2Parser = subParsers.addParser("train-atc2").help("ATC Training CLI (E & A classifiers"); loadATC2TrainingParameters(); slot2Parser = subParsers.addParser("slot2").help("Semeval 2015 slot2 (ATE) formatting CLI"); loadslot2Parameters(); tagSentParser = subParsers.addParser("tagSentences").help("Lemmatization and PoS tagging CLI"); loadTagSentParameters(); predictParser = subParsers.addParser("tag-naf").help("Predict polarity of a text"); loadPredictionParameters(); } /** * Main entry point of elixa. * * @param args * the arguments passed through the CLI * @throws IOException * exception if input data not available * @throws JDOMException * if problems with the xml formatting of NAF */ public static void main(final String[] args) throws IOException, JDOMException { CLI cmdLine = new CLI(); cmdLine.parseCLI(args); } /** * Parse the command interface parameters with the argParser. * * @param args * the arguments passed through the CLI * @throws IOException * exception if problems with the incoming data * @throws JDOMException */ public final void parseCLI(final String[] args) throws IOException, JDOMException { try { parsedArguments = argParser.parseArgs(args); System.err.println("CLI options: " + parsedArguments); if (args[0].equals("tagSentences")) { tagSents(System.in); } else if (args[0].equals("train-atp") || args[0].equals("train-gp")) { trainATP(System.in); } else if (args[0].equals("eval-atp") || args[0].equals("eval-gp")) { evalATP(System.in); } else if (args[0].equals("train-atc")) { trainATC(System.in); } else if (args[0].equals("train-atc2")) { trainATC2(System.in); } else if (args[0].equals("tag-atp") || args[0].equals("tag-gp")) { tagATP(System.in); } else if (args[0].equals("tag-ate")) { tagATE(System.in, System.out); } else if (args[0].equals("slot2")) { slot2(System.in); } else if (args[0].equals("tag-naf")) { predictPolarity(System.in); } } catch (ArgumentParserException e) { argParser.handleError(e); System.out.println("Run java -jar target/elixa-" + version + ".jar (train-atc|slot2|tagSentences|tag-ate|train-gp|tag-gp|tag-naf) -help for details"); System.exit(1); } } public final void predictPolarity(final InputStream inputStream) throws IOException { //String files = parsedArguments.getString("file"); String lexicon = parsedArguments.getString("lexicon"); //String estimator = parsedArguments.getString("estimator"); //String synset = parsedArguments.getString("synset"); float threshold = parsedArguments.getFloat("threshold"); boolean printPol = parsedArguments.getBoolean("estimatePolarity"); //System.out.println("Polarity Predictor: "); //BufferedReader freader = new BufferedReader(new FileReader(files)); //String line; //while ((line = freader.readLine()) != null) //{ try { KAFDocument naf = KAFDocument.createFromStream(new InputStreamReader(inputStream)); File lexFile = new File(lexicon); Evaluator evalDoc = new Evaluator(lexFile, "lemma", threshold, "avg"); Map<String, String> results = evalDoc.processKaf(naf, lexFile.getName()); 
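// The Evaluator has scored the NAF document against the lexicon; the returned map carries the sentTermNum, avg, thresh and polarity values printed below when --estimatePolarity is set, and naf.print() writes the annotated NAF to standard output.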
naf.print(); if (printPol) { System.out.println("<Elixa-gp>\n" + "\t<sentiment-words>" + results.get("sentTermNum") + "</sentiment-words>\n" + "\t<polarity-score>" + results.get("avg") + "</polarity-score>\n" + "\t<polarity-threshold>" + results.get("thresh") + "</polarity-threshold>\n" + "\t<polarity>" + results.get("polarity") + "</polarity>\n" + "</Elixa-gp>\n"); } //Map<String, Double> results = avg.processCorpus(corpus); //System.out.println("eval avg done"+results.toString()); /*System.out.println("Prediction with avg done: \n" + "\tTagged file: "+results.get("taggedFile")+"\n" + "\tNumber of words containing sentiment found: "+results.get("sentTermNum")+"\n" + "\tPolarity score: "+results.get("avg") + "\tPolarity (threshold -> "+results.get("thresh")+"): "+results.get("polarity"));*/ //FileUtilsElh.prettyPrintSentKaf(results); } catch (Exception e) { //System.err.println("predictPolarity: error when processing "+line+" file"); System.err.println("EliXa::tag-naf: error when processing naf"); //e.printStackTrace(); } //} //freader.close(); } private void loadPredictionParameters() { /* * Parameters: - Input File (-f | --file= ): File containing the a list of text files in KAF format whose polarity we want to estimate. - dict file (-l | --lexicon= ): path to the polarity lexicon. - Synset polarities (-s | --synset=): default polarities are calculated over lemmas. With this option polarity of synsets is taken into account instead of words. It has two posible values: (first|rank). 'first' uses the sense with the highest confidence value for the lemma. 'rank' uses complete ranking of synsets. - Dictionary weights (-w | --weights): use polarity weights instead of binary polarities (pos/neg). If the dictionary does not provide polarity scores the program defaults to binary polarities. - Threshold (-t | --threshold=) [-1,1]: Threshold which limits positive and negative reviews. Default value is 0. - Polarity score estimator (-e| --estimator) [avg|moh]: average polarity ratio or estimator proposed in (Mohammad et al.,2009 - EMNLP) * */ //predictParser.addArgument("-f", "--file") //.required(true) //.help("Input file to predict the polarity lexicon.\n"); predictParser.addArgument("-l", "--lexicon").required(true).help("Path to the polarity lexicon file.\n"); //predictParser.addArgument("-s", "--synset") //.choices("lemma", "first","rank") //.required(false) //.setDefault("lemma") //.help( // "Default polarities are calculated over lemmas. With this option polarity of synsets is taken into account instead of words. Possible values: (lemma|first|rank). 'first' uses the sense with the highest confidence value for the lemma. 'rank' uses complete ranking of synsets.\n"); //predictParser.addArgument("-w", "--weights") //.action(Arguments.storeTrue()) //.help( // "Use polarity weights instead of binary polarities (pos/neg). If the dictionary does not provide polarity scores the program defaults to binary polarities.\n"); predictParser.addArgument("-t", "--threshold").required(false).setDefault((float) 0).help( "Threshold which limits positive and negative reviews. Float in the [-1,1] range. Default value is 0." + " It is used in combination with the --estimatPolarity\n"); predictParser.addArgument("-e", "--estimatePolarity").action(Arguments.storeTrue()).help( "print a polarity estimation based on a simple average word polarity count (from words in the lexicon given).\n" + "WARNING: this polarity estimation is for test purposes. 
If you activate it an additional element will be " + "printed with the estimation statistics <Elixa-gp>, but the resulting naf won't be valid if that line is not deleted.\n"); } public final void tagSents(final InputStream inputStream) { String posModel = parsedArguments.getString("model"); String lemmaModel = parsedArguments.getString("lemmaModel"); String dir = parsedArguments.getString("dir"); String lang = parsedArguments.getString("language"); String format = parsedArguments.getString("format"); boolean print = parsedArguments.getBoolean("print"); CorpusReader reader = new CorpusReader(inputStream, format, lang); try { String tagDir = dir + File.separator + lang; Files.createDirectories(Paths.get(tagDir)); reader.tagSentences(tagDir, posModel, lemmaModel, print); } catch (Exception e) { e.printStackTrace(); } } public final void loadTagSentParameters() { tagSentParser.addArgument("-m", "--model").required(true) .help("Pass the model to do the tagging as a parameter.\n"); tagSentParser.addArgument("-d", "--dir").required(true).help("directory to store tagged files.\n"); tagSentParser.addArgument("-f", "--format").setDefault("tabNotagged").choices("tabNotagged", "semeval2015") .help("format of the input corpus.\n"); tagSentParser.addArgument("-p", "--print").action(Arguments.storeTrue()) .help("Whether the tagged files should be printed as a corpus.\n"); tagSentParser.addArgument("-l", "--language").setDefault("en") .choices("de", "en", "es", "eu", "it", "nl", "fr") .help("Choose language; if not provided it defaults to: \n" + "\t- If the input format is NAF, the language value in incoming NAF file." + "\t- en otherwise.\n"); } /** * Main method to do Aspect Term Extraction tagging. * * @param inputStream * the input stream containing the content to tag, it must be NAF format * @param outputStream * the output stream providing the named entities * @throws IOException * exception if problems in input or output streams */ public final void tagATE(final InputStream inputStream, final OutputStream outputStream) throws IOException, JDOMException { BufferedReader breader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8")); BufferedWriter bwriter = new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8")); // read KAF document from inputstream KAFDocument naf = KAFDocument.createFromStream(breader); // load parameters into a properties String model = parsedArguments.getString("model"); //String outputFormat = parsedArguments.getString("outputFormat"); String lexer = parsedArguments.getString("lexer"); String dictTag = parsedArguments.getString("dictTag"); String dictPath = parsedArguments.getString("dictPath"); // language parameter String lang = null; if (parsedArguments.getString("language") != null) { lang = parsedArguments.getString("language"); if (!naf.getLang().equalsIgnoreCase(lang)) { System.err.println("Language parameter in NAF and CLI do not match!!"); System.exit(1); } } else { lang = naf.getLang(); } naf = NLPpipelineWrapper.ixaPipesNERC(naf, model, lexer, dictTag, dictPath); naf.save("entity-annotated.kaf"); bwriter.close(); breader.close(); } /** * Main access to the polarity detection training functionalities. 
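* Reads the corpus in the requested format, extracts features, trains a Weka classifier (WekaWrapper), saves the model under the fVectorDir path given in the parameter file, and then runs the selected validation (cross, trainTest or both).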
* * @throws IOException * input output exception if problems with corpora */ public final void trainATP(final InputStream inputStream) throws IOException { // load training parameters file String paramFile = parsedArguments.getString("params"); String corpusFormat = parsedArguments.getString("corpusFormat"); String validation = parsedArguments.getString("validation"); String lang = parsedArguments.getString("language"); String classes = parsedArguments.getString("classnum"); int foldNum = Integer.parseInt(parsedArguments.getString("foldNum")); //boolean printPreds = parsedArguments.getBoolean("printPreds"); CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang); System.err.println("trainATP : Corpus read, creating features"); Features atpTrain = new Features(reader, paramFile, classes); Instances traindata; if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) { traindata = atpTrain.loadInstancesTAB(true, "atp"); } else if (corpusFormat.equalsIgnoreCase("tabNotagged") && lang.equalsIgnoreCase("eu")) { traindata = atpTrain.loadInstancesConll(true, "atp"); } else { traindata = atpTrain.loadInstances(true, "atp"); } //setting class attribute (entCat|attCat|entAttCat|polarityCat) traindata.setClass(traindata.attribute("polarityCat")); WekaWrapper classify; try { Properties params = new Properties(); params.load(new FileInputStream(paramFile)); String modelPath = params.getProperty("fVectorDir"); classify = new WekaWrapper(traindata, true); classify.saveModel(modelPath + File.separator + "elixa-atp_" + lang + ".model"); switch (validation) { case "cross": classify.crossValidate(foldNum); break; case "trainTest": classify.trainTest(); break; case "both": classify.crossValidate(foldNum); classify.trainTest(); break; default: System.out.println("train-atp: wrong validation option. Model saved but not tested"); } } catch (Exception e) { e.printStackTrace(); } } /** * Create the main parameters available for training ATP models. 
*/ private void loadATPTrainingParameters() { trainATPParser.addArgument("-p", "--params").required(true).help("Load the training parameters file\n"); trainATPParser.addArgument("-cvf", "--foldNum").required(false).setDefault(10) .help("Number of folds to run the cross validation on (default is 10).\n"); trainATPParser.addArgument("-v", "--validation").required(false).choices("cross", "trainTest", "both") .setDefault("cross") .help("Choose the way the trained model will be validated\n" + "\t - cross : 10 fold cross validation.\n" + "\t - trainTest : 90% train / 10% test division.\n" + "\t - both (default): both cross validation and train/test division will be tested."); trainATPParser.addArgument("-f", "--corpusFormat").required(false) .choices("semeval2015", "semeval2014", "tab", "tabglobal", "tabNotagged", "globalNotagged") .setDefault("semeval2015") .help("Choose format of reference corpus; it defaults to semeval2015 format.\n"); trainATPParser.addArgument("-cn", "--classnum").required(false).choices("3", "3+", "5+", "5", "binary") .setDefault("3+none") .help("Choose the number of classes the classifier should work on " + "(binary=p|n ; 3=p|n|neu ; 3+=p|n|neu|none ; 5=p|n|neu|p+|n+ ; 5+=p|n|neu|p+|n+|none )" + " it defaults to 3 (p|n|neu).\n"); trainATPParser.addArgument("-o", "--outputpredictions").action(Arguments.storeTrue()).setDefault("false") .help("Output predictions or not; output is the corpus annotated with semeval2015 format.\n"); trainATPParser.addArgument("-l", "--language").setDefault("en") .choices("de", "en", "es", "eu", "it", "nl", "fr") .help("Choose language; if not provided it defaults to: \n" + "\t- If the input format is NAF, the language value in incoming NAF file." + "\t- en otherwise.\n"); } /** * Main access to the polarity tagging functionalities. Target based polarity. * * @throws IOException * input output exception if problems with corpora */ public final void evalATP(final InputStream inputStream) throws IOException, JDOMException { String paramFile = parsedArguments.getString("params"); String corpusFormat = parsedArguments.getString("corpusFormat"); String model = parsedArguments.getString("model"); String lang = parsedArguments.getString("language"); String classnum = parsedArguments.getString("classnum"); boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier"); boolean printPreds = parsedArguments.getBoolean("outputPredictions"); //Read corpus sentences CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang); //Rule-based Classifier. if (ruleBased) { Properties params = new Properties(); params.load(new FileInputStream(new File(paramFile))); String posModelPath = params.getProperty("pos-model"); String lemmaModelPath = params.getProperty("lemma-model"); String kafDir = params.getProperty("kafDir"); /* polarity lexicon. Domain specific polarity lexicon is given priority. * If no domain lexicon is found it reverts to general polarity lexicon. * If no general polarity lexicon is found program exits with error message. */ String lex = params.getProperty("polarLexiconDomain", "none"); if (lex.equalsIgnoreCase("none")) { lex = params.getProperty("polarLexiconGeneral", "none"); if (lex.equalsIgnoreCase("none")) { System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity" + " lexicon has been specified. 
Either specify one or choose ML classifier"); System.exit(1); } } File lexFile = new File(lex); Evaluator evalDoc = new Evaluator(lexFile, "lemma"); for (String oId : reader.getOpinions().keySet()) { // sentence posTagging String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir, posModelPath, lemmaModelPath); //process the postagged sentence with the word count based polarity tagger Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName()); String lblStr = results.get("polarity"); String actual = "?"; if (reader.getOpinion(oId).getPolarity() != null) { actual = reader.getOpinion(oId).getPolarity(); } String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", ""); System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId)); reader.getOpinion(oId).setPolarity(lblStr); } } //ML Classifier (default) else { Features atpTest = new Features(reader, paramFile, classnum, model); Instances testdata; if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) { testdata = atpTest.loadInstancesTAB(true, "atp"); } else { testdata = atpTest.loadInstances(true, "atp"); } // setting class attribute (entCat|attCat|entAttCat|polarityCat) testdata.setClass(testdata.attribute("polarityCat")); WekaWrapper classify; try { classify = new WekaWrapper(model); System.err.println("evalAtp : going to test the model"); //sort according to the instanceId //traindata.sort(atpTrain.getAttIndexes().get("instanceId")); //Instances testdata = new Instances(traindata); //testdata.deleteAttributeAt(0); //classify.setTestdata(testdata); classify.setTestdata(testdata); classify.testModel(model); if (printPreds) { for (String oId : reader.getOpinions().keySet()) { int iId = atpTest.getOpinInst().get(oId); Instance i = testdata.get(iId - 1); double label = classify.getMLclass().classifyInstance(i); String lblStr = i.classAttribute().value((int) label); String actual = "?"; if (reader.getOpinion(oId).getPolarity() != null) { actual = reader.getOpinion(oId).getPolarity(); } String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", ""); String oSent = reader.getOpinionSentence(oId); if (corpusFormat.startsWith("tab")) { StringBuilder sb = new StringBuilder(); for (String kk : oSent.split("\n")) { sb.append(kk.split("\\t")[0]); sb.append(" "); } oSent = sb.toString(); } System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t" + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::")); reader.getOpinion(oId).setPolarity(lblStr); } } //reader.print2Semeval2015format(model+"tagATP.xml"); //reader.print2conll(model+"tagAtp.conll"); } catch (Exception e) { e.printStackTrace(); } } } /** * Create the main parameters available for training ATP models. 
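* (These options belong to the eval-gp sub-command: the pretrained model to test, corpus format, class set, the rule-based classifier switch and prediction output.)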
*/ private void loadATPevalParameters() { evalATPParser.addArgument("-p", "--params").required(true).help("Load the training parameters file\n"); evalATPParser.addArgument("-m", "--model").required(true).help("The pretrained model we want to test.\n"); //evalATPParser.addArgument("-t", "--testset") //.required(false) //.help("The test corpus to evaluate our model.\n"); evalATPParser.addArgument("-f", "--corpusFormat").required(false) .choices("semeval2015", "semeval2014", "tab", "tabglobal", "tabNotagged", "globalNotagged") .setDefault("semeval2015") .help("Choose format of the test corpus; it defaults to semeval2015 format.\n"); evalATPParser.addArgument("-cn", "--classnum").required(false).choices("3", "3+", "5+", "5", "binary") .setDefault("3+none") .help("Choose the number of classes the classifier should work on " + "(binary=p|n ; 3=p|n|neu ; 3+=p|n|neu|none ; 5=p|n|neu|p+|n+ ; 5+=p|n|neu|p+|n+|none )" + " it defaults to 3 (p|n|neu).\n"); evalATPParser.addArgument("-r", "--ruleBasedClassifier").action(Arguments.storeTrue()).setDefault(false) .help("Whether rule based classifier should be used instead of the default ML classifier." + " A polarity lexicon is mandatory if the rule based classifier is used.\n"); evalATPParser.addArgument("-o", "--outputPredictions").action(Arguments.storeTrue()).setDefault(false) .help("Output predictions or not; output is the corpus annotated with semeval2015 format.\n"); evalATPParser.addArgument("-l", "--language").setDefault("en") .choices("de", "en", "es", "eu", "it", "nl", "fr") .help("Choose language; if not provided it defaults to: \n" + "\t- If the input format is NAF, the language value in incoming NAF file." + "\t- en otherwise.\n"); } /** * Main access to the polarity tagging functionalities. Target based polarity. * * @throws IOException * input output exception if problems with corpora * @throws JDOMException */ public final void tagATP(final InputStream inputStream) throws IOException, JDOMException { // load training parameters file String paramFile = parsedArguments.getString("params"); String corpusFormat = parsedArguments.getString("corpusFormat"); String model = parsedArguments.getString("model"); String lang = parsedArguments.getString("language"); String classnum = parsedArguments.getString("classnum"); boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier"); //Read corpus sentences CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang); //Rule-based Classifier. if (ruleBased) { Properties params = new Properties(); params.load(new FileInputStream(new File(paramFile))); String posModelPath = params.getProperty("pos-model"); String lemmaModelPath = params.getProperty("lemma-model"); String kafDir = params.getProperty("kafDir"); /* polarity lexicon. Domain specific polarity lexicon is given priority. * If no domain lexicon is found it reverts to general polarity lexicon. * If no general polarity lexicon is found program exits with error message. */ String lex = params.getProperty("polarLexiconDomain", "none"); if (lex.equalsIgnoreCase("none")) { lex = params.getProperty("polarLexiconGeneral", "none"); if (lex.equalsIgnoreCase("none")) { System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity" + " lexicon has been specified. 
Either specify one or choose ML classifier"); System.exit(1); } } File lexFile = new File(lex); Evaluator evalDoc = new Evaluator(lexFile, "lemma"); for (String oId : reader.getOpinions().keySet()) { // sentence posTagging String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir, posModelPath, lemmaModelPath); //process the postagged sentence with the word count based polarity tagger Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName()); String lblStr = results.get("polarity"); String actual = "?"; if (reader.getOpinion(oId).getPolarity() != null) { actual = reader.getOpinion(oId).getPolarity(); } String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", ""); System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId)); reader.getOpinion(oId).setPolarity(lblStr); } } else { Features atpTrain = new Features(reader, paramFile, classnum, model); Instances traindata; if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) { traindata = atpTrain.loadInstancesTAB(true, "atp"); } else if (lang.equalsIgnoreCase("eu") && (corpusFormat.equalsIgnoreCase("tabNotagged") || corpusFormat.equalsIgnoreCase("ireom"))) { traindata = atpTrain.loadInstancesConll(true, "atp"); } else { traindata = atpTrain.loadInstances(true, "atp"); } // setting class attribute (entCat|attCat|entAttCat|polarityCat) traindata.setClass(traindata.attribute("polarityCat")); WekaWrapper classify; try { classify = new WekaWrapper(model); System.err.println(); //sort according to the instanceId //traindata.sort(atpTrain.getAttIndexes().get("instanceId")); //Instances testdata = new Instances(traindata); //testdata.deleteAttributeAt(0); //classify.setTestdata(testdata); classify.setTestdata(traindata); classify.loadModel(model); for (String oId : reader.getOpinions().keySet()) { int iId = atpTrain.getOpinInst().get(oId); Instance i = traindata.get(iId - 1); double label = classify.getMLclass().classifyInstance(i); String lblStr = i.classAttribute().value((int) label); String actual = "?"; if (reader.getOpinion(oId).getPolarity() != null) { actual = reader.getOpinion(oId).getPolarity(); } String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", ""); String oSent = reader.getOpinionSentence(oId); if (corpusFormat.startsWith("tab")) { StringBuilder sb = new StringBuilder(); for (String kk : oSent.split("\n")) { sb.append(kk.split("\\t")[0]); sb.append(" "); } oSent = sb.toString(); } System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t" + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::")); reader.getOpinion(oId).setPolarity(lblStr); } //reader.print2Semeval2015format(model+"tagATP.xml"); //reader.print2conll(model+"tagAtp.conll"); } catch (Exception e) { e.printStackTrace(); } } } /** * Create the main parameters available for training ATP models. */ private void loadATPtagParameters() { tagATPParser.addArgument("-p", "--params").required(true).help("Load the training parameters file\n"); tagATPParser.addArgument("-f", "--corpusFormat").required(false) .choices("semeval2015", "semeval2014", "tab", "tabglobal", "tabNotagged", "ireom", "globalNotagged") .setDefault("semeval2015") .help("Choose format of reference corpus; it defaults to semeval2015 format.\n"); tagATPParser.addArgument("-m", "--model").required(true).help( "Pre trained model to classify corpus opinions with. 
Features are extracted from the model\n"); tagATPParser.addArgument("-r", "--ruleBasedClassifier").action(Arguments.storeTrue()).setDefault(false) .help("Whether rule based classifier should be used instead of the default ML classifier." + " A polarity lexicon is mandatory if the rule based classifier is used.\n"); tagATPParser.addArgument("-cn", "--classnum").required(false).choices("3", "3+", "5+", "5", "binary") .setDefault("3+none") .help("Choose the number of classes the classifier should work on " + "(binary=p|n ; 3=p|n|neu ; 3+=p|n|neu|none ; 5=p|n|neu|p+|n+ ; 5+=p|n|neu|p+|n+|none )" + " it defaults to 3 (p|n|neu).\n"); tagATPParser.addArgument("-l", "--language").setDefault("en") .choices("de", "en", "es", "eu", "it", "nl", "fr") .help("Choose language; if not provided it defaults to: \n" + "\t- If the input format is NAF, the language value in incoming NAF file." + "\t- en otherwise.\n"); } /** * Format ixa-pipes based ATE results to Semeval 2015 format. * * @param inputStream * @throws IOException */ public final void slot2(final InputStream inputStream) throws IOException { // load training parameters file //String paramFile = parsedArguments.getString("params"); String corpusFormat = parsedArguments.getString("corpusFormat"); String naf = parsedArguments.getString("naf"); String lang = parsedArguments.getString("language"); CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang); if (!FileUtilsElh.checkFile(naf)) { System.err.println("Error when trying to read from the directory containing the annotations."); System.exit(2); } try { reader.slot2opinionsFromAnnotations(naf); } catch (Exception e) { e.printStackTrace(); } } /** * Main access to the train-atc functionalities. * Train ATC using a single classifier (one vs. all) for E#A aspect categories. 
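* Two classifiers are trained and cross-validated here: an entity (E) classifier and an attribute (A) classifier; the other category attributes are filtered out of each training set, and the models are saved as elixa-atc_ent-<lang>.model and elixa-atc_att-<lang>.model.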
* * @throws Exception */ public final void trainATC(final InputStream inputStream) throws IOException { // load training parameters file String paramFile = parsedArguments.getString("params"); String corpusFormat = parsedArguments.getString("corpusFormat"); //String validation = parsedArguments.getString("validation"); int foldNum = Integer.parseInt(parsedArguments.getString("foldNum")); String lang = parsedArguments.getString("language"); //boolean printPreds = parsedArguments.getBoolean("printPreds"); boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences"); //double threshold = 0.2; //String modelsPath = "/home/inaki/Proiektuak/BOM/SEMEVAL2015/ovsaModels"; CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang); Features atcTrain = new Features(reader, paramFile, "3"); Instances traindata = atcTrain.loadInstances(true, "atc"); //setting class attribute (entCat|attCat|entAttCat|polarityCat) //HashMap<String, Integer> opInst = atcTrain.getOpinInst(); WekaWrapper classifyEnts; WekaWrapper classifyAtts; //WekaWrapper onevsall; try { //train first classifier (entities) Instances traindataEnt = new Instances(traindata); // IMPORTANT: filter indexes are added 1 because weka remove function counts attributes from 1, traindataEnt.setClassIndex(traindataEnt.attribute("entCat").index()); classifyEnts = new WekaWrapper(traindataEnt, true); String filtRange = String.valueOf(traindata.attribute("attCat").index() + 1) + "," + String.valueOf(traindata.attribute("entAttCat").index() + 1); classifyEnts.filterAttribute(filtRange); System.out.println("trainATC: entity classifier results -> "); classifyEnts.crossValidate(foldNum); classifyEnts.saveModel("elixa-atc_ent-" + lang + ".model"); //Classifier entityCl = classify.getMLclass(); //train second classifier (attributes) Instances traindataAtt = new Instances(traindata); traindataAtt.setClassIndex(traindataAtt.attribute("attCat").index()); classifyAtts = new WekaWrapper(traindataAtt, true); filtRange = String.valueOf(traindataAtt.attribute("entAttCat").index() + 1); classifyAtts.filterAttribute(filtRange); System.out.println("trainATC: attribute classifier results -> "); classifyAtts.crossValidate(foldNum); classifyAtts.saveModel("elixa-atc_att-" + lang + ".model"); /* Instances traindataEntadded = classifyEnts.addClassification(classifyEnts.getMLclass(), traindataEnt); //train second classifier (entCat attributes will have the values of the entities always) traindataEntadded.setClassIndex(traindataEntadded.attribute("attCat").index()); WekaWrapper classify2 = new WekaWrapper(traindataEntadded, true); System.out.println("trainATC: enhanced attribute classifier results -> "); classify2.saveModel("elixa-atc_att_enhanced.model"); classify2.crossValidate(foldNum); */ //classify.printMultilabelPredictions(classify.multiLabelPrediction()); */ //reader.print2Semeval2015format(paramFile+"entAttCat.xml"); } catch (Exception e) { e.printStackTrace(); } //traindata.setClass(traindata.attribute("entAttCat")); System.err.println("DONE CLI train-atc"); } /** * Main access to the train-atc functionalities. Train ATC using a double one vs. 
all classifier * (E and A) for E#A aspect categories * @throws Exception */ public final void trainATC2(final InputStream inputStream) throws IOException { // load training parameters file String paramFile = parsedArguments.getString("params"); String testFile = parsedArguments.getString("testset"); String paramFile2 = parsedArguments.getString("params2"); String corpusFormat = parsedArguments.getString("corpusFormat"); //String validation = parsedArguments.getString("validation"); String lang = parsedArguments.getString("language"); //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum")); //boolean printPreds = parsedArguments.getBoolean("printPreds"); boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences"); boolean onlyTest = parsedArguments.getBoolean("testOnly"); double threshold = 0.5; double threshold2 = 0.5; String modelsPath = "/home/inaki/elixa-atp/ovsaModels"; CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang); Features atcTrain = new Features(reader, paramFile, "3"); Instances traindata = atcTrain.loadInstances(true, "atc"); if (onlyTest) { if (FileUtilsElh.checkFile(testFile)) { System.err.println("read from test file"); reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat, nullSentenceOpinions, lang); atcTrain.setCorpus(reader); traindata = atcTrain.loadInstances(true, "atc"); } } //setting class attribute (entCat|attCat|entAttCat|polarityCat) //HashMap<String, Integer> opInst = atcTrain.getOpinInst(); //WekaWrapper classifyAtts; WekaWrapper onevsall; try { //classify.printMultilabelPredictions(classify.multiLabelPrediction()); */ //onevsall Instances entdata = new Instances(traindata); entdata.deleteAttributeAt(entdata.attribute("attCat").index()); entdata.deleteAttributeAt(entdata.attribute("entAttCat").index()); entdata.setClassIndex(entdata.attribute("entCat").index()); onevsall = new WekaWrapper(entdata, true); if (!onlyTest) { onevsall.trainOneVsAll(modelsPath, paramFile + "entCat"); System.out.println("trainATC: one vs all models ready"); } onevsall.setTestdata(entdata); HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entCat"); System.out.println("trainATC: one vs all predictions ready"); HashMap<Integer, String> instOps = new HashMap<Integer, String>(); for (String oId : atcTrain.getOpinInst().keySet()) { instOps.put(atcTrain.getOpinInst().get(oId), oId); } atcTrain = new Features(reader, paramFile2, "3"); entdata = atcTrain.loadInstances(true, "attTrain2_data"); entdata.deleteAttributeAt(entdata.attribute("entAttCat").index()); //entdata.setClassIndex(entdata.attribute("entCat").index()); Attribute insAtt = entdata.attribute("instanceId"); double maxInstId = entdata.kthSmallestValue(insAtt, entdata.numDistinctValues(insAtt) - 1); System.err.println("last instance has index: " + maxInstId); for (int ins = 0; ins < entdata.numInstances(); ins++) { System.err.println("ins" + ins); int i = (int) entdata.instance(ins).value(insAtt); Instance currentInst = entdata.instance(ins); //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i)); String sId = reader.getOpinion(instOps.get(i)).getsId(); String oId = instOps.get(i); reader.removeSentenceOpinions(sId); int oSubId = 0; for (String cl : ovsaRes.get(i).keySet()) { //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); if (ovsaRes.get(i).get(cl) > threshold) { //System.err.println("one got through ! 
instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); // for the first one update the instances if (oSubId >= 1) { Instance newIns = new SparseInstance(currentInst); newIns.setDataset(entdata); entdata.add(newIns); newIns.setValue(insAtt, maxInstId + oSubId); newIns.setClassValue(cl); instOps.put((int) maxInstId + oSubId, oId); } // if the are more create new instances else { currentInst.setClassValue(cl); //create and add opinion to the structure // trgt, offsetFrom, offsetTo, polarity, cat, sId); //Opinion op = new Opinion(instOps.get(i)+"_"+oSubId, "", 0, 0, "", cl, sId); //reader.addOpinion(op); } oSubId++; } } //finished updating instances data } entdata.setClass(entdata.attribute("attCat")); onevsall = new WekaWrapper(entdata, true); /** * Bigarren sailkatzailea * * */ if (!onlyTest) { onevsall.trainOneVsAll(modelsPath, paramFile + "attCat"); System.out.println("trainATC: one vs all attcat models ready"); } ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat"); insAtt = entdata.attribute("instanceId"); maxInstId = entdata.kthSmallestValue(insAtt, insAtt.numValues()); System.err.println("last instance has index: " + maxInstId); for (int ins = 0; ins < entdata.numInstances(); ins++) { System.err.println("ins: " + ins); int i = (int) entdata.instance(ins).value(insAtt); Instance currentInst = entdata.instance(ins); //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i)); String sId = reader.getOpinion(instOps.get(i)).getsId(); String oId = instOps.get(i); reader.removeSentenceOpinions(sId); int oSubId = 0; for (String cl : ovsaRes.get(i).keySet()) { //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); if (ovsaRes.get(i).get(cl) > threshold2) { ///System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); if (ovsaRes.get(i).get(cl) > threshold) { //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); // for the first one update the instances if (oSubId >= 1) { String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl; //create and add opinion to the structure // trgt, offsetFrom, offsetTo, polarity, cat, sId); Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId); reader.addOpinion(op); } // if the are more create new instances else { String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl; //create and add opinion to the structure // trgt, offsetFrom, offsetTo, polarity, cat, sId); reader.removeOpinion(oId); Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId); reader.addOpinion(op); } oSubId++; } } //finished updating instances data } } reader.print2Semeval2015format(paramFile + "entAttCat.xml"); } catch (Exception e) { e.printStackTrace(); } //traindata.setClass(traindata.attribute("entAttCat")); System.err.println("DONE CLI train-atc2 (oneVsAll)"); } /** * train ATC using a single classifier (one vs. all) for E#A aspect categories. 
* * @param inputStream * @throws IOException */ public final void trainATCsingleCategory(final InputStream inputStream) throws IOException { // load training parameters file String paramFile = parsedArguments.getString("params"); String testFile = parsedArguments.getString("testset"); String corpusFormat = parsedArguments.getString("corpusFormat"); //String validation = parsedArguments.getString("validation"); String lang = parsedArguments.getString("language"); //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum")); //boolean printPreds = parsedArguments.getBoolean("printPreds"); boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences"); boolean onlyTest = parsedArguments.getBoolean("testOnly"); double threshold = 0.5; String modelsPath = "/home/inaki/Proiektuak/BOM/SEMEVAL2015/ovsaModels"; CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang); Features atcTrain = new Features(reader, paramFile, "3"); Instances traindata = atcTrain.loadInstances(true, "atc"); if (onlyTest) { if (FileUtilsElh.checkFile(testFile)) { System.err.println("read from test file"); reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat, nullSentenceOpinions, lang); atcTrain.setCorpus(reader); traindata = atcTrain.loadInstances(true, "atc"); } } //setting class attribute (entCat|attCat|entAttCat|polarityCat) //HashMap<String, Integer> opInst = atcTrain.getOpinInst(); //WekaWrapper classifyEnts; //WekaWrapper classifyAtts; WekaWrapper onevsall; try { //classify.printMultilabelPredictions(classify.multiLabelPrediction()); */ //onevsall //Instances entdata = new Instances(traindata); traindata.deleteAttributeAt(traindata.attribute("attCat").index()); traindata.deleteAttributeAt(traindata.attribute("entCat").index()); traindata.setClassIndex(traindata.attribute("entAttCat").index()); onevsall = new WekaWrapper(traindata, true); if (!onlyTest) { onevsall.trainOneVsAll(modelsPath, paramFile + "entAttCat"); System.out.println("trainATC: one vs all models ready"); } onevsall.setTestdata(traindata); HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat"); System.out.println("trainATC: one vs all predictions ready"); HashMap<Integer, String> kk = new HashMap<Integer, String>(); for (String oId : atcTrain.getOpinInst().keySet()) { kk.put(atcTrain.getOpinInst().get(oId), oId); } Object[] ll = ovsaRes.get(1).keySet().toArray(); for (Object l : ll) { System.err.print((String) l + " - "); } System.err.print("\n"); for (int i : ovsaRes.keySet()) { //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i)); String sId = reader.getOpinion(kk.get(i)).getsId(); reader.removeSentenceOpinions(sId); int oSubId = 0; for (String cl : ovsaRes.get(i).keySet()) { //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); if (ovsaRes.get(i).get(cl) > threshold) { //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); oSubId++; //create and add opinion to the structure //trgt, offsetFrom, offsetTo, polarity, cat, sId); Opinion op = new Opinion(kk.get(i) + "_" + oSubId, "", 0, 0, "", cl, sId); reader.addOpinion(op); } } } reader.print2Semeval2015format(paramFile + "entAttCat.xml"); } catch (Exception e) { e.printStackTrace(); } //traindata.setClass(traindata.attribute("entAttCat")); System.err.println("DONE CLI train-atc2 (oneVsAll)"); } /** * Main access to the train functionalities. 
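* Cross-validates an entity classifier and an attribute classifier over the reference corpus and prints the multi-label predictions, rather than tagging an unseen corpus.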
* @throws Exception */ public final void tagATC(final InputStream inputStream) throws IOException { // load training parameters file String paramFile = parsedArguments.getString("params"); String corpusFormat = parsedArguments.getString("corpusFormat"); //String validation = parsedArguments.getString("validation"); String lang = parsedArguments.getString("language"); int foldNum = Integer.parseInt(parsedArguments.getString("foldNum")); //boolean printPreds = parsedArguments.getBoolean("printPreds"); CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang); Features atcTrain = new Features(reader, paramFile, "3"); Instances traindata = atcTrain.loadInstances(true, "atc"); //setting class attribute (entCat|attCat|entAttCat|polarityCat) //HashMap<String, Integer> opInst = atcTrain.getOpinInst(); WekaWrapper classify; try { //train first classifier (entities) traindata.setClass(traindata.attribute("entCat")); classify = new WekaWrapper(traindata, true); classify.crossValidate(foldNum); //Classifier entityCl = classify.getMLclass().; //train second classifier (attributtes) traindata.setClass(traindata.attribute("attCat")); classify.setTraindata(traindata); classify.crossValidate(foldNum); //Classifier attCl = classify.getMLclass(); classify.printMultilabelPredictions(classify.multiLabelPrediction()); } catch (Exception e) { e.printStackTrace(); } traindata.setClass(traindata.attribute("entAttCat")); System.err.println("DONE CLI train-atc"); } /** * Create the available parameters for ATP tagging. */ private void loadAnnotateParameters() { annotateParser.addArgument("-m", "--model").required(true) .help("Pass the model to do the tagging as a parameter.\n"); annotateParser.addArgument("-l", "--language").required(false) .choices("de", "en", "es", "eu", "it", "nl", "fr") .help("Choose language; it defaults to the language value in incoming NAF file.\n"); annotateParser.addArgument("-o", "--outputFormat").required(false).choices("semeval2015", "naf") .setDefault("semeval2015").help("Choose output format; it defaults to semeval2015.\n"); annotateParser.addArgument("--dictTag").required(false).choices("tag", "post").setDefault("post") .help("Choose to directly tag entities by dictionary look-up; if the 'tag' option is chosen, " + "only tags entities found in the dictionary; if 'post' option is chosen, it will " + "post-process the results of the statistical model.\n"); annotateParser.addArgument("--dictPath").required(false).setDefault("").help( "Provide the path to the dictionaries for direct dictionary tagging; it ONLY WORKS if --dictTag " + "option is activated.\n"); } /** * Create the main parameters available for tagging slot2 semeval2015. */ private void loadslot2Parameters() { slot2Parser.addArgument("-p", "--params").required(false).help("Load the training parameters file\n"); slot2Parser.addArgument("-f", "--corpusFormat").required(false).choices("semeval2015", "semeval2014", "tab") .setDefault("semeval2015") .help("Choose format of reference corpus; it defaults to semeval2015 format.\n"); slot2Parser.addArgument("-n", "--naf").required(true).help("tagged naf file path.\n"); slot2Parser.addArgument("-l", "--language").setDefault("en") .choices("de", "en", "es", "eu", "it", "nl", "fr") .help("Choose language; if not provided it defaults to: \n" + "\t- If the input format is NAF, the language value in incoming NAF file." + "\t- en otherwise.\n"); } /** * Create the main parameters available for training ATP models. 
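* (These options belong to the train-atc sub-command; loadATC2TrainingParameters below adds the equivalent set for train-atc2, plus --params2 and --testOnly.)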
*/ private void loadATCTrainingParameters() { trainATCParser.addArgument("-p", "--params").required(true).help("Load the training parameters file\n"); trainATCParser.addArgument("-t", "--testset").required(false).help("The test or reference corpus.\n"); trainATCParser.addArgument("-cvf", "--foldNum").required(false).setDefault(10) .help("Number of folds to run the cross validation on.\n"); trainATCParser.addArgument("-v", "--validation").required(false).choices("cross", "trainTest", "both") .setDefault("both") .help("Choose the way the trained model will be validated\n" + "\t - cross : 10 fold cross validation.\n" + "\t - trainTest : 90% train / 10% test division.\n" + "\t - both (default): both cross validation and train/test division will be tested."); trainATCParser.addArgument("-f", "--corpusFormat").required(false) .choices("semeval2015", "semeval2014", "tab").setDefault("semeval2015") .help("Choose format of reference corpus; it defaults to semeval2015 format.\n"); trainATCParser.addArgument("-n", "--nullSentences").action(Arguments.storeTrue()).required(false) .setDefault(false) .help("Whether null examples should be generated from sentences without categories or not.\n"); trainATCParser.addArgument("-o", "--outputpredictions").action(Arguments.storeTrue()).setDefault(false) .help("Output predictions or not; output is the corpus annotated with semeval2015 format.\n"); trainATCParser.addArgument("-l", "--language").setDefault("en") .choices("de", "en", "es", "eu", "it", "nl", "fr") .help("Choose language; if not provided it defaults to: \n" + "\t- If the input format is NAF, the language value in incoming NAF file." + "\t- en otherwise.\n"); } /** * Create the main parameters available for training ATP models. */ private void loadATC2TrainingParameters() { trainATC2Parser.addArgument("-p", "--params").required(true).help("Load the training parameters file\n"); trainATC2Parser.addArgument("-p2", "--params2").required(false).help("Load the training parameters file\n"); trainATC2Parser.addArgument("-t", "--testset").required(false).help("The test or reference corpus.\n"); trainATC2Parser.addArgument("-cvf", "--foldNum").required(false).setDefault(10) .help("Number of folds to run the cross validation on.\n"); trainATC2Parser.addArgument("-v", "--validation").required(false).choices("cross", "trainTest", "both") .setDefault("both") .help("Choose the way the trained model will be validated\n" + "\t - cross : 10 fold cross validation.\n" + "\t - trainTest : 90% train / 10% test division.\n" + "\t - both (default): both cross validation and train/test division will be tested."); trainATC2Parser.addArgument("-f", "--corpusFormat").required(false) .choices("semeval2015", "semeval2014", "tab").setDefault("semeval2015") .help("Choose format of reference corpus; it defaults to semeval2015 format.\n"); trainATC2Parser.addArgument("-n", "--nullSentences").action(Arguments.storeTrue()).required(false) .setDefault(false) .help("Whether null examples should be generated from sentences without categories or not.\n"); trainATC2Parser.addArgument("-to", "--testOnly").action(Arguments.storeTrue()).required(false) .setDefault(false) .help("Whether only test should be done (assumes models were previously generated).\n"); trainATC2Parser.addArgument("-o", "--outputpredictions").action(Arguments.storeTrue()).setDefault(false) .help("Output predictions or not; output is the corpus annotated with semeval2015 format.\n"); trainATC2Parser.addArgument("-l", "--language").setDefault("en") .choices("de", "en", 
"es", "eu", "it", "nl", "fr") .help("Choose language; if not provided it defaults to: \n" + "\t- If the input format is NAF, the language value in incoming NAF file." + "\t- en otherwise.\n"); } }