Example usage for opennlp.tools.doccat DocumentCategorizer categorize

Introduction

In this page you can find the example usage for opennlp.tools.doccat DocumentCategorizer categorize.

Prototype

double[] categorize(String[] text);

Source Link

Document

Categorizes the given text, provided in separate tokens.

Usage

From source file:com.tamingtext.classifier.maxent.TestMaxent.java

private static void runTest(File[] inputFiles, DocumentCategorizer categorizer, Tokenizer tokenizer,
        ResultAnalyzer resultAnalyzer) throws FileNotFoundException, IOException {
    String line;// w  ww.  ja v  a 2s.  co  m
    //<start id="maxent.examples.test.execute"/>
    for (File ff : inputFiles) {
        BufferedReader in = new BufferedReader(new FileReader(ff));
        while ((line = in.readLine()) != null) {
            String[] parts = line.split("\t");
            if (parts.length != 2)
                continue;

            String docText = parts[1]; //<co id="tmt.preprocess"/>
            String[] tokens = tokenizer.tokenize(docText);

            double[] probs = categorizer.categorize(tokens); //<co id="tmt.categorize"/>
            String label = categorizer.getBestCategory(probs);
            int bestIndex = categorizer.getIndex(label);
            double score = probs[bestIndex];

            ClassifierResult result //<co id="tmt.collect"/>
                    = new ClassifierResult(label, score);
            resultAnalyzer.addInstance(parts[0], result);
        }
        in.close();
    }

    System.err.println(resultAnalyzer.toString()); //<co id="tmt.summarize"/>
    /*<calloutlist>
     * <callout arearefs="tmt.preprocess">Preprocess text</callout>
     * <callout arearefs="tmt.categorize">Categorize</callout>
     * <callout arearefs="tmt.collect">Analyze Results</callout>
     * <callout arearefs="tmt.summarize">Present Results</callout>
     * </calloutlist>*/
    //<end id="maxent.examples.test.execute"/>
}