opennlp.tools.parse_thicket.opinion_processor.AbstractEngineRunner.java Source code

Introduction

Here is the source code for opennlp.tools.parse_thicket.opinion_processor.AbstractEngineRunner.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package opennlp.tools.parse_thicket.opinion_processor;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;

import opennlp.tools.jsmlearning.ProfileReaderWriter;
import opennlp.tools.parse_thicket.ParseTreeNode;
import opennlp.tools.textsimilarity.ParseTreeChunk;

/**
 * Command-line runner that reads a file of JSON-formatted product reviews,
 * passes every review text to a {@link NamedEntityExtractor} and writes the
 * extracted sentiment phrases to a CSV report.
 */
public class AbstractEngineRunner {
    /** Files gathered by {@link #addFiles(File)}. */
    private final List<File> queue = new ArrayList<File>();

    /** Review file used when no explicit path is supplied. */
    private final static String reviewSource = "/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/opinions/macbook_pro.txt";

    /** Name of the CSV report produced by {@link #processJSONfileWithReviews(String)}. */
    private final static String REPORT_FILE = "nameEntitiesTopicsOfInterestExtracted.csv";

    NamedEntityExtractor neExtractor = new NamedEntityExtractor();

    /**
     * Processes the default review file (see {@code reviewSource}).
     */
    public void processJSONfileWithReviews() {
        processJSONfileWithReviews(reviewSource);
    }

    /**
     * Reads the given review file, extracts every review body found between
     * {@code reviewText": "} and {@code ", "overall}, runs entity extraction on
     * it and appends four rows per review to the report: the review text, the
     * words of each extracted sentiment phrase, the {@code toString()} form of
     * the same phrases, and a separator line.
     * <p>
     * If the file cannot be read or contains no review text, a message is
     * printed and nothing is written.
     *
     * @param reviewPath path of the file holding the JSON reviews
     */
    public void processJSONfileWithReviews(String reviewPath) {
        String content;
        try {
            // JSON is UTF-8 by specification; do not depend on the platform default charset.
            content = FileUtils.readFileToString(new File(reviewPath), "UTF-8");
        } catch (IOException e) {
            System.err.println("Unable to read review file " + reviewPath + ": " + e);
            return;
        }

        // substringsBetween returns null (not an empty array) when nothing matches.
        String[] texts = StringUtils.substringsBetween(content, "reviewText\": \"", "\", \"overall");
        if (texts == null) {
            System.err.println("No review texts found in " + reviewPath);
            return;
        }

        List<String[]> report = new ArrayList<String[]>();
        report.add(new String[] { "text", "phrases of potential interest list", });

        for (String text : texts) {
            EntityExtractionResult result = neExtractor.extractEntities(text);
            report.add(new String[] { text });

            List<String> stringPhrases = new ArrayList<String>();
            List<String> nodePhrases = new ArrayList<String>();
            for (List<ParseTreeNode> chList : result.extractedSentimentPhrases) {
                StringBuilder words = new StringBuilder();
                StringBuilder nodes = new StringBuilder();
                for (ParseTreeNode ch : chList) {
                    words.append(ch.getWord()).append(' ');
                    nodes.append(ch.toString()).append(' ');
                }
                stringPhrases.add(words.toString().trim());
                nodePhrases.add(nodes.toString().trim());
            }
            report.add(stringPhrases.toArray(new String[0]));
            report.add(nodePhrases.toArray(new String[0]));
            report.add(new String[] { "-----------------------------" });

            // Rewritten after every review so that the rows collected so far
            // are already on disk if a later review fails.
            ProfileReaderWriter.writeReport(report, REPORT_FILE);
        }
    }

    /**
     * Recursively collects regular files under {@code file} into the queue.
     * Entries whose name starts with a dot are skipped; missing paths and
     * unreadable directories are reported and ignored.
     *
     * @param file a file or directory to collect from
     */
    private void addFiles(File file) {
        if (!file.exists()) {
            System.out.println(file + " does not exist.");
            return; // never queue a file that is not there
        }
        if (!file.isDirectory()) {
            queue.add(file);
            return;
        }

        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() yields null when the directory cannot be read
            System.out.println(file + " could not be listed.");
            return;
        }
        for (File f : children) {
            if (f.getName().startsWith(".")) {
                continue;
            }
            addFiles(f);
            System.out.println(f.getName());
        }
    }

    /**
     * Entry point.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             review file to process instead of the built-in default
     */
    public static void main(String[] args) {
        AbstractEngineRunner runner = new AbstractEngineRunner();
        if (args != null && args.length > 0) {
            runner.processJSONfileWithReviews(args[0]);
        } else {
            runner.processJSONfileWithReviews();
        }
    }
}

/*
 * public void processDirectory(String path){ List<String[]> report = new
 * ArrayList<String[]>(); report.add(new String[] { "filename",
 * "named entity list", "phrases of potential interest list" });
 * 
 * List<String> allNamedEntities = new ArrayList<String>();
 * 
 * addFiles(new File(path)); for(File f: queue){ List<String> entities =
 * (List<String>) extractEntities(f.getAbsolutePath()).getFirst(); List<String>
 * opinions = (List<String>) extractEntities(f.getAbsolutePath()).getSecond();
 * report.add(new String[]{ f.getName(), entities.toString(),
 * opinions.toString()}); ProfileReaderWriter.writeReport(report,
 * "nameEntitiesExtracted.csv");
 * 
 * allNamedEntities.addAll(entities);
 * 
 * allNamedEntities = new ArrayList<String>(new HashSet<String>
 * (allNamedEntities ));
 * 
 * 
 * } ProfileReaderWriter.writeReport(report,
 * "nameEntitiesTopicsOfInterestExtracted.csv"); } }
 */