Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package opennlp.tools.parse_thicket.opinion_processor;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;

import opennlp.tools.jsmlearning.ProfileReaderWriter;
import opennlp.tools.parse_thicket.ParseTreeNode;
import opennlp.tools.textsimilarity.ParseTreeChunk;

/**
 * Runs {@link NamedEntityExtractor} over the review texts found in a file and
 * writes the extracted sentiment phrases to a CSV report.
 *
 * <p>Review texts are located by plain substring matching: everything between
 * {@code reviewText": "} and {@code ", "overall} is treated as one review.
 */
public class AbstractEngineRunner {
  /** Files collected by {@link #addFiles(File)}. */
  private List<File> queue = new ArrayList<File>();

  /** Default input file used by {@link #processJSONfileWithReviews()}. */
  private final static String reviewSource = "/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/opinions/macbook_pro.txt";

  NamedEntityExtractor neExtractor = new NamedEntityExtractor();

  /**
   * Processes the default review file ({@code reviewSource}) and writes the
   * report to {@code nameEntitiesTopicsOfInterestExtracted.csv}.
   */
  public void processJSONfileWithReviews() {
    processJSONfileWithReviews(reviewSource);
  }

  /**
   * Processes the given review file and writes the report to
   * {@code nameEntitiesTopicsOfInterestExtracted.csv}.
   *
   * <p>If the file cannot be read, the stack trace is printed and the method
   * returns without writing anything. If the file contains no review text
   * markers, a message is printed to standard error and nothing is written.
   *
   * @param reviewFile path of the file containing the reviews
   */
  public void processJSONfileWithReviews(String reviewFile) {
    List<String[]> report = new ArrayList<String[]>();
    report.add(new String[] { "text", "phrases of potential interest list", });

    String content;
    try {
      // NOTE(review): this overload decodes with the platform default charset;
      // consider an explicit charset if the input encoding is known.
      content = FileUtils.readFileToString(new File(reviewFile));
    } catch (IOException e) {
      e.printStackTrace();
      // Without content there is nothing to extract; continuing would only
      // fail later with a NullPointerException.
      return;
    }

    String[] texts = StringUtils.substringsBetween(content, "reviewText\": \"",
        "\", \"overall");
    // substringsBetween yields null (not an empty array) when no match exists.
    if (texts == null) {
      System.err.println("No review texts found in " + reviewFile);
      return;
    }

    for (String text : texts) {
      EntityExtractionResult result = neExtractor.extractEntities(text);
      report.add(new String[] { text });
      // report.add((String[])result.extractedNERWords.toArray(new
      // String[0]));
      // report.add((String[])result.extractedSentimentPhrases.toArray(new
      // String[0]));
      List<String> stringPhrases = new ArrayList<String>();
      List<String> nodePhrases = new ArrayList<String>();
      for (List<ParseTreeNode> chList : result.extractedSentimentPhrases) {
        StringBuilder buf = new StringBuilder();
        StringBuilder nodeBuf = new StringBuilder();
        for (ParseTreeNode ch : chList) {
          buf.append(ch.getWord()).append(' ');
          nodeBuf.append(ch.toString()).append(' ');
        }
        stringPhrases.add(buf.toString().trim());
        nodePhrases.add(nodeBuf.toString().trim());
      }
      report.add(stringPhrases.toArray(new String[0]));
      report.add(nodePhrases.toArray(new String[0]));
      report.add(new String[] { "-----------------------------" });
      // The report is rewritten after every review, as in the original flow.
      // NOTE(review): presumably so partial results survive a failure on a
      // later review - confirm before hoisting this out of the loop.
      ProfileReaderWriter.writeReport(report,
          "nameEntitiesTopicsOfInterestExtracted.csv");
    }
  }

  /**
   * Recursively collects regular files under {@code file} into {@code queue}.
   *
   * <p>Entries whose name starts with {@code "."} are skipped. A path that
   * does not exist, or a directory that cannot be listed, is reported on
   * standard output and ignored.
   *
   * @param file a file or directory to collect from
   */
  private void addFiles(File file) {
    if (!file.exists()) {
      System.out.println(file + " does not exist.");
      return;
    }
    if (file.isDirectory()) {
      // listFiles() returns null on an I/O error or an unreadable directory.
      File[] children = file.listFiles();
      if (children == null) {
        System.out.println(file + " could not be listed.");
        return;
      }
      for (File f : children) {
        if (f.getName().startsWith(".")) {
          continue;
        }
        addFiles(f);
        System.out.println(f.getName());
      }
    } else {
      queue.add(file);
    }
  }

  /**
   * Entry point: processes the default review file.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    AbstractEngineRunner runner = new AbstractEngineRunner();
    runner.processJSONfileWithReviews();
  }
}

/*
 * public void processDirectory(String path){ List<String[]> report = new
 * ArrayList<String[]>(); report.add(new String[] { "filename",
 * "named entity list", "phrases of potential interest list" });
 *
 * List<String> allNamedEntities = new ArrayList<String>();
 *
 * addFiles(new File(path)); for(File f: queue){ List<String> entities =
 * (List<String>) extractEntities(f.getAbsolutePath()).getFirst(); List<String>
 * opinions = (List<String>) extractEntities(f.getAbsolutePath()).getSecond();
 * report.add(new String[]{ f.getName(), entities.toString(),
 * opinions.toString()}); ProfileReaderWriter.writeReport(report,
 * "nameEntitiesExtracted.csv");
 *
 * allNamedEntities.addAll(entities);
 *
 * allNamedEntities = new ArrayList<String>(new HashSet<String>
 * (allNamedEntities ));
 *
 *
 * } ProfileReaderWriter.writeReport(report,
 * "nameEntitiesTopicsOfInterestExtracted.csv"); } }
 */