org.esipfed.eskg.nlp.OpenIE.java Source code

Java tutorial

Introduction

Here is the source code for org.esipfed.eskg.nlp.OpenIE.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License"); you 
 * may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.esipfed.eskg.nlp;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.knowitall.openie.Argument;
import edu.knowitall.openie.Instance;
import edu.knowitall.tool.parse.ClearParser;
import edu.knowitall.tool.postag.ClearPostagger;
import edu.knowitall.tool.srl.ClearSrl;
import edu.knowitall.tool.tokenize.ClearTokenizer;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import scala.collection.JavaConversions;
import scala.collection.Seq;

public class OpenIE {

    private static final Logger LOG = LoggerFactory.getLogger(OpenIE.class);

    private OpenIE() {
        // default constructor
    }

    static String readFile(String path, Charset encoding) throws IOException {
        byte[] encoded = Files.readAllBytes(Paths.get(path));
        return new String(encoded, encoding);
    }

    public static void main(String[] args) throws IOException {

        SentenceDetector sentenceDetector = null;
        try {
            // need to change this to the resource folder
            InputStream modelIn = OpenIE.class.getClassLoader().getResourceAsStream("en-sent.bin");
            final SentenceModel sentenceModel = new SentenceModel(modelIn);
            modelIn.close();
            sentenceDetector = new SentenceDetectorME(sentenceModel);
        } catch (IOException ioe) {
            LOG.error("Error either reading 'en-sent.bin' file or creating SentanceModel: ", ioe);
            throw new IOException(ioe);
        }
        edu.knowitall.openie.OpenIE openIE = new edu.knowitall.openie.OpenIE(
                new ClearParser(new ClearPostagger(new ClearTokenizer())), new ClearSrl(), false, false);

        // any text file that contains English sentences would work
        File file = FileUtils.toFile(OpenIE.class.getClassLoader().getResource("test.txt"));
        String text = readFile(file.getAbsolutePath(), StandardCharsets.UTF_8);

        if (sentenceDetector != null) {
            String[] sentences = sentenceDetector.sentDetect(text);
            for (int i = 0; i < sentences.length; i++) {

                Seq<Instance> extractions = openIE.extract(sentences[i]);

                List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions);

                for (Instance instance : listExtractions) {
                    StringBuilder sb = new StringBuilder();

                    sb.append(instance.confidence()).append('\t').append(instance.extr().context()).append('\t')
                            .append(instance.extr().arg1().text()).append('\t').append(instance.extr().rel().text())
                            .append('\t');

                    List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
                    for (Argument argument : listArg2s) {
                        sb.append(argument.text()).append("; ");
                    }

                    LOG.info(sb.toString());
                }
            }
        }

    }

}