Java tutorial
/* * Copyright 2015 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universitt Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core; import org.apache.commons.io.FileUtils; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.component.JCasAnnotator_ImplBase; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import java.io.File; import java.io.IOException; /** * Wrapper for RST parser by Feng et al., 2014 (ACL) http://www.cs.toronto.edu/~weifeng/software.html * <p/> * Note: Using {@code StanfordSegmenter} is preferred * * @author Ivan Habernal */ public class RSTAnnotator extends JCasAnnotator_ImplBase { /** * Path to downloaded RST parser (src/ dir) */ public static final String PARAM_RST_PARSER_SRC_DIR_PATH = "rstParserSrcDirPath"; @ConfigurationParameter(name = PARAM_RST_PARSER_SRC_DIR_PATH, mandatory = true) String rstParserSrcDirPath; /** * If true (default), calls "sanity_check.py" from RST parser during initialization */ public static final String PARAM_SANITY_CHECK_ON_INIT = "sanityCheckOnInit"; @ConfigurationParameter(name = PARAM_SANITY_CHECK_ON_INIT, mandatory = true, defaultValue = "true") boolean sanityCheckOnInit; /** * For debug purposes; keeps temporary .tree files in /tmp */ public static final String PARAM_KEEP_TMP_FILES = "keepTmpFiles"; @ConfigurationParameter(name = PARAM_KEEP_TMP_FILES, mandatory = true, defaultValue = "false") boolean keepTmpFiles; /** * For debugging purposes; logs output of the RST parser */ public static final String PARAM_DEBUG_RST_OUTPUT = "debugRSTOutput"; @ConfigurationParameter(name = PARAM_DEBUG_RST_OUTPUT, mandatory = true, defaultValue = "false") boolean debugRSTOutput; @Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // perform sanity check if (sanityCheckOnInit) { File rstParserSrcDir = new File(rstParserSrcDirPath); // create process ProcessBuilder processBuilder = new ProcessBuilder().inheritIO(); // working dir must be set to the src dir of RST parser processBuilder.directory(rstParserSrcDir); // run the command processBuilder.command("python", new File(rstParserSrcDir, "sanity_check.py").getAbsolutePath()); try { Process process = processBuilder.start(); // and wait int returnValue = process.waitFor(); if (returnValue != 0) { throw new RuntimeException("Process exited with code " + returnValue); } } catch (IOException | InterruptedException e) { throw new ResourceInitializationException(e); } } } @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { try { // parse String parse = parseWithRST(aJCas.getDocumentText()); if (parse != null) { // annotate RSTParseOutputReader reader = new RSTParseOutputReader(); reader.readParseOutput(parse, aJCas); } } catch (IOException e) { throw new AnalysisEngineProcessException(e); } } /** * Runs the parser on the given text * * @param originalText text * @return parse tree * @throws IOException exception */ public String parseWithRST(String originalText) throws IOException { // temporary file in File tmpFileIn = File.createTempFile("rst_tmp", ".txt"); // output of RST parser is a .tree file File tmpFileOut = new File(tmpFileIn.getAbsolutePath() + ".tree"); // tmp log File tmpFileLog = new File(tmpFileIn.getAbsolutePath() + ".log"); try { // write the text into a temporary file FileUtils.writeStringToFile(tmpFileIn, originalText); String tmpDirName = System.getProperty("java.io.tmpdir"); File rstParserSrcDir = new File(rstParserSrcDirPath); // create process ProcessBuilder processBuilder = new ProcessBuilder().inheritIO(); // log to file processBuilder.redirectErrorStream(true); processBuilder.redirectOutput(ProcessBuilder.Redirect.to(tmpFileLog)); // working dir must be set to the src dir of RST parser processBuilder.directory(rstParserSrcDir); // run the command processBuilder.command("python", new File(rstParserSrcDir, "parse.py").getAbsolutePath(), "-t", tmpDirName, tmpFileIn.getAbsolutePath(), "-g"); Process process = processBuilder.start(); // and wait int returnValue = process.waitFor(); if (returnValue != 0) { throw new RuntimeException("Process exited with code " + returnValue); } // read the log if (this.debugRSTOutput) { getLogger().debug(FileUtils.readFileToString(tmpFileLog)); } // read the output if (tmpFileOut.exists()) { return FileUtils.readFileToString(tmpFileOut); } } catch (InterruptedException e) { throw new IOException(e); } finally { // clean up if (!keepTmpFiles) { FileUtils.deleteQuietly(tmpFileIn); FileUtils.deleteQuietly(tmpFileOut); FileUtils.deleteQuietly(tmpFileLog); } } return null; } }