Java tutorial
/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.tudarmstadt.ukp.experiments.argumentation.sequence.writers;

import de.tudarmstadt.ukp.dkpro.argumentation.types.ArgumentComponent;
import de.tudarmstadt.ukp.dkpro.argumentation.types.ArgumentUnitUtils;
import de.tudarmstadt.ukp.dkpro.argumentation.types.Claim;
import de.tudarmstadt.ukp.dkpro.argumentation.types.WebArgumentMetadata;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiReader;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasConsumer_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * UIMA consumer that writes one HTML file per processed CAS into
 * {@link #PARAM_OUTPUT_FOLDER} and, once the collection is complete, copies the
 * {@code style.css} classpath resource next to the generated files.
 *
 * @author Ivan Habernal
 */
public class HtmlWriter
        extends JCasConsumer_ImplBase
{
    /**
     * Output folder where the output HTML files are stored
     */
    public static final String PARAM_OUTPUT_FOLDER = "outputFolder";

    /** Name of the stylesheet, both as classpath resource and as output file. */
    private static final String STYLESHEET_NAME = "style.css";

    /** Output folder used by {@link #main(String[])} when none is given. */
    private static final String DEFAULT_OUTPUT_DIR = "/tmp/out";

    @ConfigurationParameter(name = PARAM_OUTPUT_FOLDER, mandatory = true)
    private File outputFolder;

    /**
     * Initializes the component and makes sure the output folder exists.
     *
     * @param context UIMA context handed to the superclass
     * @throws ResourceInitializationException if the superclass fails or the output
     *                                         folder cannot be created
     */
    @Override
    public void initialize(UimaContext context)
            throws ResourceInitializationException
    {
        super.initialize(context);

        // mkdirs() returns false both on failure and when the folder already exists,
        // hence the additional isDirectory() check
        if (!outputFolder.mkdirs() && !outputFolder.isDirectory()) {
            throw new ResourceInitializationException(
                    new IOException("Cannot create output folder " + outputFolder));
        }
    }

    /**
     * Renders each {@link Paragraph} of the CAS as an HTML fragment. Token text is
     * HTML-escaped; argument components and sentence starts (as reported by
     * {@code ArgumentPrinterUtils}) are wrapped in / marked with {@code span} elements.
     *
     * @param jCas CAS to render
     * @return one HTML fragment per paragraph, in the order returned by
     *         {@code JCasUtil.select}; never {@code null}
     */
    public static List<String> renderDocumentToHtmlParagraphs(JCas jCas)
    {
        List<String> result = new ArrayList<>();

        for (Paragraph paragraph : JCasUtil.select(jCas, Paragraph.class)) {
            StringBuilder html = new StringBuilder();

            for (Token token : JCasUtil.selectCovered(jCas, Token.class, paragraph)) {
                // print token's preceding space if needed
                if (ArgumentPrinterUtils.hasSpaceBefore(token, jCas)) {
                    html.append(' ');
                }

                // does an argument component begin here?
                ArgumentComponent component = ArgumentPrinterUtils
                        .argAnnotationBegins(token, jCas);
                if (component != null) {
                    // Locale.ROOT keeps the CSS class name independent of the JVM locale
                    String cssClass = component.getClass().getSimpleName()
                            .toLowerCase(Locale.ROOT);
                    html.append("<span class=\"component\">").append(cssClass)
                            .append(":</span> <span class=\"").append(cssClass).append("\">");
                }

                // does a sentence begin here?
                Sentence sentence = ArgumentPrinterUtils.sentenceStartsOnToken(token);
                if (sentence != null) {
                    html.append("<span class=\"sentence\">S")
                            .append(ArgumentPrinterUtils.getSentenceNumber(sentence, jCas))
                            .append("</span>");
                }

                // token text comes from the document and must not be able to break the markup
                html.append(escapeHtml(token.getCoveredText()));

                // does an argument component end here?
                if (ArgumentPrinterUtils.argAnnotationEnds(token, jCas)) {
                    html.append("</span>");
                }
            }

            result.add(html.toString());
        }

        return result;
    }

    /**
     * Writes {@code <documentId>.html} into the output folder: header, rendered
     * paragraphs, implicit claims, components flagged as appeal to emotion, footer.
     *
     * @param aJCas CAS to write
     * @throws AnalysisEngineProcessException if the file cannot be opened or written
     */
    @Override
    public void process(JCas aJCas)
            throws AnalysisEngineProcessException
    {
        File outFile = new File(outputFolder,
                DocumentMetaData.get(aJCas).getDocumentId() + ".html");

        // try-with-resources closes the file even if rendering throws
        try (OutputStream fileStream = new FileOutputStream(outFile);
                PrintWriter out = new PrintWriter(
                        new OutputStreamWriter(fileStream, StandardCharsets.UTF_8))) {

            printHeader(aJCas, out);

            // print paragraphs
            List<String> paragraphs = renderDocumentToHtmlParagraphs(aJCas);
            out.printf("<p>%s</p>", StringUtils.join(paragraphs, "<br/><br/>"));
            out.print("<hr />");

            // implicit claim?
            for (Claim claim : JCasUtil.select(aJCas, Claim.class)) {
                if (ArgumentUnitUtils.isImplicit(claim)) {
                    out.printf(
                            "<p><span class=\"component\">Implicit claim:</span> <span class=\"claim\">%s</span></p>",
                            escapeHtml(ArgumentUnitUtils.getProperty(claim,
                                    ArgumentUnitUtils.PROP_KEY_REPHRASED_CONTENT)));
                }
            }

            // appeal to emotions
            for (ArgumentComponent component : JCasUtil
                    .select(aJCas, ArgumentComponent.class)) {
                if (ArgumentUnitUtils.getProperty(component,
                        ArgumentUnitUtils.PROP_KEY_IS_APPEAL_TO_EMOTION) != null) {
                    out.printf(
                            "<p><span class=\"component\">Appeal to emotions:</span> <span class=\"appeal\">%s</span></p>",
                            escapeHtml(component.getCoveredText()));
                }
            }

            printFooter(aJCas, out);

            // PrintWriter never throws on write errors; it only records them
            if (out.checkError()) {
                throw new IOException("Error while writing " + outFile);
            }
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Copies the {@code style.css} classpath resource into the output folder.
     *
     * @throws AnalysisEngineProcessException if the resource is missing or cannot be copied
     */
    @Override
    public void collectionProcessComplete()
            throws AnalysisEngineProcessException
    {
        super.collectionProcessComplete();

        try (InputStream css = this.getClass().getClassLoader()
                .getResourceAsStream(STYLESHEET_NAME)) {
            // getResourceAsStream signals a missing resource with null, not an exception
            if (css == null) {
                throw new FileNotFoundException(
                        "Classpath resource not found: " + STYLESHEET_NAME);
            }

            try (OutputStream target = new FileOutputStream(
                    new File(outputFolder, STYLESHEET_NAME))) {
                IOUtils.copy(css, target);
            }
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Closes the {@code body} and {@code html} elements opened by
     * {@link #printHeader(JCas, PrintWriter)}.
     *
     * @param aJCas unused
     * @param out   target writer
     */
    private void printFooter(JCas aJCas, PrintWriter out)
    {
        out.print("</body>\n</html>");
    }

    /**
     * Prints the XHTML 1.0 Strict prologue, the {@code head} element and the headings
     * taken from the single {@link WebArgumentMetadata} annotation of the CAS.
     *
     * @param aJCas CAS providing the metadata
     * @param out   target writer
     */
    private void printHeader(JCas aJCas, PrintWriter out)
    {
        WebArgumentMetadata metadata = JCasUtil
                .selectSingle(aJCas, WebArgumentMetadata.class);
        String origId = escapeHtml(metadata.getOrigId());

        // system identifier and namespace as defined by the XHTML 1.0 recommendation
        out.printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html PUBLIC"
                        + " \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n\"http://www.w3.org/TR/xhtml1/"
                        + "DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" >\n"
                        + "<head>\n<title>%s</title>\n"
                        + "<link rel=\"stylesheet\" href=\"style.css\"/>\n"
                        + " <meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\" />\n"
                        + "</head>\n<body>\n",
                origId);

        out.printf("<h1>%s</h1>\n<h2>%s, %s</h2>\n", origId,
                escapeHtml(metadata.getTopic()), escapeHtml(metadata.getDocType()));
    }

    /**
     * Replaces the characters {@code & < > "} by their HTML entities.
     *
     * @param text text to escape, may be {@code null}
     * @return escaped text, or {@code null} if {@code text} was {@code null}
     */
    private static String escapeHtml(String text)
    {
        if (text == null) {
            return null;
        }

        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            default:
                escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Converts all XMI files matching {@code [+]*.xmi} in a folder to HTML.
     *
     * @param args {@code args[0]}: folder with the XMI files; optional {@code args[1]}:
     *             output folder (defaults to {@code /tmp/out})
     * @throws Exception if the pipeline fails
     */
    public static void main(String[] args)
            throws Exception
    {
        if (args.length < 1) {
            throw new IllegalArgumentException(
                    "Usage: HtmlWriter <goldDataDir> [outputDir]");
        }

        String goldDataDir = args[0];
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;

        SimplePipeline.runPipeline(
                CollectionReaderFactory.createReaderDescription(XmiReader.class,
                        XmiReader.PARAM_SOURCE_LOCATION, goldDataDir,
                        XmiReader.PARAM_PATTERNS, "[+]*.xmi"),
                AnalysisEngineFactory.createEngineDescription(HtmlWriter.class,
                        HtmlWriter.PARAM_OUTPUT_FOLDER, outputDir));
    }
}