de.tudarmstadt.ukp.experiments.argumentation.sequence.writers.HtmlWriter.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.experiments.argumentation.sequence.writers.HtmlWriter.java

Source

/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.tudarmstadt.ukp.experiments.argumentation.sequence.writers;

import de.tudarmstadt.ukp.dkpro.argumentation.types.ArgumentComponent;
import de.tudarmstadt.ukp.dkpro.argumentation.types.ArgumentUnitUtils;
import de.tudarmstadt.ukp.dkpro.argumentation.types.Claim;
import de.tudarmstadt.ukp.dkpro.argumentation.types.WebArgumentMetadata;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiReader;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasConsumer_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * UIMA consumer that writes one HTML file per processed document into
 * {@link #PARAM_OUTPUT_FOLDER} and, once the collection is finished, copies the
 * {@code style.css} classpath resource next to the generated files.
 * <p>
 * Each file contains the document text with argument components and sentence starts
 * marked up as {@code <span>} elements, followed by any implicit claims and any
 * components flagged as appeal to emotion.
 *
 * @author Ivan Habernal
 */
public class HtmlWriter extends JCasConsumer_ImplBase {
    /**
     * Output folder where the output HTML files are stored
     */
    public static final String PARAM_OUTPUT_FOLDER = "outputFolder";

    /**
     * Output folder used by {@link #main(String[])} when none is given on the command line.
     */
    private static final String DEFAULT_MAIN_OUTPUT_FOLDER = "/tmp/out";

    @ConfigurationParameter(name = PARAM_OUTPUT_FOLDER, mandatory = true)
    private File outputFolder;

    /**
     * Initializes the component and makes sure the output folder exists.
     *
     * @param context UIMA context handed to the super class
     * @throws ResourceInitializationException if the super class fails to initialize or the
     *                                         output folder cannot be created
     */
    @Override
    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);

        // mkdirs() returns false both on failure and when the folder already exists (for
        // example created concurrently), hence the final isDirectory() re-check
        if (!outputFolder.isDirectory() && !outputFolder.mkdirs() && !outputFolder.isDirectory()) {
            throw new ResourceInitializationException(
                    new IOException("Cannot create output folder " + outputFolder));
        }
    }

    /**
     * Renders every {@link Paragraph} of the document as an HTML fragment.
     * <p>
     * Token text is HTML-escaped. A token on which an argument component begins is preceded
     * by a label span and an opening span whose CSS class is the lower-cased simple class
     * name of the component; a token on which a sentence starts is preceded by an
     * {@code S<number>} marker span; a token on which an argument component ends is followed
     * by the closing {@code </span>}.
     *
     * @param jCas document to render
     * @return one HTML fragment per paragraph, in the iteration order of the paragraphs
     */
    public static List<String> renderDocumentToHtmlParagraphs(JCas jCas) {
        List<String> result = new ArrayList<>();

        for (Paragraph paragraph : JCasUtil.select(jCas, Paragraph.class)) {
            StringBuilder html = new StringBuilder();

            for (Token token : JCasUtil.selectCovered(jCas, Token.class, paragraph)) {
                // print token's preceding space if needed
                if (ArgumentPrinterUtils.hasSpaceBefore(token, jCas)) {
                    html.append(' ');
                }

                // does an argument component begin here?
                ArgumentComponent component = ArgumentPrinterUtils.argAnnotationBegins(token, jCas);
                if (component != null) {
                    // Locale.ROOT keeps the CSS class name stable regardless of the JVM
                    // default locale (e.g. Turkish would lower-case "I" to a dotless i)
                    String cssClass = component.getClass().getSimpleName().toLowerCase(Locale.ROOT);
                    html.append("<span class=\"component\">").append(cssClass)
                            .append(":</span> <span class=\"").append(cssClass).append("\">");
                }

                // does a sentence start here?
                Sentence sentence = ArgumentPrinterUtils.sentenceStartsOnToken(token);
                if (sentence != null) {
                    html.append("<span class=\"sentence\">S")
                            .append(ArgumentPrinterUtils.getSentenceNumber(sentence, jCas))
                            .append("</span>");
                }

                // the token itself; escaped so that '&', '<' etc. cannot break the markup
                html.append(escapeHtml(token.getCoveredText()));

                // does an argument component end here?
                if (ArgumentPrinterUtils.argAnnotationEnds(token, jCas)) {
                    html.append("</span>");
                }
            }

            result.add(html.toString());
        }

        return result;
    }

    /**
     * Writes the HTML rendering of a single document to
     * {@code <outputFolder>/<documentId>.html}, encoded as UTF-8.
     *
     * @param aJCas document to write
     * @throws AnalysisEngineProcessException if the file cannot be opened or written
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        File outFile = new File(outputFolder, DocumentMetaData.get(aJCas).getDocumentId() + ".html");

        // try-with-resources closes the file even if rendering throws half-way through
        try (PrintWriter out = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(outFile), StandardCharsets.UTF_8))) {
            // print header
            printHeader(aJCas, out);

            // print paragraphs
            List<String> paragraphs = renderDocumentToHtmlParagraphs(aJCas);
            out.printf("<p>%s</p>", StringUtils.join(paragraphs, "<br/><br/>"));

            out.print("<hr />");

            // implicit claims
            for (Claim claim : JCasUtil.select(aJCas, Claim.class)) {
                if (ArgumentUnitUtils.isImplicit(claim)) {
                    out.printf(
                            "<p><span class=\"component\">Implicit claim:</span> <span class=\"claim\">%s</span></p>",
                            escapeHtml(ArgumentUnitUtils.getProperty(claim,
                                    ArgumentUnitUtils.PROP_KEY_REPHRASED_CONTENT)));
                }
            }

            // appeal to emotions: every component that has the corresponding property set
            for (ArgumentComponent component : JCasUtil.select(aJCas, ArgumentComponent.class)) {
                if (ArgumentUnitUtils.getProperty(component,
                        ArgumentUnitUtils.PROP_KEY_IS_APPEAL_TO_EMOTION) != null) {
                    out.printf(
                            "<p><span class=\"component\">Appeal to emotions:</span> <span class=\"appeal\">%s</span></p>",
                            escapeHtml(component.getCoveredText()));
                }
            }

            // print footer
            printFooter(aJCas, out);

            // PrintWriter never throws on write failures; it only records them
            if (out.checkError()) {
                throw new IOException("Failed to write " + outFile);
            }
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Copies the {@code style.css} classpath resource into the output folder after the
     * whole collection has been processed.
     *
     * @throws AnalysisEngineProcessException if the resource is missing or cannot be copied
     */
    @Override
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();

        // copy css
        try (InputStream css = this.getClass().getClassLoader().getResourceAsStream("style.css")) {
            if (css == null) {
                throw new IOException("Resource style.css not found on the classpath");
            }

            try (OutputStream target = new FileOutputStream(new File(outputFolder, "style.css"))) {
                IOUtils.copy(css, target);
            }
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Closes the {@code body} and {@code html} elements opened by
     * {@link #printHeader(JCas, PrintWriter)}.
     *
     * @param aJCas document being written (currently unused)
     * @param out   target writer
     */
    private void printFooter(JCas aJCas, PrintWriter out) {
        out.print("</body>\n</html>");
    }

    /**
     * Prints the XML declaration, doctype, {@code head} section and the document headings
     * taken from the document's {@link WebArgumentMetadata}.
     *
     * @param aJCas document being written
     * @param out   target writer
     */
    private void printHeader(JCas aJCas, PrintWriter out) {
        WebArgumentMetadata webArgumentMetadata = JCasUtil.selectSingle(aJCas, WebArgumentMetadata.class);
        String origId = escapeHtml(webArgumentMetadata.getOrigId());

        out.printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html PUBLIC"
                + " \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n\"http://www.w3.org/TR/xhtml1/"
                + "DTD/strict.dtd\">\n<html xmlns=\"http://www.w3.org/TR/xhtml1/strict\" >\n"
                + "<head>\n<title>%s</title>\n" + "<link rel=\"stylesheet\" href=\"style.css\"/>\n"
                + " <meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\" />\n"
                + "</head>\n<body>\n", origId);
        out.printf("<h1>%s</h1>\n<h2>%s, %s</h2>\n", origId,
                escapeHtml(webArgumentMetadata.getTopic()),
                escapeHtml(webArgumentMetadata.getDocType()));
    }

    /**
     * Escapes the characters that are significant in HTML/XML markup.
     *
     * @param value value to escape; converted with {@link String#valueOf(Object)}, so
     *              {@code null} is rendered as {@code "null"}, the same way the {@code %s}
     *              format specifier renders it
     * @return the text with {@code &}, {@code <}, {@code >} and {@code "} replaced by
     * their entity references
     */
    private static String escapeHtml(Object value) {
        String text = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(text.length() + 16);

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                default:
                    escaped.append(c);
            }
        }

        return escaped.toString();
    }

    /**
     * Renders all XMI files matching {@code [+]*.xmi} in a folder to HTML.
     *
     * @param args {@code args[0]} is the folder with the XMI files; the optional
     *             {@code args[1]} is the output folder (defaults to {@code /tmp/out})
     * @throws IllegalArgumentException if the input folder is not given
     * @throws Exception                if the pipeline fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: HtmlWriter <goldDataDir> [outputDir]");
        }

        String goldDataDir = args[0];
        String outputDir = args.length > 1 ? args[1] : DEFAULT_MAIN_OUTPUT_FOLDER;

        SimplePipeline.runPipeline(
                CollectionReaderFactory.createReaderDescription(XmiReader.class, XmiReader.PARAM_SOURCE_LOCATION,
                        goldDataDir, XmiReader.PARAM_PATTERNS, "[+]*.xmi"),
                AnalysisEngineFactory.createEngineDescription(HtmlWriter.class, HtmlWriter.PARAM_OUTPUT_FOLDER,
                        outputDir)
        );
    }
}