Source code of ch.uzh.ifi.attempto.acewiki.gf.GfSentence (Java), part of the AceWiki project.

// This file is part of AceWiki.
// Copyright 2013, AceWiki developers.
//
// AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
// not, see http://www.gnu.org/licenses/.

package ch.uzh.ifi.attempto.acewiki.gf;

import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
import static ch.uzh.ifi.attempto.ape.OutputType.OWLFSSPP;
import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringEscapeUtils;
import org.semanticweb.owlapi.model.OWLAxiom;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.SWRLRule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;

import ch.uzh.ifi.attempto.acewiki.core.Declaration;
import ch.uzh.ifi.attempto.acewiki.core.MultilingualSentence;
import ch.uzh.ifi.attempto.acewiki.core.OntologyElement;
import ch.uzh.ifi.attempto.acewiki.core.PrettyTextElement;
import ch.uzh.ifi.attempto.acewiki.core.SentenceDetail;
import ch.uzh.ifi.attempto.acewiki.owl.AceWikiOWLReasoner2;
import ch.uzh.ifi.attempto.acewiki.owl.OWLSentence;
import ch.uzh.ifi.attempto.ape.ACEParserResult;
import ch.uzh.ifi.attempto.ape.ACEText;
import ch.uzh.ifi.attempto.base.DefaultTextOperator;
import ch.uzh.ifi.attempto.base.MultiTextContainer;
import ch.uzh.ifi.attempto.base.TextContainer;
import ch.uzh.ifi.attempto.base.TextElement;
import ch.uzh.ifi.attempto.base.TextOperator;
import ch.uzh.ifi.attempto.echocomp.LocaleResources;
import ch.uzh.ifi.attempto.gfservice.GfServiceException;

/**
 * This class represents a sentence for the GF AceWiki engine.
 * A "sentence" is a tree set that can be linearized into multiple
 * languages.
 *
 * @author Kaarel Kaljurand
 */
public abstract class GfSentence extends MultilingualSentence implements OWLSentence {

    private final Logger mLogger = LoggerFactory.getLogger(GfSentence.class);

    protected final GfGrammar mGfGrammar;
    protected final GfWikiEntry mGfWikiEntry;

    // Use the original input in the text container
    // TODO: this is a hack
    boolean mUseOriginal = true;

    // Maps a language identifier to the set of linearizations (text containers) in this language
    final Map<String, MultiTextContainer> textContainers = new HashMap<>();

    // Maps a tree to the set of linearizations for each language.
    // ie. Map<Tree, Map<Language, Set<Linearization>>>
    // Lazily initialized as tree linearizations are requested, but done for all languages
    // at once - performance reasons.
    private final Map<String, Map<String, Set<String>>> treeLinearizations = new HashMap<>();

    // These fields are evaluated lazily by update()
    private Boolean isReasonable;
    private Boolean isOWL;
    private Boolean isOWLSWRL;
    private Set<OWLAxiom> owlAxioms;

    /**
     * Creates a declaration from an existing wiki entry (from the storage).
     * Each wiki entry is tagged with the language in which the entry was created.
     * Each wiki entry also contains the original text that was used to create the
     * trees. In case the trees can not be linearized (because the grammar has changed)
     * then we try to parse the original text instead (which might also fail).
     */
    public GfSentence(GfGrammar grammar, GfWikiEntry entry) {
        mGfGrammar = grammar;

        // Re-parsing is only possible if both the language and the original text are known.
        boolean isParseable = (entry.getLanguage() != null && entry.getText() != null);

        if (isParseable && hasBadTrees(entry.getLanguage(), entry.getTrees())) {
            mGfWikiEntry = createGfWikiEntry(grammar, entry.getLanguage(), entry.getText());
        } else {
            mGfWikiEntry = entry;
        }
    }

    /**
     * Creates a declaration object from the given text.
     * The text will be parsed to get the trees.
     */
    public GfSentence(GfGrammar grammar, String language, String text) {
        mGfGrammar = grammar;
        mGfWikiEntry = createGfWikiEntry(grammar, language, text);
    }

    /**
     * Creates a question or a declaration from the given wiki entry.
     * An entry whose text contains a question mark is treated as a question,
     * everything else (including entries without any original text) as a declaration.
     */
    public static GfSentence createGfSentence(GfGrammar grammar, GfWikiEntry entry) {
        // The entry text can be null (see the constructor above, which explicitly
        // handles this case), so guard against an NPE here.
        String text = entry.getText();
        if (text != null && text.contains("?")) {
            return new GfQuestion(grammar, entry);
        }
        return new GfDeclaration(grammar, entry);
    }

    /**
     * Maps this declaration to its visual representation in the given language.
     * The declaration is a set of trees.
     * Each tree can in principle have multiple linearizations (variants), but we currently
     * consider only the first (canonical) variant.
     * We have to handle the following:
     *   - the linearization fails (e.g. tree is malformed)
     *   - a tree has an empty set of linearizations
     *   - a linearization is an empty string
     *   - a linearization repeats
     *   - the user wants to see what he/she originally entered not a rewrite into the canonical variant
     */
    public MultiTextContainer getTextContainer(String language) {
        MultiTextContainer mtc = textContainers.get(language);
        if (mtc == null) {
            List<TextContainer> tmp = new ArrayList<>();
            TextOperator to = getTextOperator(language);

            // If the text is requested in the original language (i.e. in which the entry was first created)
            // then we return the original text. The benefit is that we do not need to make a call
            // to the linearizer. Also, there is no danger that the original text would be replaced by a variant
            // (e.g. "does not -> doesn't") which would be confusing.
            if (mUseOriginal && language.equals(mGfWikiEntry.getLanguage()) && mGfWikiEntry.getText() != null) {
                tmp = ImmutableList.of(makeTextContainer(to, mGfWikiEntry.getText()));
            } else {
                Set<String> seen = Sets.newHashSet();
                for (String tree : mGfWikiEntry.getTrees().getTrees()) {
                    Set<String> lins = getLins(tree, language);
                    if (lins == null) {
                        mLogger.info("getTextContainer: null {}: {}", language, tree);
                        // TODO do it properly
                        tmp.add(new TextContainer(new TextElement("-NULL-" + tree)));
                    } else if (lins.isEmpty()) {
                        mLogger.info("getTextContainer: empty {}: {}", language, tree);
                        // TODO do it properly
                        tmp.add(new TextContainer(new TextElement("-EMPTY-" + tree)));
                    } else {
                        String lin = lins.iterator().next();
                        if (lin.isEmpty() || seen.contains(lin)) {
                            // Don't show an empty lin and the same lin twice
                        } else {
                            seen.add(lin);
                            tmp.add(makeTextContainer(to, lin));
                        }
                    }
                }
            }
            if (tmp.isEmpty()) {
                tmp.add(new TextContainer(new TextElement("-NO_LINEARIZATION_FOUND-")));
            }
            mtc = new MultiTextContainer(tmp);
            textContainers.put(language, mtc);
        }
        return mtc;
    }

    /**
     * Drops all cached linearizations so that they are recomputed on demand.
     *
     * TODO: this is temporary. Also note that it does not have effect on the
     * original input as this is always taken from mGfWikiEntry.getText()
     */
    public void clearLinearizations() {
        textContainers.clear();
        treeLinearizations.clear();
        mUseOriginal = false;
    }

    // Tokenizes the given string with the given text operator and wraps
    // every token into a PrettyTextElement inside a fresh text container.
    TextContainer makeTextContainer(TextOperator to, String str) {
        TextContainer tc = new TextContainer(to);
        for (String s : to.splitIntoTokens(str)) {
            tc.addElement(new PrettyTextElement(s));
        }
        return tc;
    }

    /**
     * TODO: not implemented yet; always reports that the element does not occur.
     */
    public boolean contains(OntologyElement e) {
        return false;
    }

    /**
     * Returns the details of this tree set:
     * 
     *   - abstract trees;
     *   - translations;
     *   - abstract tree diagram;
     *   - parse tree diagram;
     *   - word alignment diagram;
     *   - ...
     * 
     * The output highlights the given language.
     *
     * TODO: everything should be hyperlinked.
     */
    public List<SentenceDetail> getDetails(String lang, int index) {
        List<SentenceDetail> l = new ArrayList<>();

        // The APE-based semantics section is only meaningful for ACE-compatible grammars.
        if (mGfGrammar.isAceCompatible()) {
            l.addAll(getSemantics(index));
        }

        l.addAll(formatTree(mGfGrammar, lang, index));

        return l;
    }

    public int getNumberOfRepresentations() {
        return mGfWikiEntry.getTrees().size();
    }

    public List<String> getParseTrees() {
        return mGfWikiEntry.getTrees().getTrees();
    }

    public GfWikiEntry getGfWikiEntry() {
        return mGfWikiEntry;
    }

    public String serialize() {
        return GfGrammar.serialize(mGfWikiEntry);
    }

    /**
     * Recomputes the lazily evaluated OWL-related fields (isReasonable, isOWL,
     * isOWLSWRL, owlAxioms) from the current wiki entry. If the sentence turns out
     * not to be reasonable it is also un-integrated from the ontology.
     */
    public void update() {
        Set<Set<OWLAxiom>> setOfSetOfAxiom = null;
        String uri = getOntology().getURI();
        try {
            setOfSetOfAxiom = GfOwlConverter.convert(mGfGrammar, uri, mGfWikiEntry);
        } catch (OWLOntologyCreationException e) {
            // TODO: surface this to the user; for now at least log it instead of
            // swallowing it silently.
            mLogger.warn("OWL conversion failed: {}", e.getMessage());
        }

        if (setOfSetOfAxiom == null || setOfSetOfAxiom.isEmpty()) {
            isOWLSWRL = isOWL = isReasonable = false;
            owlAxioms = new HashSet<>();
        } else {
            isOWLSWRL = isOWL = isReasonable = true;
            owlAxioms = GfOwlConverter.disambiguate(setOfSetOfAxiom);

            // TODO: currently not reasoning with SWRL rules, this should
            // be controlled by the profile instead
            for (OWLAxiom ax : owlAxioms) {
                if (ax instanceof SWRLRule) {
                    isOWL = isReasonable = false;
                    mLogger.info("Axiom is SWRL rule: {}", ax);
                    break;
                }
            }
        }

        // TODO: check also questions somehow, e.g. EL probably does not allow inverse properties in questions
        if (isReasonable && this instanceof Declaration) {
            AceWikiOWLReasoner2 reasoner = (AceWikiOWLReasoner2) getOntology().getReasoner().getWrappedReasoner();
            isReasonable = GfOwlConverter.isReasonable(reasoner, owlAxioms);
        }

        if (!isReasonable && isIntegrated()) {
            super.setIntegrated(false);
        }
    }

    // TODO: this method does not make sense for GF-wiki entries
    // because they can be ambiguous.
    public String getPrettyOWL() {
        return null;
    }

    public boolean isReasonable() {
        if (isReasonable == null) {
            update();
        }
        return isReasonable;
    }

    public boolean isOWL() {
        if (isOWL == null) {
            update();
        }
        return isOWL;
    }

    public boolean isOWLSWRL() {
        if (isOWLSWRL == null) {
            update();
        }
        return isOWLSWRL;
    }

    public Set<OWLAxiom> getOWLAxioms() {
        if (owlAxioms == null) {
            update();
        }
        return owlAxioms;
    }

    /**
     * Returns the grammar object.
     * 
     * @return The grammar object.
     */
    protected GfGrammar getGfGrammar() {
        return mGfGrammar;
    }

    // Return some of the APE analysis of the tree at the given index.
    // The APE analysis is obtained by first linearizing the tree in "Ape".
    // This only works if the wiki is ACE-based.
    //
    // TODO: experimental
    private List<SentenceDetail> getSemantics(int index) {
        String tree = mGfWikiEntry.getTrees().getTrees().get(index);
        List<SentenceDetail> l = new ArrayList<>();

        if (tree == null) {
            l.add(new SentenceDetail("ERROR", "Statement is not well-formed"));
            return l;
        }

        try {
            ACEText acetext = GfWikiUtils.getACEText(mGfGrammar, tree);
            ACEParserResult pr = GfWikiUtils.parse(acetext, getOntology().getURI(), PARAPHRASE1, OWLFSSPP, DRSPP);

            l.add(new SentenceDetail("ACE", "<pre>" + StringEscapeUtils.escapeHtml(acetext.getText()) + "</pre>"));
            l.add(new SentenceDetail("ACE (paraphrase)",
                    "<pre>" + StringEscapeUtils.escapeHtml(pr.get(PARAPHRASE1)) + "</pre>"));
            l.add(new SentenceDetail("OWL", "<pre>" + StringEscapeUtils.escapeHtml(pr.get(OWLFSSPP)) + "</pre>"));
            l.add(new SentenceDetail("DRS", "<pre>" + StringEscapeUtils.escapeHtml(pr.get(DRSPP)) + "</pre>"));
            l.add(new SentenceDetail("Lexicon",
                    "<pre>" + StringEscapeUtils.escapeHtml(Joiner.on('\n').join(acetext.getLexicon().getEntries()))
                            + "</pre>"));
            l.add(new SentenceDetail("Messages", "<pre>"
                    + StringEscapeUtils.escapeHtml(Joiner.on('\n').join(pr.getMessageContainer().getMessages()))
                    + "</pre>"));
        } catch (Exception e) {
            l.add(new SentenceDetail("ERROR", e.getMessage()));
        }

        return l;
    }

    private String getAbstrtreeAsHtml(String tree) {
        try {
            return getImg(getGfGrammar().abstrtree(tree));
        } catch (GfServiceException e) {
            return getError(e);
        }
    }

    private String getParsetreeAsHtml(String tree, String language) {
        try {
            return getImg(getGfGrammar().parsetree(tree, language));
        } catch (GfServiceException e) {
            return getError(e);
        }
    }

    // Wraps a data URI into a clickable thumbnail image.
    private String getImg(String dataUri) {
        return "<a href=\"" + dataUri + "\"><img src=\"" + dataUri + "\" style=\"max-height:500px\"/></a>";
    }

    // Renders an exception as an HTML error paragraph.
    private static String getError(Exception e) {
        // Escape the message so that exception text cannot inject markup into the
        // page (all other HTML in this class is escaped the same way).
        return "<p style=\"color: red\">" + StringEscapeUtils.escapeHtml(e.getMessage()) + "</p>";
    }

    // Builds the detail rows for the tree at the given index: parse tree diagram,
    // raw ASCII tree, and abstract tree diagram.
    private List<SentenceDetail> formatTree(GfGrammar grammar, String lang, int index) {
        String tree = mGfWikiEntry.getTrees().getTrees().get(index);
        List<SentenceDetail> l = new ArrayList<>();
        l.add(new SentenceDetail("acewiki_details_syntree", getParsetreeAsHtml(tree, lang)));
        l.add(new SentenceDetail(LocaleResources.getString("acewiki_details_internal") + " (ASCII)",
                "<p><code>" + tree + "</code></p>"));
        l.add(new SentenceDetail("acewiki_details_internal", getAbstrtreeAsHtml(tree)));
        return l;
    }

    // Returns the linearizations of the given tree in the given language, using the
    // per-tree cache; returns null if the linearization service fails.
    Set<String> getLins(String tree, String language) {
        // Linearization of a single tree to all possible languages.
        Map<String, Set<String>> tl = treeLinearizations.get(tree);

        if (tl == null) {
            try {
                tl = getGfGrammar().linearize(tree);
                treeLinearizations.put(tree, tl);
            } catch (GfServiceException e) {
                // TODO find out what happened, i.e.
                // why was the tree not supported by the grammar.
                mLogger.warn("tree not supported by the grammar - {}: {}", language, tree);
                return null;
            }
        }

        return tl.get(language);
    }

    // True iff at least one tree in the list has no linearization in the given
    // language (which indicates that the grammar has changed since the entry was made).
    private boolean hasBadTrees(String language, TreeList treeList) {
        for (String tree : treeList.getTrees()) {
            Set<String> lins = getLins(tree, language);
            if (lins == null || lins.isEmpty()) {
                return true;
            }
        }
        return false;
    }

    // Parses the given text in the given language into a wiki entry.
    // If parsing yields no trees, retries with the first character lower-cased
    // (a heuristic for sentence-initial capitalization).
    private static GfWikiEntry createGfWikiEntry(GfGrammar grammar, String language, String text) {
        try {
            Set<String> trees = grammar.parse(text, language);
            if (trees == null) {
                throw new RuntimeException("No such language: " + language);
            }
            if (trees.isEmpty()) {
                // TODO this should be done properly; see GfTextOperator
                // If parsing fails: first char to lower case
                text = DefaultTextOperator.firstCharToLowerCase(text);
                trees = grammar.parse(text, language);
            }
            return new GfWikiEntry(language, text, new TreeList(trees));
        } catch (GfServiceException e) {
            // Preserve the cause so the original stack trace is not lost.
            throw new RuntimeException(e.getMessage(), e);
        }
    }
}