lu.list.itis.dkd.aig.process.OnDemandTextOntologyCreationProcess.java Source code

Java tutorial

Introduction

Here is the source code for lu.list.itis.dkd.aig.process.OnDemandTextOntologyCreationProcess.java

Source

/**
 * Copyright (c) 2016-2017  Luxembourg Institute of Science and Technology (LIST).
 * 
 * This software is licensed under the Apache License, Version 2.0 (the "License") ; you
 * may not use this file except in compliance with the License. You may obtain a copy of the License
 * at : http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 * 
 * for more information about the software, please contact info@list.lu
 */
package lu.list.itis.dkd.aig.process;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Locale;
import java.util.Map;

import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.SetMultimap;

import lu.list.itis.dkd.aig.Value;
import lu.list.itis.dkd.aig.ValueType;
import lu.list.itis.dkd.aig.Variable;
import lu.list.itis.dkd.aig.VariableBuilder;
import lu.list.itis.dkd.aig.resolution.ResolutionException;
import lu.list.itis.dkd.aig.resolution.TemplateConsistencyException;
import lu.list.itis.dkd.aig.resolution.TemplateParseException;
import lu.list.itis.dkd.aig.util.Externalization;
import lu.list.itis.dkd.aig.util.FusekiHttpHelper;
import lu.list.itis.dkd.aig.util.PropertiesFetcher;
import lu.list.itis.dkd.assess.cloze.generation.DistractorGenerator;
import lu.list.itis.dkd.assess.cloze.ontology.ClozeOntology;
import lu.list.itis.dkd.assess.cloze.option.ClozeText;
import lu.list.itis.dkd.assess.cloze.util.ClozeVariable.Approach;
import lu.list.itis.dkd.assess.cloze.util.ClozeVariable.Difficulty;
import lu.list.itis.dkd.assess.opennlp.ontology.TextOntology;
import lu.list.itis.dkd.assess.opennlp.util.Type.Language;
import lu.list.itis.dkd.dbc.annotation.NonNullByDefault;

/**
 * Initialization process that builds a cloze ontology on demand from a text
 * supplied at resolution time, uploads it into a newly created Fuseki data set
 * and stores the SPARQL endpoint of that data set in the configured datasource
 * value.
 */
@NonNullByDefault
public final class OnDemandTextOntologyCreationProcess extends InitializationProcess {

    // NOTE(review): neither of the two maps below is read or written anywhere
    // in this class; they look like leftovers and are kept only to avoid
    // changing the class layout.
    private final BiMap<String, String> variableKeyToEncodedKey = HashBiMap.create();
    private BiMap<String, String> encodedKeyToVariableKey;

    /** Variable that owns the datasource value; published by {@link #initializeVariables}. */
    private final Variable datasourceVariable;
    /** Value that receives the SPARQL endpoint address once the ontology was uploaded. */
    private final Value datasourceValue;
    /** Identifier of the datasource value, taken from the process parameters. */
    private final String datasourceKey;

    // Generation settings; each default below may be overridden by a process parameter.
    private int numberOfDistractors = 3;
    private boolean skipFirstSentence = true;
    private boolean useNGramArticleCorrection = true;
    private boolean useGrammaticalDependencies = false;
    private boolean useSoundex = true;
    // NOTE(review): generateFeedback and difficulty are parsed in the constructor
    // but never read again in this class.
    private boolean generateFeedback = false;
    private Difficulty difficulty = Difficulty.MEDIUM;
    private Approach approach = Approach.DEFINITION;

    /**
     * Constructor initializing all fields.
     *
     * @param parameters
     *            A list of parameters keyed by the parameter name to draw all
     *            fields values from. The datasource key is mandatory; every
     *            other parameter is optional and falls back to its default.
     *            Only the first value of each parameter is considered.
     * @throws TemplateParseException
     *             Thrown when the datasource value cannot be retrieved from
     *             the datasource variable.
     * @throws ResolutionException
     *             Declared by this constructor; it may be propagated from the
     *             superclass constructor or the variable lookup (not
     *             determinable from this class alone).
     * @throws IllegalArgumentException
     *             Thrown when the datasource key parameter is absent, or when
     *             the approach or difficulty does not name an existing
     *             constant (for the number of distractors this surfaces as a
     *             {@link NumberFormatException}).
     * @throws NullPointerException
     *             Thrown when the datasource key parameter is an empty string.
     */
    public OnDemandTextOntologyCreationProcess(final ArrayListMultimap<String, String> parameters)
            throws TemplateParseException, ResolutionException {
        super(parameters);

        checkArgument(parameters.containsKey(Externalization.DATASOURCE_KEY),
                "The datasource key provided for the process must be specified and non-emtpy!");

        // An empty key is mapped to null first so that it is rejected as well.
        datasourceKey = checkNotNull(
                Strings.emptyToNull(parameters.get(Externalization.DATASOURCE_KEY).get(0)),
                "The datasource key provided for the process must be specified and non-emtpy!");

        final URI datasource = outcomeVariablesByValueKey.get(datasourceKey);
        datasourceVariable = VariableBuilder.getVariableFromBlueprint(datasource);

        try {
            datasourceValue = datasourceVariable.getValueByIdentifier(datasourceKey);
        } catch (final TemplateConsistencyException e) {
            // The value could not be resolved, so only the variable can be
            // named here; dereferencing the (still unset) value would raise a
            // NullPointerException and mask the actual cause.
            throw new TemplateParseException(
                    "Unable to get value: " + datasourceKey + " in: " + datasourceVariable.getIdentifier(), e);
        }

        if (parameters.containsKey(Externalization.NUMBER_OF_DISTRACTORS)) {
            numberOfDistractors = Integer.parseInt(parameters.get(Externalization.NUMBER_OF_DISTRACTORS).get(0));
        }

        if (parameters.containsKey(Externalization.FIRST_SENTENCE)) {
            skipFirstSentence = Boolean.parseBoolean(parameters.get(Externalization.FIRST_SENTENCE).get(0));
        }
        if (parameters.containsKey(Externalization.USE_NGRAM_ARTICLE_CORRECTION)) {
            useNGramArticleCorrection = Boolean
                    .parseBoolean(parameters.get(Externalization.USE_NGRAM_ARTICLE_CORRECTION).get(0));
        }
        if (parameters.containsKey(Externalization.USE_GRAMMATICAL_DEPENDENCIES)) {
            useGrammaticalDependencies = Boolean
                    .parseBoolean(parameters.get(Externalization.USE_GRAMMATICAL_DEPENDENCIES).get(0));
        }
        if (parameters.containsKey(Externalization.SOUNDEX)) {
            useSoundex = Boolean.parseBoolean(parameters.get(Externalization.SOUNDEX).get(0));
        }
        if (parameters.containsKey(Externalization.FEEDBACK)) {
            generateFeedback = Boolean.parseBoolean(parameters.get(Externalization.FEEDBACK).get(0));
        }
        // Enum names are upper-cased with Locale.ROOT so that the lookup does
        // not depend on the default locale of the JVM (e.g. Turkish dotted I).
        if (parameters.containsKey(Externalization.APPROACH)) {
            approach = Approach.valueOf(parameters.get(Externalization.APPROACH).get(0).toUpperCase(Locale.ROOT));
        }
        if (parameters.containsKey(Externalization.DIFFICULTY)) {
            difficulty = Difficulty
                    .valueOf(parameters.get(Externalization.DIFFICULTY).get(0).toUpperCase(Locale.ROOT));
        }

    }

    /**
     * {@inheritDoc}<br>
     *
     * The method builds a cloze text from the input entry keyed
     * <code>"text"</code>, derives a text ontology and a cloze ontology from
     * it, and serializes the cloze ontology in memory. The serialized ontology
     * is uploaded into a newly created, randomly named Fuseki data set. The
     * SPARQL endpoint address of that data set is written into the datasource
     * value, and the datasource variable is added to <code>variables</code>.
     *
     * @param input
     *            The input values; only the entry keyed <code>"text"</code> is
     *            read.
     * @param variables
     *            The variable collection the datasource variable is added to,
     *            keyed by the identifier of the variable.
     * @throws ResolutionException
     *             Thrown when the data set cannot be created, the ontology
     *             cannot be uploaded, or the endpoint address cannot be
     *             built.
     * @throws TemplateConsistencyException
     *             Declared by the overridden method; none of the statements in
     *             this implementation throws it directly.
     */
    @Override
    public void initializeVariables(final Map<String, String> input, final SetMultimap<URI, Variable> variables)
            throws ResolutionException, TemplateConsistencyException {

        configureClozeGenerator();

        ClozeText clozeText = new ClozeText(input.get("text"), "body", Language.EN, approach, numberOfDistractors, //$NON-NLS-1$
                skipFirstSentence);

        TextOntology textOntology = new TextOntology(clozeText, "onDemandCloze");
        ClozeOntology clozeOntology = new ClozeOntology(clozeText, textOntology);

        // The ontology is serialized into memory and handed over as a stream;
        // nothing is written to disk.
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        clozeOntology.save(outputStream);
        ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());

        String dataSetNamePrefix = PropertiesFetcher.getProperties().getProperty(Externalization.FUSEKI_DATASET)
                + '_';
        String dataSetName = FusekiHttpHelper.createRandomDataSetName(dataSetNamePrefix);
        try {
            FusekiHttpHelper.createDataSet(dataSetName);
            FusekiHttpHelper.uploadOntology(inputStream, dataSetName, null);
        } catch (IOException | org.apache.http.HttpException e) {
            // NOTE(review): a data set that was created before a failing upload
            // is not removed here.
            throw new ResolutionException("Failed to create dataset", e);
        }

        // Publish the endpoint of the new data set through the datasource value.
        URL endpoint;
        try {
            endpoint = FusekiHttpHelper.getSparqlEndPoint(dataSetName);
        } catch (MalformedURLException e) {
            throw new ResolutionException("Failed to get endpoint address", e);
        }

        datasourceValue.setInnerValueTo(endpoint.toString());
        variables.put(datasourceVariable.getIdentifier(), datasourceVariable);

    }

    /**
     * Pushes the distractor related settings of this process to the
     * {@link DistractorGenerator}. All calls are static, so the settings are
     * not scoped to this instance. The n-gram article correction flag drives
     * both the key article search and the distractor article search.
     */
    private void configureClozeGenerator() {
        DistractorGenerator.setDependencyUsage(useGrammaticalDependencies);
        DistractorGenerator.useSoundex(useSoundex);
        DistractorGenerator.useKeyArticleSearch(useNGramArticleCorrection);
        DistractorGenerator.setDistractorArticleSearch(useNGramArticleCorrection);
    }
}