org.wikidata.wdtk.wikibaseapi.WbEditEntityAction.java Source code

Java tutorial

Introduction

Here is the source code for org.wikidata.wdtk.wikibaseapi.WbEditEntityAction.java

Source

package org.wikidata.wdtk.wikibaseapi;

/*
 * #%L
 * Wikidata Toolkit Wikibase API
 * %%
 * Copyright (C) 2014 - 2015 Wikidata Toolkit Developers
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.json.jackson.JacksonTermedStatementDocument;
import org.wikidata.wdtk.wikibaseapi.apierrors.MaxlagErrorException;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
import org.wikidata.wdtk.wikibaseapi.apierrors.TokenErrorException;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;

/**
 * Java implementation for the wbeditentity API action.
 *
 * @author Michael Guenther
 * @author Markus Kroetzsch
 */
public class WbEditEntityAction {

    static final Logger logger = LoggerFactory.getLogger(WbEditEntityAction.class);

    static int MAXLAG_SLEEP_TIME = 5000;

    /**
     * Connection to an Wikibase API.
     */
    final ApiConnection connection;

    /**
     * The IRI that identifies the site that the data is from.
     */
    final String siteIri;

    /**
     * Mapper object used for deserializing JSON data.
     */
    final ObjectMapper mapper = new ObjectMapper();

    /**
     * Current CSRF (Cross-Site Request Forgery) token, or null if no valid
     * token is known.
     */
    String csrfToken = null;

    /**
     * Value in seconds of MediaWiki's maxlag parameter. Shorter is nicer,
     * longer is more aggressive.
     */
    int maxLag = 5;

    /**
     * Number of recent editing times to monitor in order to avoid editing too
     * fast. Wikidata.org seems to block fast editors after 9 edits, so this
     * size seems to make sense.
     */
    final static int editTimeWindow = 9;

    /**
     * Average time to wait after each edit. Individual edits can be faster than
     * this, but it is ensured that this time will be taken per edit in the long
     * run.
     */
    int averageMsecsPerEdit = 2000;

    /**
     * Times of the last {@link #editTimeWindow} edits. Used in a loop. Most
     * recent edit time is at {@link #curEditTimeSlot}.
     */
    final long[] recentEditTimes = new long[editTimeWindow];
    /**
     * @see #recentEditTimes
     */
    int curEditTimeSlot = 0;

    /**
     * Number of edits that will be performed before the object enters
     * simulation mode, or -1 if there is no limit on the number of edits.
     */
    int remainingEdits = -1;

    /**
     * Creates an object to modify data on a Wikibase site. The API is used to
     * request the changes. The site URI is necessary since it is not contained
     * in the data retrieved from the API.
     *
     * @param connection
     *            {@link ApiConnection} Object to send the requests
     * @param siteUri
     *            the URI identifying the site that is accessed (usually the
     *            prefix of entity URIs), e.g.,
     *            "http://www.wikidata.org/entity/"
     */
    public WbEditEntityAction(ApiConnection connection, String siteUri) {
        this.connection = connection;
        this.siteIri = siteUri;
    }

    /**
     * Returns the current value of the maxlag parameter. It specifies the
     * number of seconds. To save actions causing any more site replication lag,
     * this parameter can make the client wait until the replication lag is less
     * than the specified value. In case of excessive lag, error code "maxlag"
     * is returned upon API requests.
     *
     * @return current setting of the maxlag parameter
     */
    public int getMaxLag() {
        return this.maxLag;
    }

    /**
     * Set the value of the maxlag parameter. If unsure, keep the default. See
     * {@link #getMaxLag()} for details.
     *
     * @param maxLag
     *            the new value in seconds
     */
    public void setMaxLag(int maxLag) {
        this.maxLag = maxLag;
    }

    /**
     * Returns the number of edits that will be performed before entering
     * simulation mode, or -1 if there is no limit on the number of edits
     * (default).
     *
     * @return number of remaining edits
     */
    public int getRemainingEdits() {
        return this.remainingEdits;
    }

    /**
     * Sets the number of edits that this object can still perform. Thereafter,
     * edits will only be prepared but not actually performed in the Web API.
     * This function is useful to do a defined number of test edits. If this
     * number is set to 0, then no edits will be performed. If it is set to -1
     * (or any other negative number), then there is no limit on the edits
     * performed.
     *
     * @param remainingEdits
     *            number of edits that can still be performed, or -1 to disable
     *            this limit (default setting)
     */
    public void setRemainingEdits(int remainingEdits) {
        this.remainingEdits = remainingEdits;
    }

    /**
     * Returns the average time in milliseconds that one edit will take. This
     * time is enforced to avoid overloading the site with too many edits, and
     * also to throttle the rate of editing (which is useful to stop a bot in
     * case of errors). Individual edits can be faster than this, but if several
     * consecutive edits are above this rate, the program will pause until the
     * expected speed is reached again. The delay is based on real system time.
     * This means that it will only wait as long as necessary. If your program
     * takes time between edits for other reasons, there will be no additional
     * delay caused by this feature.
     *
     * @return average time per edit in milliseconds
     */
    public int getAverageTimePerEdit() {
        return this.averageMsecsPerEdit;
    }

    /**
     * Sets the average time that a single edit should take, measured in
     * milliseconds. See {@link #getAverageTimePerEdit()} for details.
     *
     * @param milliseconds
     *            the new value in milliseconds
     */
    public void setAverageTimePerEdit(int milliseconds) {
        this.averageMsecsPerEdit = milliseconds;
    }

    /**
     * Executes the API action "wbeditentity" for the given parameters. Created
     * or modified items are returned as a result. In particular, this is
     * relevant to find out about the id assigned to a newly created entity.
     * <p>
     * Unless the parameter clear is true, data of existing entities will be
     * modified or added, but not deleted. For labels, descriptions, and
     * aliases, this happens by language. In particular, if an item has English
     * and German aliases, and an edit action writes a new English alias, then
     * this new alias will replace all previously existing English aliases,
     * while the German aliases will remain untouched. In contrast, adding
     * statements for a certain property will not delete existing statements of
     * this property. In fact, it is even possible to create many copies of the
     * exact same statement. A special JSON syntax exists for deleting specific
     * statements.
     * <p>
     * See the <a href=
     * "https://www.wikidata.org/w/api.php?action=help&modules=wbeditentity"
     * >online API documentation</a> for further information.
     * <p>
     * TODO: There is currently no way to delete the label, description, or
     * aliases for a particular language without clearing all data. Empty
     * strings are not accepted. One might achieve this by adapting the JSON
     * serialization to produce null values for such strings, and for alias
     * lists that contain only such strings.
     *
     * @param id
     *            the id of the entity to be edited; if used, the site and title
     *            parameters must be null
     * @param site
     *            when selecting an entity by title, the site key for the title,
     *            e.g., "enwiki"; if used, title must also be given but id must
     *            be null
     * @param title
     *            string used to select an entity by title; if used, site must
     *            also be given but id must be null
     * @param newEntity
     *            used for creating a new entity of a given type; the value
     *            indicates the intended entity type; possible values include
     *            "item" and "property"; if used, the parameters id, site, and
     *            title must be null
     * @param data
     *            JSON representation of the data that is to be written; this is
     *            a mandatory parameter
     * @param clear
     *            if true, existing data will be cleared (deleted) before
     *            writing the new data
     * @param bot
     *            if true, edits will be flagged as "bot edits" provided that
     *            the logged in user is in the bot group; for regular users, the
     *            flag will just be ignored
     * @param baserevid
     *            the revision of the data that the edit refers to or 0 if this
     *            should not be submitted; when used, the site will ensure that
     *            no edit has happened since this revision to detect edit
     *            conflicts; it is recommended to use this whenever in all
     *            operations where the outcome depends on the state of the
     *            online data
     * @param summary
     *            summary for the edit; will be prepended by an automatically
     *            generated comment; the length limit of the autocomment
     *            together with the summary is 260 characters: everything above
     *            that limit will be cut off
     * @return modified or created entity document
     * @throws IOException
     *             if there was an IO problem. such as missing network
     *             connection
     * @throws MediaWikiApiErrorException
     *             if the API returns an error
     */
    public EntityDocument wbEditEntity(String id, String site, String title, String newEntity, String data,
            boolean clear, boolean bot, long baserevid, String summary)
            throws IOException, MediaWikiApiErrorException {

        Validate.notNull(data, "Data parameter cannot be null when editing entity data");

        Map<String, String> parameters = new HashMap<String, String>();
        parameters.put(ApiConnection.PARAM_ACTION, "wbeditentity");

        if (newEntity != null) {
            parameters.put("new", newEntity);
            if (title != null || site != null || id != null) {
                throw new IllegalArgumentException(
                        "Cannot use parameters \"id\", \"site\", or \"title\" when creating a new entity.");
            }
        } else if (id != null) {
            parameters.put("id", id);
            if (title != null || site != null) {
                throw new IllegalArgumentException(
                        "Cannot use parameters \"site\" or \"title\" when using id to edit entity data");
            }
        } else if (title != null) {
            if (site == null) {
                throw new IllegalArgumentException(
                        "Site parameter is required when using title parameter to edit entity data.");
            }
            parameters.put("site", site);
            parameters.put("title", title);
        } else {
            throw new IllegalArgumentException(
                    "This action must create a new item, or specify an id, or specify a site and title.");
        }

        parameters.put("data", data);

        if (bot) {
            parameters.put("bot", "");
        }

        if (baserevid != 0) {
            parameters.put("baserevid", Long.toString(baserevid));
        }

        if (clear) {
            parameters.put("clear", "");
        }

        if (summary != null) {
            parameters.put("summary", summary);
        }

        parameters.put("maxlag", new Integer(this.maxLag).toString());
        parameters.put("token", getCsrfToken());
        parameters.put(ApiConnection.PARAM_FORMAT, "json");

        if (this.remainingEdits > 0) {
            this.remainingEdits--;
        } else if (this.remainingEdits == 0) {
            logger.info("Not editing entity (simulation mode). Request parameters were: " + parameters.toString());
            return null;
        }

        checkEditSpeed();

        EntityDocument result = null;
        int retry = 5;
        MediaWikiApiErrorException lastException = null;
        while (retry > 0) {
            try {
                result = doWbEditEntity(parameters);
                break;
            } catch (TokenErrorException e) { // try again with a fresh token
                lastException = e;
                refreshCsrfToken();
                parameters.put("token", getCsrfToken());
            } catch (MaxlagErrorException e) { // wait for 5 seconds
                lastException = e;
                logger.warn(e.getMessage() + " -- pausing for 5 seconds.");
                try {
                    Thread.sleep(MAXLAG_SLEEP_TIME);
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                }
            }
            retry--;
        }

        if (result == null) {
            logger.error("Gave up after several retries. Last error was: " + lastException.toString());
            throw lastException;
        }

        return result;
    }

    /**
     * Executes a call to wbeditentity. Minimal error handling.
     *
     * @param parameters
     *            the parameters to be used in the call
     * @return the entity document that is returned, or null in case of errors
     * @throws IOException
     *             if there were IO errors
     * @throws MediaWikiApiErrorException
     *             if the API returned an error
     */
    private EntityDocument doWbEditEntity(Map<String, String> parameters)
            throws IOException, MediaWikiApiErrorException {

        try (InputStream response = this.connection.sendRequest("POST", parameters)) {

            JsonNode root = this.mapper.readTree(response);

            this.connection.checkErrors(root);
            this.connection.logWarnings(root);

            if (root.has("item")) {
                return parseJsonResponse(root.path("item"));
            } else if (root.has("property")) {
                // TODO: not tested because of missing
                // permissions
                return parseJsonResponse(root.path("property"));
            } else if (root.has("entity")) {
                return parseJsonResponse(root.path("entity"));
            } else {
                throw new JsonMappingException("No entity document found in API response.");
            }
        }
    }

    /**
     * Returns a CSRF (Cross-Site Request Forgery) token as required to edit
     * data.
     */
    private String getCsrfToken() {
        if (this.csrfToken == null) {
            refreshCsrfToken();
        }
        return this.csrfToken;
    }

    /**
     * Obtains and sets a new CSRF token, whether or not there is already a
     * token set right now.
     */
    private void refreshCsrfToken() {

        this.csrfToken = fetchCsrfToken();
        // TODO if this is null, we could try to recover here:
        // (1) Check if we are still logged in; maybe log in again
        // (2) If there is another error, maybe just run the operation again
    }

    /**
     * Executes a API query action to get a new CSRF (Cross-Site Request
     * Forgery) token. The method only executes the action, without doing any
     * checks first. If errors occur, they are logged and null is returned.
     *
     * @return newly retrieved token or null if no token was retrieved
     */
    private String fetchCsrfToken() {
        Map<String, String> params = new HashMap<String, String>();
        params.put(ApiConnection.PARAM_ACTION, "query");
        params.put("meta", "tokens");
        params.put(ApiConnection.PARAM_FORMAT, "json");

        try (InputStream response = this.connection.sendRequest("POST", params)) {

            JsonNode root = this.mapper.readTree(response);

            this.connection.checkErrors(root);
            this.connection.logWarnings(root);

            return root.path("query").path("tokens").path("csrftoken").textValue();
        } catch (IOException | MediaWikiApiErrorException e) {
            logger.error("Error when trying to fetch csrf token: " + e.toString());
        }
        return null;
    }

    /**
     * Parse a JSON response to extract an entity document.
     * <p>
     * TODO This method currently contains code to work around Wikibase issue
     * https://phabricator.wikimedia.org/T73349. This should be removed once the
     * issue is fixed.
     *
     * @param entityNode
     *            the JSON node that should contain the entity document data
     * @return the entitiy document, or null if there were unrecoverable errors
     * @throws IOException
     * @throws JsonProcessingException
     */
    private EntityDocument parseJsonResponse(JsonNode entityNode) throws JsonProcessingException, IOException {
        try {
            JacksonTermedStatementDocument ed = mapper.treeToValue(entityNode,
                    JacksonTermedStatementDocument.class);
            ed.setSiteIri(this.siteIri);

            return ed;
        } catch (JsonProcessingException e) {
            logger.warn("Error when reading JSON for entity " + entityNode.path("id").asText("UNKNOWN") + ": "
                    + e.toString() + "\nTrying to manually fix issue https://phabricator.wikimedia.org/T73349.");
            String jsonString = entityNode.toString();
            jsonString = jsonString.replace("\"sitelinks\":[]", "\"sitelinks\":{}")
                    .replace("\"labels\":[]", "\"labels\":{}").replace("\"aliases\":[]", "\"aliases\":{}")
                    .replace("\"claims\":[]", "\"claims\":{}")
                    .replace("\"descriptions\":[]", "\"descriptions\":{}");

            ObjectReader documentReader = this.mapper.reader(JacksonTermedStatementDocument.class);

            JacksonTermedStatementDocument ed;
            ed = documentReader.readValue(jsonString);
            ed.setSiteIri(this.siteIri);
            return ed;
        }
    }

    /**
     * Makes sure that we are not editing too fast. The method stores the last
     * {@link WbEditEntityAction#editTimeWindow} time points when an edit was
     * made. If the time since the oldest edit in this window is shorter than
     * {@link #averageMsecsPerEdit} milliseconds, then the method will pause the
     * thread for the remaining time.
     */
    private void checkEditSpeed() {
        long currentTime = System.nanoTime();
        int nextIndex = (this.curEditTimeSlot + 1) % editTimeWindow;
        if (this.recentEditTimes[nextIndex] != 0 && (currentTime - this.recentEditTimes[nextIndex])
                / 1000000 < this.averageMsecsPerEdit * editTimeWindow) {
            long sleepTime = this.averageMsecsPerEdit * editTimeWindow
                    - (currentTime - this.recentEditTimes[nextIndex]) / 1000000;
            logger.info("We are editing too fast. Pausing for " + sleepTime + " milliseconds.");
            try {
                Thread.sleep(sleepTime);
            } catch (InterruptedException ex) {
                Thread.currentThread().interrupt();
            }
            currentTime = System.nanoTime();
        }

        this.recentEditTimes[nextIndex] = currentTime;
        this.curEditTimeSlot = nextIndex;
    }

}