/**
 * Copyright (C) 2013 - 2015 SLUB Dresden & Avantgarde Labs GmbH (<code@dswarm.org>)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dswarm.wikidataimporter;

import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Entity;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.Cookie;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.NewCookie;
import javax.ws.rs.core.Response;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.io.Resources;
import org.apache.commons.lang3.concurrent.BasicThreadFactory;
import org.glassfish.jersey.client.ClientProperties;
import org.glassfish.jersey.client.rx.RxInvocationBuilder;
import org.glassfish.jersey.client.rx.RxWebTarget;
import org.glassfish.jersey.client.rx.rxjava.RxObservable;
import org.glassfish.jersey.client.rx.rxjava.RxObservableInvoker;
import org.glassfish.jersey.media.multipart.FormDataMultiPart;
import org.glassfish.jersey.media.multipart.MultiPartFeature;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter;
import org.wikidata.wdtk.datamodel.implementation.PropertyDocumentImpl;
import org.wikidata.wdtk.datamodel.interfaces.DataObjectFactory;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
import org.wikidata.wdtk.datamodel.json.jackson.JacksonObjectFactory;
import org.wikidata.wdtk.datamodel.json.jackson.JacksonPropertyDocument;
import rx.Observable;
import rx.schedulers.Schedulers;

/**
 * A thin client for the MediaWiki/Wikibase web API: it logs in, obtains a CSRF edit token and
 * creates new Wikibase entities (items and properties) via the 'wbeditentity' action.
 *
 * @author tgaengler
 */
public class WikibaseAPIClient {

    private static final Logger LOG = LoggerFactory.getLogger(WikibaseAPIClient.class);

    private static final Properties properties = new Properties();

    private static final String MEDIAWIKI_API_ENDPOINT = "mediawiki_api_endpoint";
    private static final String FALLBACK_MEDIAWIKI_API_ENDPOINT = "http://localhost:1234/whoknows";
    private static final String DSWARM_USER_AGENT_IDENTIFIER = "DMP 2000";
    private static final String MEDIAWIKI_USERNAME = "mediawiki_username";
    private static final String MEDIAWIKI_PASSWORD = "mediawiki_password";

    private static final String wikibaseAPIBaseURI;

    // loads "dswarm.properties" from the classpath; this class reads the keys
    // "mediawiki_api_endpoint", "mediawiki_username" and "mediawiki_password" from it
    static {

        final URL resource = Resources.getResource("dswarm.properties");

        try {

            properties.load(resource.openStream());
        } catch (final IOException e) {

            LOG.error("Could not load dswarm.properties", e);
        }

        wikibaseAPIBaseURI = properties.getProperty(MEDIAWIKI_API_ENDPOINT, FALLBACK_MEDIAWIKI_API_ENDPOINT);
    }

    private static final String CHUNKED = "CHUNKED";

    private static final int CHUNK_SIZE = 1024;
    private static final int REQUEST_TIMEOUT = 20000000;

    private static final String DSWARM_WIKIDATA_GDM_IMPORTER_THREAD_NAMING_PATTERN = "dswarm-wikidata-gdm-importer-%d";

    private static final ExecutorService EXECUTOR_SERVICE = Executors
            .newCachedThreadPool(new BasicThreadFactory.Builder().daemon(false)
                    .namingPattern(DSWARM_WIKIDATA_GDM_IMPORTER_THREAD_NAMING_PATTERN).build());

    private static final ClientBuilder BUILDER = ClientBuilder.newBuilder().register(MultiPartFeature.class)
            .property(ClientProperties.CHUNKED_ENCODING_SIZE, CHUNK_SIZE)
            .property(ClientProperties.REQUEST_ENTITY_PROCESSING, CHUNKED)
            .property(ClientProperties.OUTBOUND_CONTENT_LENGTH_BUFFER, CHUNK_SIZE)
            .property(ClientProperties.CONNECT_TIMEOUT, REQUEST_TIMEOUT)
            .property(ClientProperties.READ_TIMEOUT, REQUEST_TIMEOUT);

    private static final String MEDIAWIKI_API_ACTION_IDENTIFIER = "action";
    private static final String MEDIAWIKI_API_FORMAT_IDENTIFIER = "format";
    private static final String MEDIAWIKI_API_LOGIN = "login";
    private static final String MEDIAWIKI_API_LGNAME_IDENTIFIER = "lgname";
    private static final String MEDIAWIKI_API_LGPASSWORD_IDENTIFIER = "lgpassword";
    private static final String MEDIAWIKI_API_LGTOKEN_IDENTIFIER = "lgtoken";
    private static final String MEDIAWIKI_API_QUERY = "query";
    private static final String MEDIAWIKI_API_META_IDENTIFIER = "meta";
    private static final String MEDIAWIKI_API_CONTINUE_IDENTIFIER = "continue";
    private static final String WIKIBASE_API_NEW_IDENTIFIER = "new";
    private static final String WIKIBASE_API_DATA_IDENTIFIER = "data";
    private static final String MEDIAWIKI_API_TOKEN_IDENTIFIER = "token";
    private static final String MEDIAWIKI_API_JSON_FORMAT = "json";
    private static final String MEDIAWIKI_API_TOKENS_IDENTIFIER = "tokens";
    private static final String MEDIAWIKI_API_CSRFTOKEN_IDENTIFIER = "csrftoken";

    public static final String WIKIBASE_API_ENTITY_TYPE_ITEM = "item";
    public static final String WIKIBASE_API_ENTITY_TYPE_PROPERTY = "property";

    private static final String WIKIBASE_API_EDIT_ENTITY = "wbeditentity";

    private static final ObjectMapper MAPPER = new ObjectMapper()
            .setSerializationInclusion(JsonInclude.Include.NON_EMPTY)
            .setSerializationInclusion(JsonInclude.Include.NON_NULL);

    private static final DataObjectFactory jsonObjectFactory = new JacksonObjectFactory();
    private static final DatamodelConverter datamodelConverter = new DatamodelConverter(jsonObjectFactory);

    private final String editToken;
    private final Map<String, NewCookie> cookies;

    /**
     * Generates an edit token (and the accompanying session cookies) right away, so that the client is ready for edit requests.
     */
    public WikibaseAPIClient() throws WikidataImporterException {

        final Map<String, Map<String, NewCookie>> result = generateEditToken();

        if (result == null) {

            final String message = "couldn't generate edit token successfully - API cannot be utilised for edit requests";

            LOG.error(message);

            throw new WikidataImporterException(message);
        }

        final Map.Entry<String, Map<String, NewCookie>> resultEntry = result.entrySet().iterator().next();

        editToken = resultEntry.getKey();
        cookies = resultEntry.getValue();
    }

    /**
     * Generates a CSRF ('edit') token by logging in, confirming the login and querying 'meta=tokens'; returns a
     * single-entry map of the edit token to the merged session cookies, or null if any step fails.
     */
    private Map<String, Map<String, NewCookie>> generateEditToken() {

        LOG.debug("try to generate edit token");

        // 0. read user name + password from properties
        final String username = getProperty(MEDIAWIKI_USERNAME);
        final String password = getProperty(MEDIAWIKI_PASSWORD);
        // 1. login request
        return login(username, password).flatMap(loginResponse -> {

            // 1.1 get token from login request response
            final String token = getToken(loginResponse);

            // 1.2 get cookies from login request response
            final Map<String, NewCookie> loginRequestCookies = getCookies(loginResponse);

            if (token == null || loginRequestCookies == null) {

                LOG.error("couldn't retrieve token successfully - cannot continue edit token generation");

                return Observable.empty();
            }

            LOG.debug("retrieved token with login credentials successfully");

            // 2. confirm login request
            return confirmLogin(token, loginRequestCookies).flatMap(confirmLoginResponse -> {

                // 2.1 get cookies from login confirm response
                final Map<String, NewCookie> confirmLoginCookies = getCookies(confirmLoginResponse);

                if (confirmLoginCookies == null) {

                    LOG.error("couldn't confirm login token successfully - cannot continue edit token generation");

                    return Observable.empty();
                }

                LOG.debug("confirmed login with token and cookies successfully");

                // 3. retrieve edit token request
                return retrieveEditToken(confirmLoginCookies).map(retrieveEditTokenResponse -> {

                    // 3.1 get edit token from edit token response
                    final String editToken = getEditToken(retrieveEditTokenResponse);

                    // 3.2 get cookies from edit token response
                    final Map<String, NewCookie> editTokenCookies = getCookies(retrieveEditTokenResponse);

                    if (editTokenCookies == null) {

                        LOG.error("couldn't retrieve edit token successfully - cannot continue edit token generation");

                        return null;
                    }

                    LOG.debug("retrieved edit token with cookies successfully");

                    // 3.3 merge cookies from response from 2 + 3
                    loginRequestCookies.putAll(editTokenCookies);

                    final Map<String, Map<String, NewCookie>> result = new HashMap<>();
                    result.put(editToken, loginRequestCookies);

                    LOG.debug("generated edit token successfully");

                    return result;
                });
            });
        }).toBlocking().firstOrDefault(null);
    }

    /**
     * Sends the MediaWiki 'login' action with the given credentials; the response carries the login token and the
     * initial session cookies.
     */
    public static Observable<Response> login(final String username, final String password) {

        LOG.debug("try to retrieve token with login credentials");

        final RxWebTarget<RxObservableInvoker> rxWebTarget = rxWebTarget();

        final RxObservableInvoker rx = rxWebTarget.request()
                .header(HttpHeaders.USER_AGENT, DSWARM_USER_AGENT_IDENTIFIER).rx();

        final FormDataMultiPart form = new FormDataMultiPart()
                .field(MEDIAWIKI_API_ACTION_IDENTIFIER, MEDIAWIKI_API_LOGIN)
                .field(MEDIAWIKI_API_LGNAME_IDENTIFIER, username)
                .field(MEDIAWIKI_API_LGPASSWORD_IDENTIFIER, password)
                .field(MEDIAWIKI_API_FORMAT_IDENTIFIER, MEDIAWIKI_API_JSON_FORMAT);

        return executePOST(rx, form);
    }

    /**
     * Re-sends the 'login' action with the previously retrieved login token (and session cookies) to complete the login.
     */
    public static Observable<Response> confirmLogin(final String token, final Map<String, NewCookie> cookies) {

        LOG.debug("try to confirm login with token and cookies");

        final RxObservableInvoker rx = buildBaseRequestWithCookies(cookies);

        final FormDataMultiPart form = new FormDataMultiPart()
                .field(MEDIAWIKI_API_ACTION_IDENTIFIER, MEDIAWIKI_API_LOGIN)
                .field(MEDIAWIKI_API_LGTOKEN_IDENTIFIER, token);

        return executePOST(rx, form);
    }

    /**
     * Queries 'meta=tokens' to obtain the CSRF ('edit') token that is required for edit requests.
     */
    public static Observable<Response> retrieveEditToken(final Map<String, NewCookie> cookies) {

        LOG.debug("try to retrieve edit token with cookies");

        final RxObservableInvoker rx = buildBaseRequestWithCookies(cookies);

        final FormDataMultiPart form = new FormDataMultiPart()
                .field(MEDIAWIKI_API_ACTION_IDENTIFIER, MEDIAWIKI_API_QUERY)
                .field(MEDIAWIKI_API_META_IDENTIFIER, MEDIAWIKI_API_TOKENS_IDENTIFIER)
                .field(MEDIAWIKI_API_CONTINUE_IDENTIFIER, "")
                .field(MEDIAWIKI_API_FORMAT_IDENTIFIER, MEDIAWIKI_API_JSON_FORMAT);

        return executePOST(rx, form);
    }
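    /*
     * For illustration (not part of the original source): the two helpers below pull tokens out of MediaWiki API
     * responses of roughly the following shape (abridged; the token values are placeholders):
     *
     *   action=login response:    { "login": { "result": "NeedToken", "token": "0123456789abcdef" } }
     *   meta=tokens query result: { "query": { "tokens": { "csrftoken": "0123456789abcdef+\" } } }
     */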
    /**
     * Extracts the login token from a 'login' response body.
     */
    public static String getToken(final Response loginResponse) {

        try {

            final String responseBody = loginResponse.readEntity(String.class);

            if (responseBody == null) {

                LOG.error("cannot extract token - response body is not available");

                return null;
            }

            final ObjectNode json = MAPPER.readValue(responseBody, ObjectNode.class);

            if (json == null) {

                LOG.error("cannot extract token - response JSON is not available");

                return null;
            }

            final JsonNode loginNode = json.get(MEDIAWIKI_API_LOGIN);

            if (loginNode == null) {

                LOG.error("cannot extract token - '{}' node is not available in response JSON '{}'", MEDIAWIKI_API_LOGIN, responseBody);

                return null;
            }

            final JsonNode tokenNode = loginNode.get(MEDIAWIKI_API_TOKEN_IDENTIFIER);

            if (tokenNode == null) {

                LOG.error("cannot extract token - '{}' node is not available in response JSON '{}'", MEDIAWIKI_API_TOKEN_IDENTIFIER, responseBody);

                return null;
            }

            return tokenNode.asText();
        } catch (final Exception e) {

            LOG.error("cannot extract token - an error occurred while trying to extract the token from the response body", e);

            return null;
        }
    }

    /**
     * Extracts the CSRF ('edit') token from a 'meta=tokens' query response body.
     */
    public static String getEditToken(final Response editTokenResponse) {

        try {

            final String responseBody = editTokenResponse.readEntity(String.class);

            if (responseBody == null) {

                LOG.error("cannot extract edit token - response body is not available");

                return null;
            }

            final ObjectNode json = MAPPER.readValue(responseBody, ObjectNode.class);

            if (json == null) {

                LOG.error("cannot extract edit token - response JSON is not available");

                return null;
            }

            final JsonNode queryNode = json.get(MEDIAWIKI_API_QUERY);

            if (queryNode == null) {

                LOG.error("cannot extract edit token - '{}' node is not available in response JSON '{}'", MEDIAWIKI_API_QUERY, responseBody);

                return null;
            }

            final JsonNode tokensNode = queryNode.get(MEDIAWIKI_API_TOKENS_IDENTIFIER);

            if (tokensNode == null) {

                LOG.error("cannot extract edit token - '{}' node is not available in response JSON '{}'", MEDIAWIKI_API_TOKENS_IDENTIFIER, responseBody);

                return null;
            }

            final JsonNode csrfTokenNode = tokensNode.get(MEDIAWIKI_API_CSRFTOKEN_IDENTIFIER);

            if (csrfTokenNode == null) {

                LOG.error("cannot extract edit token - '{}' node is not available in response JSON '{}'", MEDIAWIKI_API_CSRFTOKEN_IDENTIFIER, responseBody);

                return null;
            }

            return csrfTokenNode.asText();
        } catch (final Exception e) {

            LOG.error("cannot extract edit token - an error occurred while trying to extract the edit token from the response body", e);

            return null;
        }
    }

    /**
     * Serialises the given entity document to JSON and posts a 'wbeditentity' request with 'new' set to the given
     * entity type, i.e. creates a new item or property.
     */
    public Observable<Response> createEntity(final EntityDocument entity, final String entityType) throws JsonProcessingException, WikidataImporterException {

        final EntityDocument jacksonEntity;

        switch (entityType) {
            case WIKIBASE_API_ENTITY_TYPE_ITEM:

                //jacksonEntity = JacksonItemDocument.fromItemDocumentImpl((ItemDocumentImpl) entity);
                jacksonEntity = datamodelConverter.copy((ItemDocument) entity);

                break;
            case WIKIBASE_API_ENTITY_TYPE_PROPERTY:

                jacksonEntity = JacksonPropertyDocument.fromPropertyDocumentImpl((PropertyDocumentImpl) entity);

                break;
            default:

                final String message = String.format("unknown entity type '%s'", entityType);

                LOG.error(message);

                throw new WikidataImporterException(message);
        }

        final String entityJSONString = MAPPER.writeValueAsString(jacksonEntity);

        LOG.debug("create new '{}' with '{}'", entityType, entityJSONString);

        final RxObservableInvoker rx = buildBaseRequestWithCookies(cookies);

        final FormDataMultiPart form = new FormDataMultiPart()
                .field(MEDIAWIKI_API_ACTION_IDENTIFIER, WIKIBASE_API_EDIT_ENTITY)
                .field(WIKIBASE_API_NEW_IDENTIFIER, entityType)
                .field(WIKIBASE_API_DATA_IDENTIFIER, entityJSONString)
                .field(MEDIAWIKI_API_TOKEN_IDENTIFIER, editToken)
                .field(MEDIAWIKI_API_FORMAT_IDENTIFIER, MEDIAWIKI_API_JSON_FORMAT);

        //form.bodyPart(entityJSONString, MediaType.APPLICATION_JSON_TYPE);

        return executePOST(rx, form);
    }

    public static Map<String, NewCookie> getCookies(final Response response) {

        return response.getCookies();
    }

    /**
     * Builds a request against the Wikibase API endpoint that carries the d:swarm user agent and the given session cookies.
     */
    private static RxObservableInvoker buildBaseRequestWithCookies(final Map<String, NewCookie> cookies) {

        final RxWebTarget<RxObservableInvoker> rxWebTarget = rxWebTarget();

        RxInvocationBuilder<RxObservableInvoker> request = rxWebTarget.request().header(HttpHeaders.USER_AGENT, DSWARM_USER_AGENT_IDENTIFIER);

        if (cookies != null) {

            for (final Cookie cookie : cookies.values()) {

                request = request.cookie(cookie);
            }
        }

        return request.rx();
    }

    /**
     * Posts the given multipart form asynchronously; note that only successful (HTTP 200) responses are passed on,
     * i.e. failed requests complete without emitting anything.
     */
    private static Observable<Response> executePOST(final RxObservableInvoker rx, final FormDataMultiPart form) {

        final Entity<FormDataMultiPart> entityBody = Entity.entity(form, MediaType.MULTIPART_FORM_DATA);

        final Observable<Response> post = rx.post(entityBody).subscribeOn(Schedulers.from(EXECUTOR_SERVICE));

        return post.filter(response -> response != null && response.getStatus() == 200);
    }

    private static Client client() {

        return BUILDER.build();
    }

    private static WebTarget target() {

        return client().target(wikibaseAPIBaseURI);
    }

    private static WebTarget target(final String... path) {

        WebTarget target = target();

        for (final String p : path) {

            target = target.path(p);
        }

        return target;
    }

    private static RxWebTarget<RxObservableInvoker> rxWebTarget() {

        final WebTarget target = target();

        return RxObservable.from(target);
    }

    private static RxWebTarget<RxObservableInvoker> rxWebTarget(final String... path) {

        final WebTarget target = target(path);

        return RxObservable.from(target);
    }

    private static String getProperty(final String propertyKey) {

        final String propertyValue = properties.getProperty(propertyKey);

        if (propertyValue == null || propertyValue.trim().isEmpty()) {

            LOG.error("couldn't find property '{}' in properties file", propertyKey);
        }

        return propertyValue;
    }
}
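/*
 * Minimal usage sketch, not part of the original source. It assumes a dswarm.properties file on the classpath that
 * provides mediawiki_api_endpoint, mediawiki_username and mediawiki_password, and an already built Wikidata Toolkit
 * ItemDocument (e.g. created via org.wikidata.wdtk.datamodel.helpers.ItemDocumentBuilder). The class and method names
 * below (WikibaseAPIClientUsageSketch, importItem) are illustrative only.
 */
class WikibaseAPIClientUsageSketch {

    static void importItem(final ItemDocument itemDocument) throws Exception {

        // the constructor runs the login -> confirm-login -> CSRF-token workflow (see generateEditToken above)
        final WikibaseAPIClient client = new WikibaseAPIClient();

        // posts a 'wbeditentity' request that creates a new item from the serialised document; since executePOST
        // filters out non-200 responses, a failed request simply completes without emitting anything
        client.createEntity(itemDocument, WikibaseAPIClient.WIKIBASE_API_ENTITY_TYPE_ITEM)
                .toBlocking()
                .forEach(response -> System.out.println("created item, HTTP status = " + response.getStatus()));
    }
}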