// Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.marmotta.ldclient.services.provider; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.cookie.DateParseException; import org.apache.http.impl.cookie.DateUtils; import org.apache.http.util.EntityUtils; import org.apache.marmotta.commons.collections.CollectionUtils; import org.apache.marmotta.commons.http.ContentType; import org.apache.marmotta.ldclient.api.endpoint.Endpoint; import org.apache.marmotta.ldclient.api.ldclient.LDClientService; import org.apache.marmotta.ldclient.api.provider.DataProvider; import org.apache.marmotta.ldclient.exception.DataRetrievalException; import org.apache.marmotta.ldclient.model.ClientResponse; import org.openrdf.model.Model; import org.openrdf.model.impl.TreeModel; import org.openrdf.repository.RepositoryException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; import java.util.*; import static org.apache.marmotta.commons.http.MarmottaHttpUtils.parseContentType; /** * Add file description here! 
* <p/> * Author: Sebastian Schaffert */ public abstract class AbstractHttpProvider implements DataProvider { public static final int RETRY_AFTER = 60; private static Logger log = LoggerFactory.getLogger(AbstractHttpProvider.class); /** * Build the URL to use to call the webservice in order to retrieve the data for the resource passed as argument. * In many cases, this will just return the URI of the resource (e.g. Linked Data), but there might be data providers * that use different means for accessing the data for a resource, e.g. SPARQL or a Cache. * * * * @param resourceUri * @param endpoint endpoint configuration for the data provider (optional) * @return */ protected abstract List<String> buildRequestUrl(String resourceUri, Endpoint endpoint) throws DataRetrievalException; /** * Parse the HTTP response entity returned by the web service call and return its contents in a Sesame RDF * repository also passed as argument. The content type returned by the web service is passed as argument to help * the implementation decide how to parse the data. The implementation can return a list of additional pages to * retrieve for completing the data of the resource * * * * * * @param resourceUri * @param model an RDF repository for storing an RDF representation of the dataset located at the remote resource. * @param in input stream as returned by the remote webservice * @param contentType content type as returned in the HTTP headers of the remote webservice * @return a possibly empty list of URLs of additional resources to retrieve to complete the content * @throws java.io.IOException in case an error occurs while reading the input stream */ protected abstract List<String> parseResponse(String resourceUri, String requestUrl, Model model, InputStream in, String contentType) throws DataRetrievalException; /** * Retrieve the data for a resource using the given http client and endpoint definition. The service is * supposed to manage the connection handling itself. 
See {@link AbstractHttpProvider} * for a generic implementation of this method. * * * * @param resource the resource to be retrieved * @param endpoint the endpoint definition * @return a completely specified client response, including expiry information and the set of triples */ @Override public ClientResponse retrieveResource(String resource, LDClientService client, Endpoint endpoint) throws DataRetrievalException { try { String contentType; if (endpoint != null && endpoint.getContentTypes().size() > 0) { contentType = CollectionUtils.fold(endpoint.getContentTypes(), new CollectionUtils.StringSerializer<ContentType>() { @Override public String serialize(ContentType contentType) { return contentType.toString("q"); } }, ","); } else { contentType = CollectionUtils.fold(Arrays.asList(listMimeTypes()), ","); } long defaultExpires = client.getClientConfiguration().getDefaultExpiry(); if (endpoint != null && endpoint.getDefaultExpiry() != null) { defaultExpires = endpoint.getDefaultExpiry(); } final ResponseHandler handler = new ResponseHandler(resource, endpoint); // a queue for queuing the request URLs needed to build the query response Queue<String> requestUrls = new LinkedList<String>(); requestUrls.addAll(buildRequestUrl(resource, endpoint)); Set<String> visited = new HashSet<String>(); String requestUrl = requestUrls.poll(); while (requestUrl != null) { if (!visited.contains(requestUrl)) { HttpGet get = new HttpGet(requestUrl); try { get.setHeader("Accept", contentType); get.setHeader("Accept-Language", "*"); // PoolParty compatibility log.info("retrieving resource data for {} from '{}' endpoint, request URI is <{}>", new Object[] { resource, getName(), get.getURI().toASCIIString() }); handler.requestUrl = requestUrl; List<String> additionalRequestUrls = client.getClient().execute(get, handler); requestUrls.addAll(additionalRequestUrls); visited.add(requestUrl); } finally { get.releaseConnection(); } } requestUrl = requestUrls.poll(); } Date expiresDate = 
handler.expiresDate; if (expiresDate == null) { expiresDate = new Date(System.currentTimeMillis() + defaultExpires * 1000); } long min_expires = System.currentTimeMillis() + client.getClientConfiguration().getMinimumExpiry() * 1000; if (expiresDate.getTime() < min_expires) { log.info( "expiry time returned by request lower than minimum expiration time; using minimum time instead"); expiresDate = new Date(min_expires); } if (log.isInfoEnabled()) { log.info("retrieved {} triples for resource {}; expiry date: {}", new Object[] { handler.triples.size(), resource, expiresDate }); } ClientResponse result = new ClientResponse(handler.httpStatus, handler.triples); result.setExpires(expiresDate); return result; } catch (RepositoryException e) { log.error("error while initialising Sesame repository; classpath problem?", e); throw new DataRetrievalException("error while initialising Sesame repository; classpath problem?", e); } catch (ClientProtocolException e) { log.error("HTTP client error while trying to retrieve resource {}: {}", resource, e.getMessage()); throw new DataRetrievalException("I/O error while trying to retrieve resource " + resource, e); } catch (IOException e) { log.error("I/O error while trying to retrieve resource {}: {}", resource, e.getMessage()); throw new DataRetrievalException("I/O error while trying to retrieve resource " + resource, e); } catch (RuntimeException ex) { log.error("Unknown error while trying to retrieve resource {}: {}", resource, ex.getMessage()); throw new DataRetrievalException("Unknown error while trying to retrieve resource " + resource, ex); } } /** * Check whether the content type returned by the server is acceptable to the endpoint and data provider */ protected boolean isValidContentType(String contentType, Endpoint endpoint) { if (endpoint != null && endpoint.getContentTypes().size() > 0) { ContentType parsed = parseContentType(contentType); for (ContentType valid : endpoint.getContentTypes()) { if (valid.matches(parsed) || 
valid.matchesWildcard(parsed)) { return true; } } return false; } else { // TODO: should probably be removed, since it is not used for (String type : listMimeTypes()) { if (type.split(";")[0].equalsIgnoreCase(contentType)) return true; } return false; } } private class ResponseHandler implements org.apache.http.client.ResponseHandler<List<String>> { private Date expiresDate; private String requestUrl; // the repository where the triples will be stored in case the data providers return them private final Model triples; private final Endpoint endpoint; private final String resource; private int httpStatus; public ResponseHandler(String resource, Endpoint endpoint) throws RepositoryException { this.resource = resource; this.endpoint = endpoint; triples = new TreeModel(); } @Override public List<String> handleResponse(HttpResponse response) throws ClientProtocolException, IOException { ArrayList<String> requestUrls = new ArrayList<String>(); if (response.getStatusLine().getStatusCode() >= 200 && response.getStatusLine().getStatusCode() < 400) { final HttpEntity entity = response.getEntity(); if (entity == null) throw new IOException("no content returned by Linked Data resource " + resource); if (!isValidContentType(entity.getContentType().getValue().split(";")[0], endpoint)) { // FIXME: here was get.abort() throw new IOException("invalid content returned by Linked Data resource " + resource + ": " + entity.getContentType().getValue()); } this.httpStatus = response.getStatusLine().getStatusCode(); if (entity != null) { String parseContentType = "application/rdf+xml"; if (endpoint != null && "SPARQL".equals(endpoint.getType())) { parseContentType = "application/sparql-results+xml"; } else if (entity.getContentType() != null) { parseContentType = entity.getContentType().getValue().split(";")[0]; } InputStream in = entity.getContent(); try { List<String> urls = parseResponse(resource, requestUrl, triples, in, parseContentType); requestUrls.addAll(urls); if (expiresDate == 
null) { Header expires = response.getFirstHeader("Expires"); if (expires != null) { try { expiresDate = DateUtils.parseDate(expires.getValue()); } catch (DateParseException e) { log.debug("error parsing Expires: header"); } } } } catch (DataRetrievalException e) { // FIXME: get.abort(); throw new IOException(e); } finally { in.close(); } } EntityUtils.consume(entity); } else if (response.getStatusLine().getStatusCode() == 500 || response.getStatusLine().getStatusCode() == 503 || response.getStatusLine().getStatusCode() == 504) { this.httpStatus = response.getStatusLine().getStatusCode(); Header retry = response.getFirstHeader("Retry-After"); if (retry != null) { try { int duration = Integer.parseInt(retry.getValue()); expiresDate = new Date(System.currentTimeMillis() + duration * 1000); } catch (NumberFormatException ex) { log.debug("error parsing Retry-After: header"); } } else { expiresDate = new Date(System.currentTimeMillis() + RETRY_AFTER * 1000); } } else { log.error("the HTTP request failed (status: {})", response.getStatusLine()); throw new ClientProtocolException( "the HTTP request failed (status: " + response.getStatusLine() + ")"); } return requestUrls; } } }