Java tutorial
/* * (C) Copyright 2016 Nuxeo SA (http://nuxeo.com/) and contributors. * * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser General Public License * (LGPL) version 2.1 which accompanies this distribution, and is available at * http://www.gnu.org/licenses/lgpl-2.1.html * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * Contributors: * Michael Vachette * Thibaud Arguillere */ package org.nuxeo.http.blobprovider; import org.apache.commons.codec.binary.Base64; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.nuxeo.common.file.FileCache; import org.nuxeo.common.file.LRUFileCache; import org.nuxeo.ecm.core.api.Blob; import org.nuxeo.ecm.core.api.Blobs; import org.nuxeo.ecm.core.api.NuxeoException; import org.nuxeo.ecm.core.blob.AbstractBlobProvider; import org.nuxeo.ecm.core.blob.BlobManager.BlobInfo; import org.nuxeo.ecm.core.blob.ManagedBlob; import org.nuxeo.ecm.core.blob.SimpleManagedBlob; import org.nuxeo.ecm.core.model.Document; import org.nuxeo.runtime.api.Framework; import org.nuxeo.runtime.trackers.files.FileEventTracker; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; /** * Handle a blob living on a remote HTTP server, in read-only (no write to the * server, no synchronization) * <p> * Nuxeo will handle the blob as if it was living in its own blob store: * Thumbnail, full text, video storyboard, ... * <p> * First implementation: Support unauthenticated URLs or BASIC authentication * <p> * Because we don't redirect the URL (we don't override <code>getURI()</code>), * any download will fetch the file on the remote server => We should have some * cache mechanism for optimization, instead of downloading the file from the * distant url. This cache should be an option though, because in some * application, the distant server wants to keep track of all the downloads, * etc. * <p> * There is one default blob provider, named "http", contributed by the plug-in. * It is ready to use configuraiton parameters stored in nuxeo.conf file: * <ul> * <li>http.blobprovider.origin<br/> * Notice this parameter must also contains the protocol</li> * <li>http.blobprovider.auth.type</li> * <li>http.blobprovider.auth.login</li> * <li>http.blobprovider.auth.password</li> * <li>http.blobprovider.auth.moreHeadersJson</li> * </ul> * So you can just put these parameters in your configuration and it will work * as expected. * <p> * To setup another http-blob provider, contribute the same extension and change * the name ("my-http") and the properties. You can use the same mechanism as in * the default provider, and set up a property with a configuration parameter * using the following expression: * * <pre> * <property name="origin">${my.other.provider.origin:=}</property> * </pre> * * Just stating the obvious: You can't add a property that is not used here. The * current implementation supports the properties listed above (origin, * authentication type, ...). Well, you cn add it, it will just be ignored :-> * <p> * Also, notice that by default a blob provider allows connection to domains * that are not the one set in the "origin" property. In this case, the provider * assumes the call is always unauthenticated. So for example, if your * contributions has the * <code><property name="origin">http://my.site.com</property></code> property * and you use an url like "http://somethingelse.com/thefile.pdf", then the * provider will try to get the file with no authentication. * <p> * In this example, if you need to access "thefile.pdf" and the site requires * authentication, you must declare another http blob provider * * @since 8.1 */ public class HttpBlobProvider extends AbstractBlobProvider { @SuppressWarnings("unused") private static final Log log = LogFactory.getLog(HttpBlobProvider.class); // <-------------------- Configuration Parameters --------------------> // Names (keys) of the default parameters, as used in the default xml // contribution. // as a user of the blob provider, you are supposed to: // -> Setup the correct XML contribution to BlobProvider // -> And either hard code the values or use you own configuration // parameters public static final String KEY_ORIGIN = "http.blobprovider.origin"; public static final String KEY_AUTHENTICATION_TYPE = "http.blobprovider.auth.type"; public static final String KEY_AUTHENTICATION_LOGIN = "http.blobprovider.auth.login"; public static final String KEY_AUTHENTICATION_PWD = "http.blobprovider.auth.password"; public static final String KEY_AUTHENTICATION_MORE_HEADERS = "http.blobprovider.moreheaders"; public static final String KEY_AUTHENTICATION_USE_CACHE = "http.blobprovider.usecache"; public static final String KEY_AUTHENTICATION_CACHE_MAX_SIZE = "http.blobprovider.cache.maxSize"; public static final String KEY_AUTHENTICATION_CACHE_MAX_COUNT = "http.blobprovider.cache.maxCount"; public static final String KEY_AUTHENTICATION_CACHE_MIN_AGE = "http.blobprovider.cache.minAge"; // <-------------------- Names of properties in the XML // --------------------> public static final String PROPERTY_ORIGIN = "origin"; public static final String PROPERTY_AUTHENTICATION_TYPE = "authenticationType"; public static final String PROPERTY_LOGIN = "login"; public static final String PROPERTY_PWD = "password"; public static final String PROPERTY_MORE_HEADERS = "moreHeadersJson"; public static final String PROPERTY_USE_CACHE = "useCache"; public static final String PROPERTY_CACHE_MAX_SIZE = "cacheMaxSize"; public static final String PROPERTY_CACHE_MAX_COUNT = "cacheMaxCount"; public static final String PROPERTY_CACHE_MIN_AGE = "cacheMinAge"; // <-------------------- Other constants --------------------> protected static final String AUTH_NONE = "None"; protected static final String AUTH_BASIC = "Basic"; public static final String[] SUPPORTED_AUTHENTICATION_METHODS = { AUTH_NONE, AUTH_BASIC }; public static final String DEFAULT_PROVIDER = "http"; public static final long DEFAULT_CACHE_MAX_FILE_SIZE = 500 * 1024 * 1024; public static final long DEFAULT_CACHE_MAX_COUNT = 10000; public static final long DEFAULT_CACHE_MIN_AGE = 3600; // 1h // <-------------------- Implementation --------------------> protected String origin; protected String authenticationType; protected String authenticationLogin; protected String authenticationPwd; protected String basicAuthentication; protected HashMap<String, String> moreHeaders; protected File cachedir = null; protected FileCache fileCache = null; // <============================================================================> // <============================ NON PUBLIC METHODS // ============================> // <============================================================================> /* * Centralize handling of the key, that mixes the blobprovider Id and the * URL */ protected String extractUrl(ManagedBlob blob) { String key = blob.getKey(); // strip prefix int colon = key.indexOf(':'); if (colon >= 0 && key.substring(0, colon).equals(blobProviderId)) { key = key.substring(colon + 1); } return key; } /* * Here, we just make sure every variable is not null, so we can easily use * theValue.equals() everywhere for example.We also realign values to get * rid of case-sensitive comparison errors and be cool with the users of the * extension point ;-) */ protected void setupFromProperties() throws JSONException { // <-------------------- Load from the configuration // --------------------> authenticationType = properties.get(PROPERTY_AUTHENTICATION_TYPE); authenticationType = StringUtils.isBlank(authenticationType) ? "" : authenticationType; origin = properties.get(PROPERTY_ORIGIN); origin = StringUtils.isBlank(origin) ? "" : origin; // When checking if the file's url is ok, we want to get rid of the // case. origin = origin.toLowerCase(); authenticationLogin = properties.get(PROPERTY_LOGIN); authenticationLogin = StringUtils.isBlank(authenticationLogin) ? "" : authenticationLogin; authenticationPwd = properties.get(PROPERTY_PWD); authenticationPwd = StringUtils.isBlank(authenticationPwd) ? "" : authenticationPwd; String moreHeadersJson = properties.get(PROPERTY_MORE_HEADERS); moreHeadersJson = StringUtils.isBlank(moreHeadersJson) ? "" : moreHeadersJson; // <-------------------- Realign etc. --------------------> if (authenticationType.toLowerCase().equals(AUTH_NONE.toLowerCase())) { authenticationType = AUTH_NONE; } else if (authenticationType.toLowerCase().equals(AUTH_BASIC.toLowerCase())) { authenticationType = AUTH_BASIC; String authString = authenticationLogin + ":" + authenticationPwd; basicAuthentication = "Basic " + new String(Base64.encodeBase64(authString.getBytes())); } moreHeaders = new HashMap<String, String>(); if (!moreHeadersJson.isEmpty()) { JSONArray array = new JSONArray(moreHeadersJson); int max = array.length(); JSONObject obj; for (int i = 0; i < max; ++i) { obj = array.getJSONObject(i); moreHeaders.put(obj.getString("key"), obj.getString("value")); } } } protected void setupCache() throws IOException { boolean useCache; String str = properties.get(PROPERTY_USE_CACHE); useCache = StringUtils.isNotBlank(str) && str.toLowerCase().equals("true"); if (useCache) { String name = StringUtils.replace(blobProviderId, " ", "") + "_cache"; cachedir = Framework.createTempFile(name, ""); cachedir.delete(); cachedir.mkdir(); long maxSize = getLongFromProperties(PROPERTY_CACHE_MAX_SIZE, DEFAULT_CACHE_MAX_FILE_SIZE); long maxCount = getLongFromProperties(PROPERTY_CACHE_MAX_COUNT, DEFAULT_CACHE_MAX_COUNT); long minAge = getLongFromProperties(PROPERTY_CACHE_MIN_AGE, DEFAULT_CACHE_MIN_AGE); fileCache = new LRUFileCache(cachedir, maxSize, maxCount, minAge); // be sure FileTracker won't steal our files! FileEventTracker.registerProtectedPath(cachedir.getAbsolutePath()); } } protected long getLongFromProperties(String key, long defaultValue) { long value; String str = properties.get(key); try { value = Long.parseLong(str); } catch (NumberFormatException e) { value = -1; } if (value <= 0) { value = defaultValue; } return value; } /* * Just a centralization of adding the headers if needed. */ protected void addHeaders(HttpURLConnection connection, String urlStr) { // No authentication type or not the original domain => Assume the url // does not require authentication. // Else => authentication if (StringUtils.isNotBlank(origin) && urlStr.toLowerCase().startsWith(origin)) { switch (authenticationType) { case AUTH_BASIC: connection.setRequestProperty("Authorization", basicAuthentication); break; // . . . Other cases . . . } } if (moreHeaders.size() > 0) { for (Entry<String, String> entry : moreHeaders.entrySet()) { connection.setRequestProperty(entry.getKey(), entry.getValue()); } } } protected boolean isBasicAuthentication() { return StringUtils.isNotBlank(authenticationType) && authenticationType.equals(AUTH_BASIC); } protected boolean isNoAuthentication() { return StringUtils.isBlank(authenticationType) || authenticationType.equals(AUTH_NONE); } // <============================================================================> // <============================== PUBLIC METHODS // ==============================> // <============================================================================> @Override public void initialize(String blobProviderId, Map<String, String> properties) throws IOException { super.initialize(blobProviderId, properties); try { setupFromProperties(); setupCache(); } catch (JSONException e) { throw new IOException("Failed to load extra headers from the configuration", e); } } @Override public void close() { if (fileCache != null) { fileCache.clear(); } if (cachedir != null) { try { FileUtils.deleteDirectory(cachedir); } catch (IOException e) { throw new NuxeoException(e); } } } @Override public Blob readBlob(BlobInfo blobInfo) throws IOException { return new SimpleManagedBlob(blobInfo); } @Override public InputStream getStream(ManagedBlob blob) throws IOException { InputStream stream = null; String digest = null; // Using cache: Either get the file from the cache or download it and // add it to the cache // TODO: If the file is not in the cache, let's put it later, in another // thread, asynchronously // so we don't delay the download form the client if (fileCache != null) { digest = blob.getDigest(); if (digest == null) { throw new NuxeoException("This blob has no digest: " + blob.getKey()); } File file = fileCache.getFile(digest); if (file == null) { Blob downloaded = downloadFile(blob); file = downloaded.getFile(); fileCache.putFile(digest, file); file = fileCache.getFile(digest); } stream = new FileInputStream(file); } else { // Not using the cache: Just get the file from the http stream. String urlStr = extractUrl(blob); try { URL url = new URL(urlStr); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); addHeaders(connection, urlStr); stream = connection.getInputStream(); } catch (MalformedURLException e) { throw new NuxeoException("Fatal protocol violation", e); } catch (IOException e) { throw new IOException("Fatal transport error", e); } finally { } } return stream; } /** * Downloads the remote data, returns a temp. blob, with ".tmp" as file * extension * <p> * (used by unit tests so far) * * @param blob * @return the downloaded blob * @throws IOException * @since 8.1 */ public Blob downloadFile(ManagedBlob blob) throws IOException { Blob result = null; String urlStr = extractUrl(blob); URL url = new URL(urlStr); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); addHeaders(connection, urlStr); String fileName = blob.getFilename(); String mimeType = blob.getMimeType(); result = Blobs.createBlobWithExtension(".tmp"); FileOutputStream outputStream = new FileOutputStream(result.getFile()); InputStream inputStream = connection.getInputStream(); int bytesRead = -1; byte[] buffer = new byte[10240]; while ((bytesRead = inputStream.read(buffer)) != -1) { outputStream.write(buffer, 0, bytesRead); } outputStream.close(); inputStream.close(); result.setFilename(fileName); result.setMimeType(mimeType); return result; } /** * This class does no support user updates, whatever the value of the * "preventUserUpdate" property. */ @Override public boolean supportsUserUpdate() { return false; // supportsUserUpdateDefaultFalse(); } /** * This class does not support writing a blob and always throws an exception */ @Override public String writeBlob(Blob blob, Document doc) throws IOException { throw new UnsupportedOperationException("Writing a blob is not supported"); } /** * Creates a blob whose key is the remote URL * <p> * <b>IMPORTANT</b>: * <ul> * <li>The <code>blobInfo.key</code> field will be replaced by the * provider's own key scheme</li> * <li>blobInfo <i>must</i> contain the mime type and the filename. If they * don't, the code tries to guess the values by sending a HEAD request. If * this fails, an error is thrown.</li> * </ul> * <p> * The passed {@link BlobInfo} contains information about the blob * <p> * <p> * A future improvement would be to allow just a URL and using HEAD request * maybe to fetch the infos (mime type, lenght, file name, ...) * * @param blobInfo * the blob info where the key is the URL * @return the blob */ public ManagedBlob createBlob(BlobInfo blobInfo) throws IOException { String url = blobInfo.key; BlobInfo newInfo = new BlobInfo(blobInfo); newInfo.key = blobProviderId + ":" + url; if (StringUtils.isBlank(newInfo.mimeType) || StringUtils.isBlank(newInfo.filename)) { BlobInfo guessedInfo = guessInfosFromURL(url); if (guessedInfo == null || newInfo.mimeType == null || newInfo.filename == null) { throw new NuxeoException("BlobInfo with no mime type or no file name, and could not guess them."); } newInfo.mimeType = guessedInfo.mimeType == null ? newInfo.mimeType : guessedInfo.mimeType; newInfo.filename = guessedInfo.filename == null ? newInfo.filename : guessedInfo.filename; newInfo.encoding = guessedInfo.encoding == null ? newInfo.encoding : guessedInfo.encoding; } if (newInfo.length == null) { // Default widgets in the UI activate a link if the length is >= 0 // (see extended_file_widget.xhtml) newInfo.length = 0L; } if (StringUtils.isBlank(newInfo.digest)) { newInfo.digest = DigestUtils.md5Hex(url); } if (StringUtils.isBlank(newInfo.encoding)) { newInfo.encoding = null; } return new SimpleManagedBlob(newInfo); } /** * Tests the URL (stored in the blob key) using a HEAD http verb, and adding * authentication if needed. * * @param blob * @return true if the URL can be reached with no error * @since 8.1 */ public boolean urlLooksValid(ManagedBlob blob) { String urlStr = extractUrl(blob); return urlLooksValid(urlStr); } /** * Tests the URL using a HEAD http verb, and adding authentication if * needed. * * @param urlStr * @return true if the URL can be reached with no error * @since 8.1 */ public boolean urlLooksValid(String urlStr) { boolean looksOk = false; try { URL url = new URL(urlStr); HttpURLConnection huc = (HttpURLConnection) url.openConnection(); addHeaders(huc, urlStr); huc.setRequestMethod("HEAD"); int responseCode = huc.getResponseCode(); looksOk = responseCode == HttpURLConnection.HTTP_OK; } catch (Exception e) { // Whatever the error, we fail. No need to be // granular here. looksOk = false; } return looksOk; } /** * Sends a HEAD request to get the info without downloading the file. * <p> * If an error occurs, returns null. * * @param urlStr * @return the BlobInfo * @since 8.1 */ public BlobInfo guessInfosFromURL(String urlStr) { BlobInfo bi = null; String attrLowerCase; try { URL url = new URL(urlStr); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); addHeaders(connection, urlStr); connection.setRequestMethod("HEAD"); int responseCode = connection.getResponseCode(); if (responseCode == HttpURLConnection.HTTP_OK) { bi = new BlobInfo(); bi.mimeType = connection.getContentType(); // Remove possible ...;charset="something" int idx = bi.mimeType.indexOf(";"); if (idx >= 0) { bi.mimeType = bi.mimeType.substring(0, idx); } bi.encoding = connection.getContentEncoding(); bi.length = connection.getContentLengthLong(); if (bi.length < 0) { bi.length = 0L; } String disposition = connection.getHeaderField("Content-Disposition"); String fileName = null; if (disposition != null) { String[] attributes = disposition.split(";"); for (String attr : attributes) { attrLowerCase = attr.toLowerCase(); if (attrLowerCase.contains("filename=")) { attr = attr.trim(); // Remove filename= fileName = attr.substring(9); idx = fileName.indexOf("\""); if (idx > -1) { fileName = fileName.substring(idx + 1, fileName.lastIndexOf("\"")); } bi.filename = fileName; break; } else if (attrLowerCase.contains("filename*=utf-8''")) { attr = attr.trim(); // Remove filename= fileName = attr.substring(17); idx = fileName.indexOf("\""); if (idx > -1) { fileName = fileName.substring(idx + 1, fileName.lastIndexOf("\"")); } fileName = java.net.URLDecoder.decode(fileName, "UTF-8"); bi.filename = fileName; break; } } } else { // Try from the url idx = urlStr.lastIndexOf("/"); if (idx > -1) { fileName = urlStr.substring(idx + 1); bi.filename = java.net.URLDecoder.decode(fileName, "UTF-8"); } } } } catch (Exception e) { // Whatever the error, we fail. No need to be // granular here. bi = null; } return bi; } public int getNumberOfCachedFiles() { if (fileCache != null) { return fileCache.getNumberOfItems(); } return 0; } public boolean isCached(ManagedBlob blob) { if (fileCache != null && blob.getDigest() != null) { return fileCache.getFile(blob.getDigest()) != null; } return false; } }