Java tutorial
/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.couchdbinput;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.AuthCache;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.pentaho.di.cluster.SlaveConnectionManager;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.encryption.Encr;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.util.HttpClientManager;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * Step that queries a CouchDB view over HTTP and emits every row of the view result as one
 * JSON string.
 *
 * <p>{@link #init} issues the GET request and keeps the response stream open;
 * {@link #processRow} then scans that stream one byte at a time, cutting it into rows by
 * tracking the nesting depth of curly braces, and {@link #dispose} closes the stream.
 */
public class CouchDbInput extends BaseStep implements StepInterface {
  private static Class<?> PKG = CouchDbInputMeta.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$

  private final HttpClientFactory httpClientFactory = new HttpClientFactory();
  private final HttpClientManager httpClientManager = createHttpClientManager();
  private final GetMethodFactory getMethodFactory;

  private CouchDbInputMeta meta;
  private CouchDbInputData data;

  /** Outcome of one scan over the response stream. */
  private enum ScanResult {
    /** The opening brace of a row (nesting depth 2) was just appended to the buffer. */
    ROW_START,
    /** The stream is exhausted. */
    END_OF_STREAM,
    /** The step was stopped before a row start or the end of the stream was seen. */
    STOPPED
  }

  public CouchDbInput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
                       Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
    this.getMethodFactory = new GetMethodFactory();
  }

  /**
   * Variant that accepts the factories from the caller. Only {@code getMethodFactory} is
   * stored; the {@code httpClientFactory} argument is accepted but not used.
   */
  @Deprecated
  public CouchDbInput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
                       Trans trans, HttpClientFactory httpClientFactory, GetMethodFactory getMethodFactory ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
    this.getMethodFactory = getMethodFactory;
  }

  /**
   * Builds the URL of a CouchDB view: {@code http://host[:port]/db/_design/design/_view/view}.
   *
   * @param hostname host name of the CouchDB server
   * @param port     port number; a negative value leaves the port out of the URL
   * @param db       database name
   * @param design   design document name
   * @param view     view name
   * @return the assembled URL; the parts are concatenated as given, without URL encoding
   */
  public static String buildUrl( String hostname, int port, String db, String design, String view ) {
    String url = "http://" + hostname;
    if ( port >= 0 ) {
      url += ":" + port;
    }
    url += "/" + db;
    url += "/_design/" + design;
    url += "/_view/" + view;
    return url;
  }

  /**
   * Emits at most one row per call.
   *
   * <p>On the first call the introduction of the response (everything up to the opening brace
   * of the first row) is read, logged and discarded. Every call then reads up to the opening
   * brace of the following row and sends what was collected as one row; at the end of the
   * stream the remaining buffer is sent as the last row.
   *
   * @return {@code true} while more rows may follow, {@code false} once the output is done
   * @throws KettleException when reading from the response stream fails
   */
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    try {
      if ( first ) {
        first = false;

        data.outputRowMeta = new RowMeta();
        meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

        // Skip over first introduction row containing the number of results...
        //
        // Example: {"total_rows":3,"offset":0,"rows":[
        //
        data.buffer = new StringBuilder( 1000 );
        data.open = 0;

        ScanResult header = scanToNextRowStart();
        if ( header == ScanResult.END_OF_STREAM ) {
          // No row at all in the response.
          setOutputDone();
          return false;
        }
        if ( header == ScanResult.STOPPED ) {
          return true;
        }

        // The buffer now ends with the '{' of the first row: log everything before it and
        // keep only that brace.
        logBasic( "Read header: >>" + data.buffer.substring( 0, data.buffer.length() - 1 ) + "<<" );
        data.buffer.delete( 0, data.buffer.length() - 1 );
      }

      // read one JSON block from the data until no data is left on the input stream
      //
      ScanResult scan = scanToNextRowStart();
      if ( scan == ScanResult.ROW_START ) {
        sendBufferRow( false );
        return true;
      }
      if ( scan == ScanResult.END_OF_STREAM ) {
        if ( data.buffer.length() > 0 ) {
          sendBufferRow( true );
        }
        setOutputDone();
        return false;
      }
      return true;
    } catch ( IOException e ) {
      throw new KettleException( "Unable to read from the CouchDB REST web service", e );
    }
  }

  /**
   * Reads from {@code data.bufferedInputStream}, appending every byte to {@code data.buffer},
   * until a '{' raises the brace depth {@code data.open} to 2 (the start of a row), the stream
   * ends, or the step is stopped.
   *
   * <p>Braces inside JSON string literals are not counted, and a backslash inside a string
   * escapes the character after it, so an escaped quote does not terminate the string.
   * Scanning byte-wise is safe for UTF-8 input because every byte of a multi-byte sequence is
   * above 0x7F and cannot be mistaken for a brace, quote or backslash.
   *
   * <p>A scan always starts outside a string: it is entered either at the very beginning of
   * the response or right after the opening brace of a row.
   *
   * @return what ended the scan
   * @throws IOException when reading from the stream fails
   */
  private ScanResult scanToNextRowStart() throws IOException {
    boolean inString = false;
    boolean escaped = false;

    int c = data.bufferedInputStream.read();
    while ( c >= 0 && !isStopped() ) {
      data.buffer.append( (char) c );

      if ( inString ) {
        if ( escaped ) {
          escaped = false;
        } else if ( c == '\\' ) {
          escaped = true;
        } else if ( c == '"' ) {
          inString = false;
        }
      } else if ( c == '"' ) {
        inString = true;
      } else if ( c == '{' ) {
        data.open++;
        // Second JSON nested block means: another row of data...
        if ( data.open == 2 ) {
          return ScanResult.ROW_START;
        }
      } else if ( c == '}' ) {
        data.open--;
      }

      c = data.bufferedInputStream.read();
    }
    return c < 0 ? ScanResult.END_OF_STREAM : ScanResult.STOPPED;
  }

  /**
   * Cuts one row out of {@code data.buffer}, sends it to the output and leaves only the last
   * buffered character behind (the opening brace of the next row when there is one).
   *
   * @param lastRow {@code true} when the buffer holds the final row, which is followed by the
   *                closing "]}" of the response instead of by a comma and the next row
   * @throws KettleStepException when the row cannot be passed on
   */
  private void sendBufferRow( boolean lastRow ) throws KettleStepException {
    // Start one position before the final character of the buffer.
    int pos = data.buffer.length() - 2;
    if ( lastRow ) {
      // Get rid of any ]} at the end of the last row.
      //
      pos = removeTrailingSpaces( data.buffer, pos );
      pos = removeTrailingCharacter( data.buffer, pos, '}' );
      pos = removeTrailingSpaces( data.buffer, pos );
      pos = removeTrailingCharacter( data.buffer, pos, ']' );
    }
    pos = removeTrailingSpaces( data.buffer, pos );
    pos = removeTrailingCharacter( data.buffer, pos, ',' );

    String json = decodeUtf8( data.buffer.substring( 0, pos + 1 ) );
    data.buffer.delete( 0, data.buffer.length() - 1 );

    if ( log.isDebug() ) {
      logDebug( "Read row: " + json );
    }

    Object[] row = RowDataUtil.allocateRowData( data.outputRowMeta.size() );
    int index = 0;
    row[ index++ ] = json;

    // putRow will send the row on to the default output hop.
    //
    putRow( data.outputRowMeta, row );
  }

  /**
   * Turns text that was collected by casting each stream byte to a {@code char} back into
   * those bytes and decodes them as UTF-8, so multi-byte characters come out intact.
   * Pure ASCII text is returned unchanged.
   */
  private static String decodeUtf8( String bytesAsChars ) {
    // Every char is in 0..255 here, so ISO-8859-1 restores the original bytes one to one.
    return new String( bytesAsChars.getBytes( StandardCharsets.ISO_8859_1 ), StandardCharsets.UTF_8 );
  }

  /**
   * Steps one position back when the character at {@code pos} equals {@code c}.
   *
   * @return {@code pos - 1} on a match, otherwise {@code pos}; a negative {@code pos} is
   *         returned unchanged
   */
  private int removeTrailingCharacter( StringBuilder buffer, int pos, char c ) {
    if ( pos >= 0 && buffer.charAt( pos ) == c ) {
      pos--;
    }
    return pos;
  }

  /**
   * Steps back from {@code pos} over every character that {@code Const.isSpace} accepts.
   *
   * @return the position of the last non-space character at or before {@code pos}, or -1
   */
  private int removeTrailingSpaces( StringBuilder buffer, int pos ) {
    while ( pos >= 0 && Const.isSpace( buffer.charAt( pos ) ) ) {
      pos--;
    }
    return pos;
  }

  /**
   * Resolves the step settings, runs the GET request against the view URL and stores the
   * response stream in the step data for {@link #processRow}.
   *
   * @return {@code true} when the request returned a 2xx status; {@code false} when the design
   *         document or view name is empty, the status is outside 200-299, or the request
   *         threw an exception
   */
  public boolean init( StepMetaInterface stepMetaInterface, StepDataInterface stepDataInterface ) {
    if ( super.init( stepMetaInterface, stepDataInterface ) ) {
      meta = (CouchDbInputMeta) stepMetaInterface;
      data = (CouchDbInputData) stepDataInterface;

      String hostname = environmentSubstitute( meta.getHostname() );
      int port = Const.toInt( environmentSubstitute( meta.getPort() ), 5984 );
      String db = environmentSubstitute( meta.getDbName() );
      String design = environmentSubstitute( meta.getDesignDocument() );
      String view = environmentSubstitute( meta.getViewName() );

      if ( StringUtils.isEmpty( design ) ) {
        log.logError( "Please provide a design document to use" );
        return false;
      }
      if ( StringUtils.isEmpty( view ) ) {
        log.logError( "Please provide a view name to look at" );
        return false;
      }

      String realUser = environmentSubstitute( meta.getAuthenticationUser() );
      String realPass = Encr
        .decryptPasswordOptionallyEncrypted( environmentSubstitute( meta.getAuthenticationPassword() ) );

      String url = buildUrl( hostname, port, db, design, view );
      logBasic( "Querying CouchDB view on URL: " + url );

      try {
        HttpClient client = createHttpClient( realUser, realPass );
        HttpGet method = getMethodFactory.create( url );

        // Execute request
        data.inputStream = null;
        data.bufferedInputStream = null;

        //Client Preemptive Basic Authentication
        HttpClientContext context = null;
        if ( StringUtils.isNotBlank( hostname ) ) {
          context = getHttpClientContext( hostname, port );
        }
        HttpResponse httpResponse = context != null ? client.execute( method, context ) : client.execute( method );
        int result = httpResponse.getStatusLine().getStatusCode();

        // the response
        data.inputStream = httpResponse.getEntity().getContent();
        data.bufferedInputStream = new BufferedInputStream( data.inputStream, 1000 );

        if ( result < 200 || result >= 300 ) {
          // Drain the body so that it can be reported together with the status code.
          StringBuilder err = new StringBuilder();
          int c;
          while ( ( c = data.bufferedInputStream.read() ) >= 0 ) {
            err.append( (char) c );
          }
          logError( "Web request returned code " + result + " : " + decodeUtf8( err.toString() ) );
          return false;
        }

        data.counter = 0;

        return true;
      } catch ( Exception e ) {
        logError( BaseMessages.getString( PKG, "CouchDbInput.ErrorConnectingToCouchDb.Exception", hostname, ""
          + port, db, view ), e );
        return false;
      }
    }
    return false;
  }

  /**
   * Closes the response stream, when one was opened, and then delegates to the superclass.
   * A failure to close is logged and counted as a step error.
   */
  @Override
  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    // data is only assigned in init() after super.init() succeeded, so it may still be null.
    if ( data != null && data.bufferedInputStream != null ) {
      try {
        data.bufferedInputStream.close();
      } catch ( Exception e ) {
        setErrors( 1 );
        logError( "Error closing data stream", e );
      }
    }
    super.dispose( smi, sdi );
  }

  /** Creates HTTP clients through {@code SlaveConnectionManager}. */
  @Deprecated
  static class HttpClientFactory {
    public HttpClient createHttpClient() {
      return SlaveConnectionManager.getInstance().createHttpClient();
    }
  }

  /**
   * Builds an HTTP client through the {@code HttpClientManager} builder.
   *
   * @param user     user name; credentials are only set when this is not blank
   * @param password password that goes with {@code user}
   * @return the configured client
   */
  @VisibleForTesting
  HttpClient createHttpClient( String user, String password ) {
    HttpClientManager.HttpClientBuilderFacade httpClientBuilder = httpClientManager.createBuilder();

    // client.setTimeout(10000);
    // client.setConnectionTimeout(10000);

    if ( StringUtils.isNotBlank( user ) ) {
      httpClientBuilder.setCredentials( user, password );
    }
    return httpClientBuilder.build();
  }

  /** Creates the GET request for a URL. */
  static class GetMethodFactory {
    public HttpGet create( String url ) {
      return new HttpGet( url );
    }
  }

  /** Returns the {@code HttpClientManager} singleton. */
  @VisibleForTesting
  HttpClientManager createHttpClientManager() {
    return HttpClientManager.getInstance();
  }

  /**
   * Creates an execution context whose auth cache already holds a BASIC scheme for the given
   * host and port (scheme "http").
   *
   * @param hostname target host name
   * @param port     target port
   * @return the prepared context
   */
  @VisibleForTesting
  HttpClientContext getHttpClientContext( String hostname, int port ) {
    HttpClientContext context;
    HttpHost target = new HttpHost( hostname, port, "http" );
    // Create AuthCache instance
    AuthCache authCache = new BasicAuthCache();
    // Generate BASIC scheme object and add it to the local
    // auth cache
    BasicScheme basicAuth = new BasicScheme();
    authCache.put( target, basicAuth );
    // Add AuthCache to the execution context
    context = HttpClientContext.create();
    context.setAuthCache( authCache );
    return context;
  }
}