Java tutorial
/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.orabulkloader; // // The "designer" notes of the Oracle bulkloader: // ---------------------------------------------- // // - "Enclosed" is used in the loader instead of "optionally enclosed" as optionally // encloses kind of destroys the escaping. // - A Boolean is output as Y and N (as in the text output step e.g.). If people don't // like this they can first convert the boolean value to something else before loading // it. // - Filters (besides data and datetime) are not supported as it slows down. // // import org.apache.commons.vfs.FileObject; import org.pentaho.di.core.Const; import org.pentaho.di.core.database.DatabaseMeta; import org.pentaho.di.core.encryption.Encr; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleFileException; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStep; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; import java.io.BufferedReader; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; /** * Performs a bulk load to an oracle table. * * @author Sven Boden * @since 20-feb-2007 */ public class OraBulkLoader extends BaseStep implements StepInterface { private static Class<?> PKG = OraBulkLoaderMeta.class; // for i18n purposes, needed by Translator2!! public static final int EX_SUCC = 0; public static final int EX_WARN = 2; Process sqlldrProcess = null; private OraBulkLoaderMeta meta; protected OraBulkLoaderData data; private OraBulkDataOutput output = null; /* * Local copy of the transformation "preview" property. We only forward the rows upon previewing, we don't do any of * the real stuff. */ private boolean preview = false; // // This class continually reads from the stream, and sends it to the log // if the logging level is at least basic level. // private final class StreamLogger extends Thread { private InputStream input; private String type; StreamLogger(InputStream is, String type) { this.input = is; this.type = type + ">"; } public void run() { try { final BufferedReader br = new BufferedReader(new InputStreamReader(input)); String line; while ((line = br.readLine()) != null) { // Only perform the concatenation if at basic level. Otherwise, // this just reads from the stream. if (log.isBasic()) { logBasic(type + line); } } } catch (IOException ioe) { ioe.printStackTrace(); } } } public OraBulkLoader(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans) { super(stepMeta, stepDataInterface, copyNr, transMeta, trans); } private String substituteRecordTerminator(String terminator) { final StringBuilder in = new StringBuilder(); int length; boolean escaped = false; terminator = environmentSubstitute(terminator); length = terminator.length(); for (int i = 0; i < length; i++) { final char c = terminator.charAt(i); if (escaped) { switch (c) { case 'n': in.append('\n'); break; case 'r': in.append('\r'); break; default: in.append(c); break; } escaped = false; } else if (c == '\\') { escaped = true; } else { in.append(c); } } return in.toString(); } private String encodeRecordTerminator(String terminator, String encoding) throws KettleException { final String in = substituteRecordTerminator(terminator); final StringBuilder out = new StringBuilder(); byte[] bytes; try { // use terminator in hex representation due to character set // terminator in hex representation must be in character set // of data file if (Const.isEmpty(encoding)) { bytes = in.getBytes(); } else { bytes = in.getBytes(encoding); } for (byte aByte : bytes) { final String hex = Integer.toHexString(aByte); if (hex.length() == 1) { out.append('0'); } out.append(hex); } } catch (UnsupportedEncodingException e) { throw new KettleException("Unsupported character encoding: " + encoding, e); } return out.toString(); } /** * Get the contents of the control file as specified in the meta object * * @param meta * the meta object to model the control file after * * @return a string containing the control file contents */ public String getControlFileContents(OraBulkLoaderMeta meta, RowMetaInterface rm, Object[] r) throws KettleException { DatabaseMeta dm = meta.getDatabaseMeta(); String inputName = "'" + environmentSubstitute(meta.getDataFile()) + "'"; String loadAction = meta.getLoadAction(); StringBuilder contents = new StringBuilder(500); contents.append("OPTIONS(").append(Const.CR); contents.append(" ERRORS=\'").append(meta.getMaxErrors()).append("\'").append(Const.CR); if (meta.getCommitSizeAsInt(this) != 0 && !(meta.isDirectPath() && getStepMeta().getCopies() > 1)) { // For the second part of the above expressions: ROWS is not supported // in parallel mode (by sqlldr). contents.append(" , ROWS=\'").append(meta.getCommitSize()).append("\'").append(Const.CR); } if (meta.getBindSizeAsInt(this) != 0) { contents.append(" , BINDSIZE=\'").append(meta.getBindSize()).append("\'").append(Const.CR); } if (meta.getReadSizeAsInt(this) != 0) { contents.append(" , READSIZE=\'").append(meta.getReadSize()).append("\'").append(Const.CR); } contents.append(")").append(Const.CR); contents.append("LOAD DATA").append(Const.CR); if (!Const.isEmpty(meta.getCharacterSetName())) { contents.append("CHARACTERSET ").append(meta.getCharacterSetName()).append(Const.CR); } if (!OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals(meta.getLoadMethod()) || !Const.isEmpty(meta.getAltRecordTerm())) { String infile = inputName; if (OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals(meta.getLoadMethod())) { infile = "''"; } // For concurrent input, data command line argument must be specified contents.append("INFILE ").append(infile); if (!Const.isEmpty(meta.getAltRecordTerm())) { contents.append(" \"STR x'") .append(encodeRecordTerminator(meta.getAltRecordTerm(), meta.getEncoding())).append("'\""); } contents.append(Const.CR); } contents.append("INTO TABLE ") .append(dm.getQuotedSchemaTableCombination(environmentSubstitute(meta.getSchemaName()), environmentSubstitute(meta.getTableName()))) .append(Const.CR).append(loadAction).append(Const.CR) .append("FIELDS TERMINATED BY ',' ENCLOSED BY '\"'").append(Const.CR).append("("); String[] streamFields = meta.getFieldStream(); String[] tableFields = meta.getFieldTable(); String[] dateMask = meta.getDateMask(); if (streamFields == null || streamFields.length == 0) { throw new KettleException("No fields defined to load to database"); } for (int i = 0; i < streamFields.length; i++) { if (i != 0) { contents.append(", ").append(Const.CR); } contents.append(dm.quoteField(tableFields[i])); int pos = rm.indexOfValue(streamFields[i]); if (pos < 0) { throw new KettleException("Could not find field " + streamFields[i] + " in stream"); } ValueMetaInterface v = rm.getValueMeta(pos); switch (v.getType()) { case ValueMetaInterface.TYPE_STRING: if (v.getLength() > 255) { contents.append(" CHAR(").append(v.getLength()).append(")"); } else { contents.append(" CHAR"); } break; case ValueMetaInterface.TYPE_INTEGER: case ValueMetaInterface.TYPE_NUMBER: case ValueMetaInterface.TYPE_BIGNUMBER: break; case ValueMetaInterface.TYPE_DATE: if (OraBulkLoaderMeta.DATE_MASK_DATE.equals(dateMask[i])) { contents.append(" DATE 'yyyy-mm-dd'"); } else if (OraBulkLoaderMeta.DATE_MASK_DATETIME.equals(dateMask[i])) { contents.append(" TIMESTAMP 'yyyy-mm-dd hh24:mi:ss.ff'"); } else { // If not specified the default is date. contents.append(" DATE 'yyyy-mm-dd'"); } break; case ValueMetaInterface.TYPE_BINARY: contents.append(" ENCLOSED BY '<startlob>' AND '<endlob>'"); break; case ValueMetaInterface.TYPE_TIMESTAMP: contents.append(" TIMESTAMP 'yyyy-mm-dd hh24:mi:ss.ff'"); break; default: break; } } contents.append(")"); return contents.toString(); } /** * Create a control file. * * @param filename path to control file * @param meta step meta * @throws KettleException */ public void createControlFile(String filename, Object[] row, OraBulkLoaderMeta meta) throws KettleException { File controlFile = new File(filename); FileWriter fw = null; try { controlFile.createNewFile(); fw = new FileWriter(controlFile); fw.write(getControlFileContents(meta, getInputRowMeta(), row)); } catch (IOException ex) { throw new KettleException(ex.getMessage(), ex); } finally { try { if (fw != null) { fw.close(); } } catch (Exception ex) { // Ignore errors } } } /** * Create the command line for an sqlldr process depending on the meta information supplied. * * @param meta * The meta data to create the command line from * @param password * Use the real password or not * * @return The string to execute. * * @throws KettleException * Upon any exception */ public String createCommandLine(OraBulkLoaderMeta meta, boolean password) throws KettleException { StringBuilder sb = new StringBuilder(300); if (meta.getSqlldr() != null) { try { FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getSqlldr()), getTransMeta()); String sqlldr = KettleVFS.getFilename(fileObject); sb.append(sqlldr); } catch (KettleFileException ex) { throw new KettleException("Error retrieving sqlldr string", ex); } } else { throw new KettleException("No sqlldr application specified"); } if (meta.getControlFile() != null) { try { FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getControlFile()), getTransMeta()); sb.append(" control=\'"); sb.append(KettleVFS.getFilename(fileObject)); sb.append("\'"); } catch (KettleFileException ex) { throw new KettleException("Error retrieving controlfile string", ex); } } else { throw new KettleException("No control file specified"); } if (OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals(meta.getLoadMethod())) { sb.append(" data=\'-\'"); } if (meta.getLogFile() != null) { try { FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getLogFile()), getTransMeta()); sb.append(" log=\'"); sb.append(KettleVFS.getFilename(fileObject)); sb.append("\'"); } catch (KettleFileException ex) { throw new KettleException("Error retrieving logfile string", ex); } } if (meta.getBadFile() != null) { try { FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getBadFile()), getTransMeta()); sb.append(" bad=\'"); sb.append(KettleVFS.getFilename(fileObject)); sb.append("\'"); } catch (KettleFileException ex) { throw new KettleException("Error retrieving badfile string", ex); } } if (meta.getDiscardFile() != null) { try { FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getDiscardFile()), getTransMeta()); sb.append(" discard=\'"); sb.append(KettleVFS.getFilename(fileObject)); sb.append("\'"); } catch (KettleFileException ex) { throw new KettleException("Error retrieving discardfile string", ex); } } DatabaseMeta dm = meta.getDatabaseMeta(); if (dm != null) { String user = Const.NVL(dm.getUsername(), ""); String pass = Const .NVL(Encr.decryptPasswordOptionallyEncrypted(environmentSubstitute(dm.getPassword())), ""); if (!password) { pass = "******"; } String dns = Const.NVL(dm.getDatabaseName(), ""); sb.append(" userid=").append(environmentSubstitute(user)).append("/") .append(environmentSubstitute(pass)).append("@"); String overrideName = meta.getDbNameOverride(); if (Const.isEmpty(Const.rtrim(overrideName))) { sb.append(environmentSubstitute(dns)); } else { // if the database name override is filled in, do that one. sb.append(environmentSubstitute(overrideName)); } } else { throw new KettleException("No connection specified"); } if (meta.isDirectPath()) { sb.append(" DIRECT=TRUE"); if (getStepMeta().getCopies() > 1 || meta.isParallel()) { sb.append(" PARALLEL=TRUE"); } } return sb.toString(); } public void checkExitVal(int exitVal) throws KettleException { if (exitVal == EX_SUCC) { return; } if (meta.isFailOnWarning() && (exitVal == EX_WARN)) { throw new KettleException("sqlldr returned warning"); } else if (meta.isFailOnError() && (exitVal != EX_WARN)) { throw new KettleException("sqlldr returned an error (exit code " + exitVal + ")"); } } public boolean execute(OraBulkLoaderMeta meta, boolean wait) throws KettleException { Runtime rt = Runtime.getRuntime(); try { sqlldrProcess = rt.exec(createCommandLine(meta, true)); // any error message? StreamLogger errorLogger = new StreamLogger(sqlldrProcess.getErrorStream(), "ERROR"); // any output? StreamLogger outputLogger = new StreamLogger(sqlldrProcess.getInputStream(), "OUTPUT"); // kick them off errorLogger.start(); outputLogger.start(); if (wait) { // any error??? int exitVal = sqlldrProcess.waitFor(); sqlldrProcess = null; logBasic(BaseMessages.getString(PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal)); checkExitVal(exitVal); } } catch (Exception ex) { // Don't throw the message upwards, the message contains the password. throw new KettleException("Error while executing sqlldr \'" + createCommandLine(meta, false) + "\'"); } return true; } public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException { meta = (OraBulkLoaderMeta) smi; data = (OraBulkLoaderData) sdi; try { Object[] r = getRow(); // Get row from input rowset & set row busy! if (r == null) { // no more input to be expected... setOutputDone(); if (!preview) { if (output != null) { // Close the output try { output.close(); } catch (IOException e) { throw new KettleException("Error while closing output", e); } output = null; } String loadMethod = meta.getLoadMethod(); if (OraBulkLoaderMeta.METHOD_AUTO_END.equals(loadMethod)) { // if this is the first line, we do not need to execute loader // control file may not exists if (!first) { execute(meta, true); sqlldrProcess = null; } } else if (OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals(meta.getLoadMethod())) { try { if (sqlldrProcess != null) { int exitVal = sqlldrProcess.waitFor(); sqlldrProcess = null; logBasic(BaseMessages.getString(PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal)); checkExitVal(exitVal); } else if (!first) { throw new KettleException("Internal error: no sqlldr process running"); } } catch (Exception ex) { throw new KettleException("Error while executing sqlldr", ex); } } } return false; } if (!preview) { if (first) { first = false; String recTerm = Const.CR; if (!Const.isEmpty(meta.getAltRecordTerm())) { recTerm = substituteRecordTerminator(meta.getAltRecordTerm()); } createControlFile(environmentSubstitute(meta.getControlFile()), r, meta); output = new OraBulkDataOutput(meta, recTerm); if (OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals(meta.getLoadMethod())) { execute(meta, false); } output.open(this, sqlldrProcess); } output.writeLine(getInputRowMeta(), r); } putRow(getInputRowMeta(), r); incrementLinesOutput(); } catch (KettleException e) { logError(BaseMessages.getString(PKG, "OraBulkLoader.Log.ErrorInStep") + e.getMessage()); setErrors(1); stopAll(); setOutputDone(); // signal end to receiver(s) return false; } return true; } public boolean init(StepMetaInterface smi, StepDataInterface sdi) { meta = (OraBulkLoaderMeta) smi; data = (OraBulkLoaderData) sdi; Trans trans = getTrans(); preview = trans.isPreview(); return super.init(smi, sdi); } public void dispose(StepMetaInterface smi, StepDataInterface sdi) { meta = (OraBulkLoaderMeta) smi; data = (OraBulkLoaderData) sdi; super.dispose(smi, sdi); // close output stream (may terminate running sqlldr) if (output != null) { // Close the output try { output.close(); } catch (IOException e) { logError("Error while closing output", e); } output = null; } // running sqlldr process must be terminated if (sqlldrProcess != null) { try { int exitVal = sqlldrProcess.waitFor(); sqlldrProcess = null; logBasic(BaseMessages.getString(PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal)); } catch (InterruptedException e) { /* process should be destroyed */ e.printStackTrace(); if (sqlldrProcess != null) { sqlldrProcess.destroy(); } } } if (!preview && meta.isEraseFiles()) { // Erase the created cfg/dat files if requested. We don't erase // the rest of the files because it would be "stupid" to erase them // right after creation. If you don't want them, don't fill them in. FileObject fileObject = null; String method = meta.getLoadMethod(); // OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals(method) || if (OraBulkLoaderMeta.METHOD_AUTO_END.equals(method)) { if (meta.getControlFile() != null) { try { fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getControlFile()), getTransMeta()); fileObject.delete(); fileObject.close(); } catch (Exception ex) { logError("Error deleting control file \'" + KettleVFS.getFilename(fileObject) + "\': " + ex.getMessage(), ex); } } } if (OraBulkLoaderMeta.METHOD_AUTO_END.equals(method)) { // In concurrent mode the data is written to the control file. if (meta.getDataFile() != null) { try { fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getDataFile()), getTransMeta()); fileObject.delete(); fileObject.close(); } catch (Exception ex) { logError("Error deleting data file \'" + KettleVFS.getFilename(fileObject) + "\': " + ex.getMessage(), ex); } } } if (OraBulkLoaderMeta.METHOD_MANUAL.equals(method)) { logBasic("Deletion of files is not compatible with \'manual load method\'"); } } } }