Java tutorial
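The class below, LucidDBBulkLoader, is a Pentaho Data Integration (Kettle) step that bulk loads rows into a LucidDB table. It is based on Sven Boden's Oracle Bulk Loader step and works by creating a named pipe (FIFO) with mkfifo, registering a LucidDB "fifo server" (a foreign data wrapper over the FIFO's directory), writing each incoming row to the FIFO as CSV, and running an INSERT ... SELECT from the resulting foreign table on a background thread (the SqlRunner inner class). After the source there is a short, standalone JDBC sketch of the same FIFO-plus-background-INSERT pattern.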
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.luciddbbulkloader;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.SQLWarning;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.vfs.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.DBCache;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.util.StreamLogger;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Performs a bulk load to a LucidDB table.
 *
 * Based on Sven Boden's Oracle Bulk Loader step
 *
 * @author matt
 * @since 14-nov-2008
 */
public class LucidDBBulkLoader extends BaseStep implements StepInterface {
  private static Class<?> PKG = LucidDBBulkLoaderMeta.class; // for i18n purposes, needed by Translator2!!

  private LucidDBBulkLoaderMeta meta;
  private LucidDBBulkLoaderData data;
  // private SqlRunner sqlRunner;

  public LucidDBBulkLoader(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans) {
    super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
  }

  public boolean execute(LucidDBBulkLoaderMeta meta, boolean wait) throws KettleException {
    Runtime rt = Runtime.getRuntime();

    try {
      String tableName = environmentSubstitute(meta.getTableName());

      // 1) Set up the FIFO folder, create the directory and path to it...
      //
      String fifoVfsDirectory = environmentSubstitute(meta.getFifoDirectory());
      FileObject directory = KettleVFS.getFileObject(fifoVfsDirectory, getTransMeta());
      directory.createFolder();
      String fifoDirectory = KettleVFS.getFilename(directory);

      // 2) Create the FIFO file using the "mkfifo" command...
      // Make sure to log all the possible output, also from STDERR
      //
      data.fifoFilename = KettleVFS.getFilename(directory) + Const.FILE_SEPARATOR + tableName + ".csv";
      data.bcpFilename = KettleVFS.getFilename(directory) + Const.FILE_SEPARATOR + tableName + ".bcp";

      File fifoFile = new File(data.fifoFilename);
      if (!fifoFile.exists()) {
        String mkFifoCmd = "mkfifo " + data.fifoFilename;
        logBasic("Creating FIFO file using this command : " + mkFifoCmd);
        Process mkFifoProcess = rt.exec(mkFifoCmd);
        StreamLogger errorLogger = new StreamLogger(log, mkFifoProcess.getErrorStream(), "mkFifoError");
        StreamLogger outputLogger = new StreamLogger(log, mkFifoProcess.getInputStream(), "mkFifoOutput");
        new Thread(errorLogger).start();
        new Thread(outputLogger).start();
        int result = mkFifoProcess.waitFor();
        if (result != 0) {
          throw new Exception("Return code " + result + " received from statement : " + mkFifoCmd);
        }
      }

      // 3) Make a connection to LucidDB for sending SQL commands
      // (Also, we need a clear cache for getting up-to-date target metadata)
      if (meta.getDatabaseMeta() == null) {
        logError(BaseMessages.getString(PKG, "LuciDBBulkLoader.Init.ConnectionMissing", getStepname()));
        return false;
      }
      DBCache.getInstance().clear(meta.getDatabaseMeta().getName());

      data.db = new Database(this, meta.getDatabaseMeta());
      data.db.shareVariablesWith(this);

      // Connect to the database
      if (getTransMeta().isUsingUniqueConnections()) {
        synchronized (getTrans()) {
          data.db.connect(getTrans().getTransactionId(), getPartitionID());
        }
      } else {
        data.db.connect(getPartitionID());
      }

      logBasic("Connected to LucidDB");

      // 4) Now we are ready to create the LucidDB FIFO server that will handle the actual bulk loading.
      //
      String fifoServerStatement = "";
      fifoServerStatement += "create or replace server " + meta.getFifoServerName() + Const.CR;
      fifoServerStatement += "foreign data wrapper sys_file_wrapper" + Const.CR;
      fifoServerStatement += "options (" + Const.CR;
      fifoServerStatement += "directory '" + fifoDirectory + "'," + Const.CR;
      fifoServerStatement += "file_extension 'csv'," + Const.CR;
      fifoServerStatement += "with_header 'no'," + Const.CR;
      fifoServerStatement += "num_rows_scan '0'," + Const.CR;
      fifoServerStatement += "lenient 'no');" + Const.CR;

      logBasic("Creating LucidDB fifo_server with the following command: " + fifoServerStatement);
      data.db.execStatements(fifoServerStatement);

      // 5) Set the error limit in the LucidDB session
      // REVIEW jvs 13-Dec-2008: is this guaranteed to retain the same connection?
      String errorMaxStatement = "";
      errorMaxStatement += "alter session set \"errorMax\" = " + meta.getMaxErrors() + ";" + Const.CR;

      logBasic("Setting error limit in LucidDB session with the following command: " + errorMaxStatement);
      data.db.execStatements(errorMaxStatement);

      // 6) Now we also need to create a bulk loader file .bcp
      //
      createBulkLoadConfigFile(data.bcpFilename);

      // 7) execute the actual load command!
      // This will actually block until the load is done in the
      // separate execution thread; see notes in executeLoadCommand
      // on why it's important for this to occur BEFORE
      // opening our end of the FIFO.
      //
      executeLoadCommand(tableName);

      // 8) We have to write rows to the FIFO file later on.
      data.fifoStream = new BufferedOutputStream(new FileOutputStream(fifoFile));
    } catch (Exception ex) {
      throw new KettleException(ex);
    }

    return true;
  }

  private void executeLoadCommand(String tableName) throws KettleException {
    String loadCommand = "";
    loadCommand += "insert into " + data.schemaTable + Const.CR;
    loadCommand += "select * from " + meta.getFifoServerName() + ".\"DEFAULT\"." + tableName + Const.CR;

    // NOTE jvs 13-Dec-2008: We prepare the SQL before spawning the thread
    // to execute it. The reason is that if a SQL validation exception
    // occurs during preparation (e.g. due to datatype mismatch), we don't
    // even want to open our end of the FIFO, otherwise we can get stuck
    // since the server is never going to open its end until execution,
    // which ain't gonna happen in that case.
    logBasic("Preparing load command : " + Const.CR + loadCommand);
    PreparedStatement ps = data.db.prepareSQL(loadCommand);

    data.sqlRunner = new SqlRunner(data, ps);
    data.sqlRunner.start();
  }

  private void createBulkLoadConfigFile(String bcpFilename) throws KettleException {
    File bcpFile = new File(bcpFilename);
    FileWriter writer = null;

    try {
      writer = new FileWriter(bcpFile);

      // The first Line is the version number, usually 9.0
      //
      writer.write("9.0" + Const.CR);

      // The second line contains the number of columns...
      //
      writer.write(meta.getFieldTable().length + Const.CR);

      RowMetaInterface targetFieldMeta = meta.getRequiredFields(this);

      data.bulkFormatMeta = new ValueMetaInterface[meta.getFieldTable().length];

      // The next block lists the columns from 1..N where N is the number of columns...
      //
      for (int i = 0; i < meta.getFieldTable().length; i++) {
        ValueMetaInterface field = getInputRowMeta().getValueMeta(data.keynrs[i]);

        // Col 1 : the column number (i+1)
        //
        writer.write(Integer.toString(i + 1) + " ");

        // Col 2 : the data type
        //
        String dataType = null;
        switch (field.getType()) {
          case ValueMetaInterface.TYPE_STRING:
            dataType = "SQLVARCHAR";
            break;
          case ValueMetaInterface.TYPE_BIGNUMBER:
            dataType = "SQLREAL";
            break;
          case ValueMetaInterface.TYPE_NUMBER:
            dataType = "SQLFLT8";
            break;
          case ValueMetaInterface.TYPE_INTEGER:
            dataType = "SQLBIGINT";
            break;
          case ValueMetaInterface.TYPE_DATE:
            // Use the actual datatypes in the target table to
            // determine how to create the control file column
            // definition for date/time fields.
            if (targetFieldMeta.getValueMetaList().get(i).getOriginalColumnType() == Types.DATE) {
              data.bulkFormatMeta[i] = data.bulkDateMeta;
              dataType = "SQLDATE";
            } else {
              data.bulkFormatMeta[i] = data.bulkTimestampMeta;
              dataType = "SQLTIMESTAMP";
            }
            break;
          // REVIEW jvs 13-Dec-2008: enable boolean support?
          case ValueMetaInterface.TYPE_BOOLEAN:
            dataType = "SQLCHAR";
            break;
          default:
            dataType = "SQLVARCHAR";
            break;
        }
        writer.write(dataType + " ");

        // Col 3 : an ignored column (start position or something like that)
        //
        writer.write("0 ");

        // Col 4 : the data length, just put the length metadata in here
        //
        if (field.getLength() == -1) {
          writer.write("1000 ");
        } else {
          writer.write("" + field.getLength() + " ");
        }

        // Col 5 : The separator is also ignored, we're going to put a tab in here, like in the sample
        //
        writer.write("\"\\t\" "); // "\t"

        // Col 6 : the column number again...
        //
        writer.write(Integer.toString(i + 1) + " ");

        // Col 7 : The identifier
        //
        writer.write(meta.getFieldTable()[i] + " ");

        // Col 8 : Collation / Format : leave it empty/default at the time being
        //
        writer.write("\"\" "); // ""

        // Newline to finish
        //
        writer.write(Const.CR);
      }
    } catch (Exception e) {
      throw new KettleException("Unable to create BCP control file", e);
    } finally {
      // That's it, close shop
      //
      if (writer != null) {
        try {
          writer.close();
        } catch (IOException e) {
          throw new KettleException("Unable to close BCP file '" + bcpFilename + "'", e);
        }
      }
    }
  }

  public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    meta = (LucidDBBulkLoaderMeta) smi;
    data = (LucidDBBulkLoaderData) sdi;

    try {
      Object[] r = getRow(); // Get row from input rowset & set row busy!
      if (r == null) { // no more input to be expected...

        setOutputDone();

        // Close the fifo file...
        //
        data.fifoStream.close();
        data.fifoStream = null;

        // wait for the INSERT statement to finish and check for any
        // error and/or warning...
        data.sqlRunner.join();
        SqlRunner sqlRunner = data.sqlRunner;
        data.sqlRunner = null;
        for (String warning : sqlRunner.warnings) {
          // REVIEW jvs 13-Dec-2008: It would be nice if there were
          // a logWarning instead?
          logError(" (WARNING) " + warning);
        }
        sqlRunner.checkExcn();

        // If there was no fatal exception, but there were warnings,
        // retrieve the rejected row count
        if (!sqlRunner.warnings.isEmpty()) {
          ResultSet rs = data.db.openQuery("SELECT PARAM_VALUE "
            + "FROM SYS_ROOT.USER_SESSION_PARAMETERS "
            + "WHERE PARAM_NAME='lastRowsRejected'");
          try {
            rs.next();
            setLinesRejected(rs.getInt(1));
          } finally {
            rs.close();
          }
        }

        return false;
      }

      if (first) {
        first = false;

        // Cache field indexes.
        //
        data.keynrs = new int[meta.getFieldStream().length];
        for (int i = 0; i < data.keynrs.length; i++) {
          data.keynrs[i] = getInputRowMeta().indexOfValue(meta.getFieldStream()[i]);
        }

        // execute the client statement...
        //
        execute(meta, true);
      }

      writeRowToBulk(getInputRowMeta(), r);
      putRow(getInputRowMeta(), r);
      incrementLinesOutput();

      return true;
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "LucidDBBulkLoader.Log.ErrorInStep"), e);
      setErrors(1);
      stopAll();
      setOutputDone(); // signal end to receiver(s)
      return false;
    }
  }

  private void writeRowToBulk(RowMetaInterface rowMeta, Object[] r) throws KettleException {
    try {
      // So, we have this output stream to which we can write CSV data to.
      // Basically, what we need to do is write the binary data (from strings to it as part of this proof of concept)
      //
      // The data format required is essentially:
      //
      for (int i = 0; i < data.keynrs.length; i++) {
        if (i > 0) {
          // Write a separator
          //
          data.fifoStream.write(data.separator);
        }

        int index = data.keynrs[i];
        ValueMetaInterface valueMeta = rowMeta.getValueMeta(index);
        Object valueData = r[index];

        if (valueData != null) {
          switch (valueMeta.getType()) {
            case ValueMetaInterface.TYPE_STRING:
              data.fifoStream.write(data.quote);
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                // We had a string, just dump it back.
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(valueMeta.getString(valueData).getBytes());
              }
              data.fifoStream.write(data.quote);
              break;
            case ValueMetaInterface.TYPE_INTEGER:
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(Long.toString(valueMeta.getInteger(valueData)).getBytes());
              }
              break;
            case ValueMetaInterface.TYPE_DATE:
              // REVIEW jvs 13-Dec-2008: Is it OK to ignore FieldFormatOk like this?
              /*
               * if (false && valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
               * data.fifoStream.write((byte[])valueData); } else {
               */
              Date date = valueMeta.getDate(valueData);
              // Convert it to the ISO timestamp format "yyyy-MM-dd HH:mm:ss" or date format
              // "yyyy-MM-dd" as appropriate, since LucidDB follows SQL:2003 here
              data.fifoStream.write(data.bulkFormatMeta[i].getString(date).getBytes());
              // }
              break;
            case ValueMetaInterface.TYPE_BOOLEAN:
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(Boolean.toString(valueMeta.getBoolean(valueData)).getBytes());
              }
              break;
            case ValueMetaInterface.TYPE_NUMBER:
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(Double.toString(valueMeta.getNumber(valueData)).getBytes());
              }
              break;
            case ValueMetaInterface.TYPE_BIGNUMBER:
              if (valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
                data.fifoStream.write((byte[]) valueData);
              } else {
                data.fifoStream.write(valueMeta.getString(valueData).getBytes());
              }
              break;
            default:
              break;
          }
        }
      }

      // finally write a newline
      //
      data.fifoStream.write(data.newline);
    } catch (Exception e) {
      throw new KettleException("Error serializing rows of data to the fifo file", e);
    }
  }

  public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
    meta = (LucidDBBulkLoaderMeta) smi;
    data = (LucidDBBulkLoaderData) sdi;

    if (super.init(smi, sdi)) {
      data.quote = "\"".getBytes();
      data.separator = ",".getBytes();
      data.newline = Const.CR.getBytes();

      data.bulkTimestampMeta = new ValueMeta("timestampMeta", ValueMetaInterface.TYPE_DATE);
      data.bulkTimestampMeta.setConversionMask("yyyy-MM-dd HH:mm:ss");
      data.bulkTimestampMeta.setStringEncoding(meta.getEncoding());

      data.bulkDateMeta = new ValueMeta("dateMeta", ValueMetaInterface.TYPE_DATE);
      data.bulkDateMeta.setConversionMask("yyyy-MM-dd");
      data.bulkDateMeta.setStringEncoding(meta.getEncoding());

      data.bulkNumberMeta = new ValueMeta("numberMeta", ValueMetaInterface.TYPE_NUMBER);
      data.bulkNumberMeta.setConversionMask("#.#");
      data.bulkNumberMeta.setGroupingSymbol(",");
      data.bulkNumberMeta.setDecimalSymbol(".");
      data.bulkNumberMeta.setStringEncoding(meta.getEncoding());

      data.bufferSize = Const.toInt(environmentSubstitute(meta.getBufferSize()), 100000);

      // Allocate the buffer
      //
      data.rowBuffer = new byte[data.bufferSize][];
      data.bufferIndex = 0;

      // Schema-table combination...
      data.schemaTable = meta.getDatabaseMeta().getQuotedSchemaTableCombination(
        environmentSubstitute(meta.getSchemaName()), environmentSubstitute(meta.getTableName()));

      return true;
    }
    return false;
  }

  public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
    meta = (LucidDBBulkLoaderMeta) smi;
    data = (LucidDBBulkLoaderData) sdi;

    // Close the output streams if still needed.
    //
    try {
      if (data.fifoStream != null) {
        data.fifoStream.close();
      }

      // Stop the SQL execution thread
      if (data.sqlRunner != null) {
        data.sqlRunner.join();
        data.sqlRunner = null;
      }

      // And finally, release the database connection
      if (data.db != null) {
        data.db.disconnect();
        data.db = null;
      }
    } catch (Exception e) {
      setErrors(1L);
      logError("Unexpected error encountered while closing the client connection", e);
    }

    super.dispose(smi, sdi);
  }

  static class SqlRunner extends Thread {
    private LucidDBBulkLoaderData data;
    private PreparedStatement ps;
    private SQLException ex;
    List<String> warnings;

    SqlRunner(LucidDBBulkLoaderData data, PreparedStatement ps) {
      this.data = data;
      this.ps = ps;
      warnings = new ArrayList<String>();
    }

    public void run() {
      try {
        // TODO jvs 12-Dec-2008: cross-check result against actual number
        // of rows sent.
        ps.executeUpdate();

        // Pump out any warnings and save them.
        SQLWarning warning = ps.getWarnings();
        while (warning != null) {
          warnings.add(warning.getMessage());
          warning = warning.getNextWarning();
        }
      } catch (SQLException ex) {
        this.ex = ex;
      } finally {
        try {
          data.db.closePreparedStatement(ps);
        } catch (KettleException ke) {
          // not much we can do with this
        } finally {
          ps = null;
        }
      }
    }

    void checkExcn() throws SQLException {
      // This is called from the main thread context to rethrow any saved excn.
      if (ex != null) {
        throw ex;
      }
    }
  }
}
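The pattern worth taking away from this step is the coordination between the FIFO writer and the blocking INSERT ... SELECT that SqlRunner executes on its own thread: the statement is prepared before the step opens its end of the pipe, the load runs in the background while rows are streamed in, and warnings are collected once the writer side closes. Below is a minimal standalone sketch of that pattern in plain JDBC. It is not part of the Pentaho class above, and the JDBC URL, credentials, schema, table, server name, and FIFO path are placeholders for illustration only.

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLWarning;

public class FifoLoadSketch {

  public static void main(String[] args) throws Exception {
    // Placeholder connection details -- substitute a real JDBC URL and credentials.
    Connection conn = DriverManager.getConnection("jdbc:luciddb://localhost", "sa", "");

    // Prepare the load statement BEFORE opening our end of the FIFO, for the same
    // reason given in executeLoadCommand(): a validation error during prepare must
    // not leave us blocked on a pipe the server will never read.
    // Schema, table and fifo-server names here are made up.
    final PreparedStatement ps = conn.prepareStatement(
      "insert into \"MYSCHEMA\".\"MYTABLE\" "
        + "select * from \"MY_FIFO_SERVER\".\"DEFAULT\".\"MYTABLE\"");

    // Run the blocking load on a separate thread; executeUpdate() only returns
    // once the writer closes its end of the FIFO.
    final SQLWarning[] firstWarning = new SQLWarning[1];
    Thread loader = new Thread(new Runnable() {
      public void run() {
        try {
          ps.executeUpdate();
          firstWarning[0] = ps.getWarnings();
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    });
    loader.start();

    // Meanwhile this thread writes CSV rows into the FIFO. "/tmp/bulkload/MYTABLE.csv"
    // is a made-up path that the fifo server's directory option would point at.
    BufferedWriter out = new BufferedWriter(new FileWriter("/tmp/bulkload/MYTABLE.csv"));
    try {
      out.write("\"row one\",1,2013-01-01\n");
      out.write("\"row two\",2,2013-01-02\n");
    } finally {
      out.close(); // closing our end of the FIFO lets the INSERT ... SELECT finish
    }

    loader.join();
    for (SQLWarning w = firstWarning[0]; w != null; w = w.getNextWarning()) {
      System.out.println("load warning: " + w.getMessage());
    }
    ps.close();
    conn.close();
  }
}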