// Java tutorial
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.enhanced.jsoninput;

import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;

import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.poi.util.IOUtils;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.QueueRowSet;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.steps.enhanced.jsoninput.reader.FastJsonReader;
import org.pentaho.di.trans.steps.enhanced.jsoninput.reader.RowOutputConverter;
import org.pentaho.di.trans.steps.enhanced.jsoninput.reader.InputsReader;
import org.pentaho.di.trans.steps.fileinput.BaseFileInputStep;
import org.pentaho.di.trans.steps.fileinput.IBaseFileInputReader;

/**
 * Read Json files, parse them and convert them to rows and writes these to one or more output streams.
 *
 * @author Samatar
 * @author edube
 * @author jadametz
 * @since 20-06-2010
 */
public class JsonInput extends BaseFileInputStep<JsonInputMeta, JsonInputData> implements StepInterface {
    private static Class<?> PKG = JsonInputMeta.class; // for i18n purposes, needed by Translator2!!

    /** Merges a base output row with a raw reader row; created in {@link #prepareToRowProcessing()}. */
    private RowOutputConverter rowOutputConverter;

    /**
     * Creates the step; all arguments are forwarded unchanged to the superclass constructor.
     */
    public JsonInput(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    /**
     * Resets the row counter, substitutes variables in every configured field path, records which
     * fields are flagged as repeated, and creates the JSON reader.
     *
     * @return {@code true} on success, {@code false} if the reader could not be created
     */
    @Override
    protected boolean init() {
        data.rownr = 1L;
        JsonInputField[] inputFields = meta.getInputFields();
        data.nrInputFields = inputFields.length;
        data.repeatedFields = new BitSet(data.nrInputFields);

        // Take care of variable substitution
        for (int i = 0; i < data.nrInputFields; i++) {
            JsonInputField field = inputFields[i];
            field.setPath(environmentSubstitute(field.getPath()));
            if (field.isRepeated()) {
                data.repeatedFields.set(i);
            }
        }

        try {
            // Init a new JSON reader
            createReader();
        } catch (KettleException e) {
            // Pass the exception along so the cause is not lost from the log.
            logError(e.getMessage(), e);
            return false;
        }
        return true;
    }

    /**
     * Produces at most one output row per call.
     *
     * <p>On the first call the output metadata and input iterator are prepared. A row is then fetched,
     * counted and passed on. Processing ends when no row is available or when the configured row
     * limit (if positive) has been exceeded. Any exception is either routed to error handling (and
     * processing continues) or logged, counted and turned into a full stop.
     *
     * @return {@code true} if the step should be called again, {@code false} when it is finished
     * @throws KettleException if preparing for row processing fails
     */
    @Override
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        if (first) {
            first = false;
            prepareToRowProcessing();
        }

        Object[] outRow = null;
        try {
            // Grab a row
            outRow = getOneOutputRow();
            if (outRow == null) {
                setOutputDone(); // signal end to receiver(s)
                return false; // end of data or error.
            }

            if (log.isRowLevel()) {
                logRowlevel(
                        BaseMessages.getString(PKG, "JsonInput.Log.ReadRow", data.outputRowMeta.getString(outRow)));
            }
            incrementLinesInput();
            data.rownr++;

            putRow(data.outputRowMeta, outRow); // copy row to output rowset(s);

            if (meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit()) {
                // limit has been reached: stop now.
                setOutputDone();
                return false;
            }
        } catch (Exception e) {
            if (getStepMeta().isDoingErrorHandling()) {
                sendErrorRow(e.toString());
            } else {
                logError(BaseMessages.getString(PKG, "JsonInput.ErrorInStepRunning", e.getMessage()), e);
                incrementErrors();
                stopAll();
                setOutputDone(); // signal end to receiver(s)
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the output row metadata and the input iterator before the first row is processed.
     *
     * <p>When the JSON source is not taken from an input field, the output starts from an empty row
     * layout and a missing file list is reported unless the meta allows it. Otherwise the first input
     * row is read, the source field is located, and the output layout is derived from the input
     * layout (optionally without the source field).
     *
     * @throws KettleException if the source field is not configured or cannot be found in the input
     */
    @Override
    protected void prepareToRowProcessing() throws KettleException, KettleStepException, KettleValueException {
        if (!meta.isInFields()) {
            data.outputRowMeta = new RowMeta();
            if (!meta.isDoNotFailIfNoFile() && data.files.nrOfFiles() == 0) {
                String errMsg = BaseMessages.getString(PKG, "JsonInput.Log.NoFiles");
                logError(errMsg);
                inputError(errMsg);
            }
        } else {
            data.readrow = getRow();
            data.inputRowMeta = getInputRowMeta();

            // Build output row meta starting from input fields' structure only if have something as input
            if (data.inputRowMeta != null) {
                data.outputRowMeta = data.inputRowMeta.clone();

                // Check if source field is provided
                if (Const.isEmpty(meta.getFieldValue())) {
                    logError(BaseMessages.getString(PKG, "JsonInput.Log.NoField"));
                    throw new KettleException(BaseMessages.getString(PKG, "JsonInput.Log.NoField"));
                }

                // cache the position of the field
                if (data.indexSourceField < 0) {
                    data.indexSourceField = data.inputRowMeta.indexOfValue(meta.getFieldValue());
                    if (data.indexSourceField < 0) {
                        logError(BaseMessages.getString(PKG, "JsonInput.Log.ErrorFindingField",
                                meta.getFieldValue()));
                        throw new KettleException(BaseMessages.getString(PKG,
                                "JsonInput.Exception.CouldnotFindField", meta.getFieldValue()));
                    }
                }

                // if RemoveSourceField option is set, we remove the source field from the output meta
                if (meta.isRemoveSourceField()) {
                    data.outputRowMeta.removeValueMeta(data.indexSourceField);
                    // Get total previous fields minus one since we remove source field
                    data.totalpreviousfields = data.inputRowMeta.size() - 1;
                } else {
                    // Get total previous fields
                    data.totalpreviousfields = data.inputRowMeta.size();
                }
            } else {
                data.outputRowMeta = new RowMeta();
            }
        }

        meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);

        // Create convert meta-data objects that will contain Date & Number formatters
        data.convertRowMeta = data.outputRowMeta.cloneToType(ValueMetaInterface.TYPE_STRING);
        data.inputs = new InputsReader(this, meta, data, new InputErrorHandler()).iterator();

        // Start with an empty row set already marked done, so the first fetch moves on to the inputs.
        data.readerRowSet = new QueueRowSet();
        data.readerRowSet.setDone();
        this.rowOutputConverter = new RowOutputConverter(getLogChannel());
    }

    /**
     * Registers {@code file} as a result file of the transformation when the meta asks for it.
     */
    private void addFileToResultFilesname(FileObject file) {
        if (meta.addResultFile()) {
            // Add this to the result file names...
            ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL, file, getTransMeta().getName(),
                    getStepname());
            resultFile.setComment(BaseMessages.getString(PKG, "JsonInput.Log.FileAddedResult"));
            addResultFile(resultFile);
        }
    }

    /**
     * Validates a file that is about to be read and fills the additional file fields when applicable.
     *
     * @param file the file to check; {@code null} and non-existing files are reported as input errors
     * @return {@code false} if the file is {@code null}, does not exist, or is empty while empty files
     *         are not ignored; {@code true} otherwise
     * @throws FileSystemException if the file system cannot be queried
     */
    public boolean onNewFile(FileObject file) throws FileSystemException {
        if (file == null) {
            String errMsg = BaseMessages.getString(PKG, "JsonInput.Log.IsNotAFile", "null");
            logError(errMsg);
            inputError(errMsg);
            return false;
        } else if (!file.exists()) {
            String errMsg = BaseMessages.getString(PKG, "JsonInput.Log.IsNotAFile",
                    file.getName().getFriendlyURI());
            logError(errMsg);
            inputError(errMsg);
            return false;
        }
        if (hasAdditionalFileFields()) {
            fillFileAdditionalFields(data, file);
        }
        if (file.getContent().getSize() == 0) {
            // log only basic as a warning (was before logError)
            if (meta.isIgnoreEmptyFile()) {
                logBasic(BaseMessages.getString(PKG, "JsonInput.Error.FileSizeZero", "" + file.getName()));
            } else {
                logError(BaseMessages.getString(PKG, "JsonInput.Error.FileSizeZero", "" + file.getName()));
                incrementErrors();
                return false;
            }
        }
        return true;
    }

    /**
     * Lets the superclass fill its file fields, then records the file name, bumps the file counter
     * and, if configured, adds the file to the result files.
     */
    @Override
    protected void fillFileAdditionalFields(JsonInputData data, FileObject file) throws FileSystemException {
        super.fillFileAdditionalFields(data, file);
        data.filename = KettleVFS.getFilename(file);
        data.filenr++;
        if (log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "JsonInput.Log.OpeningFile", file.toString()));
        }
        addFileToResultFilesname(file);
    }

    /**
     * Parses {@code input} with the JSON reader and stores the resulting row set in the step data.
     *
     * @return {@code true} if parsing succeeded; {@code false} if {@code input} is {@code null} or
     *         parsing failed (the failure is logged and reported as an input error)
     */
    private boolean parseNextInputToRowSet(InputStream input) throws KettleException {
        if (input != null) {
            try {
                data.readerRowSet = data.reader.parse(input);
                return true;
            } catch (KettleException ke) {
                logInputError(ke);
            } catch (Exception e) {
                logInputError(e);
            }
        }
        return false;
    }

    /** Logs a Kettle exception with its own message and reports it as an input error. */
    private void logInputError(KettleException e) {
        logError(e.getLocalizedMessage(), e);
        inputError(e.getLocalizedMessage());
    }

    /**
     * Logs a generic parse failure, naming either the current file or the source string that was
     * being parsed, and reports it as an input error.
     */
    private void logInputError(Exception e) {
        String errMsg;
        if (!meta.isInFields() || meta.getIsAFile()) {
            errMsg = BaseMessages.getString(PKG, "JsonReader.Error.ParsingFile", data.filename);
        } else {
            // Guard against a missing input row so this handler cannot itself fail and hide 'e'.
            Object source = data.readrow == null ? null : data.readrow[data.indexSourceField];
            errMsg = BaseMessages.getString(PKG, "JsonReader.Error.ParsingString", source);
        }
        logError(errMsg, e);
        inputError(errMsg);
    }

    /** Adds one to the step's error count. */
    private void incrementErrors() {
        setErrors(getErrors() + 1);
    }

    /**
     * Reports an input problem: as an error row when step error handling is active, otherwise by
     * incrementing the error count.
     */
    private void inputError(String errorMsg) {
        if (getStepMeta().isDoingErrorHandling()) {
            sendErrorRow(errorMsg);
        } else {
            incrementErrors();
        }
    }

    /** Error callbacks handed to the {@link InputsReader}; they log and count problems on this step. */
    private class InputErrorHandler implements InputsReader.ErrorHandler {

        @Override
        public void error(Exception e) {
            logError(BaseMessages.getString(PKG, "JsonInput.Log.UnexpectedError", e.toString()));
            incrementErrors();
        }

        @Override
        public void fileOpenError(FileObject file, FileSystemException e) {
            String msg = BaseMessages.getString(PKG, "JsonInput.Log.UnableToOpenFile", "" + data.filenr,
                    file.toString(), e.toString());
            logError(msg);
            inputError(msg);
        }

        @Override
        public void fileCloseError(FileObject file, FileSystemException e) {
            error(e);
        }
    }

    /**
     * get final row for output
     *
     * <p>Drains the current reader row set; when it yields nothing, the next input is parsed. Returns
     * {@code null} once there is neither a pending row nor a further input to read.
     */
    private Object[] getOneOutputRow() throws KettleException {
        Object[] rawReaderRow = null;
        while ((rawReaderRow = data.readerRowSet.getRow()) == null) {
            if (data.inputs.hasNext() && data.readerRowSet.isDone()) {
                try (InputStream nextIn = data.inputs.next()) {
                    boolean parsed = parseNextInputToRowSet(nextIn);
                    // An input that parsed to zero rows still yields one base row in field mode.
                    if (parsed && shouldOutputEmpty()) {
                        return buildBaseOutputRow();
                    }
                } catch (IOException e) {
                    logError(BaseMessages.getString(PKG, "JsonInput.Log.UnexpectedError", e.toString()), e);
                    incrementErrors();
                }
            } else {
                if (isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "JsonInput.Log.FinishedProcessing"));
                }
                return null;
            }
        }
        Object[] outputRow = rowOutputConverter.getRow(buildBaseOutputRow(), rawReaderRow, data);
        addExtraFields(outputRow, data);
        return outputRow;
    }

    /**
     * Sends an error row carrying {@code errorMsg}; uses the current input row when there is one,
     * otherwise an empty row. A failure to send is logged, not rethrown.
     */
    private void sendErrorRow(String errorMsg) {
        try {
            // same error as before
            String defaultErrCode = "JsonInput001";
            if (data.readrow != null) {
                putError(getInputRowMeta(), data.readrow, 1, errorMsg, meta.getFieldValue(), defaultErrCode);
            } else {
                // when no input only error fields are recognized
                putError(new RowMeta(), new Object[0], 1, errorMsg, null, defaultErrCode);
            }
        } catch (KettleStepException e) {
            logError(e.getLocalizedMessage(), e);
        }
    }

    /** True when reading from an input field and the current reader row set is empty and done. */
    private boolean shouldOutputEmpty() {
        return meta.isInFields() && isEmpty(data.readerRowSet);
    }

    /** True when the step data currently holds a file. */
    private boolean hasAdditionalFileFields() {
        return data.file != null;
    }

    /** True when {@code readerRowSet} has no rows and is marked done. */
    private boolean isEmpty(RowSet readerRowSet) {
        return readerRowSet.size() == 0 && readerRowSet.isDone();
    }

    /**
     * allocates out row
     *
     * <p>The row is sized to the output metadata. If an input row exists its values are copied in,
     * leaving out the source field when that field is configured to be removed.
     */
    private Object[] buildBaseOutputRow() {
        Object[] outputRowData;
        if (data.readrow != null) {
            if (meta.isRemoveSourceField() && data.indexSourceField > -1) {
                // skip the source field in the output array
                int sz = data.readrow.length;
                outputRowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());
                int ii = 0;
                for (int i = 0; i < sz; i++) {
                    if (i != data.indexSourceField) {
                        outputRowData[ii++] = data.readrow[i];
                    }
                }
            } else {
                outputRowData = RowDataUtil.createResizedCopy(data.readrow, data.outputRowMeta.size());
            }
        } else {
            outputRowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());
        }
        return outputRowData;
    }

    /**
     * Appends the optional file and row-number fields to {@code outputRowData}, starting right after
     * the previous fields and the JSON input fields. Only fields enabled in the meta are written, in
     * the fixed order used below.
     */
    // should be refactored
    private void addExtraFields(Object[] outputRowData, JsonInputData data) {
        int rowIndex = data.totalpreviousfields + data.nrInputFields;

        // See if we need to add the filename to the row...
        if (meta.includeFilename() && !Const.isEmpty(meta.getFilenameField())) {
            outputRowData[rowIndex++] = data.filename;
        }
        // See if we need to add the row number to the row...
        if (meta.includeRowNumber() && !Const.isEmpty(meta.getRowNumberField())) {
            outputRowData[rowIndex++] = Long.valueOf(data.rownr);
        }
        // Possibly add short filename...
        if (meta.getShortFileNameField() != null && meta.getShortFileNameField().length() > 0) {
            outputRowData[rowIndex++] = data.shortFilename;
        }
        // Add Extension
        if (meta.getExtensionField() != null && meta.getExtensionField().length() > 0) {
            outputRowData[rowIndex++] = data.extension;
        }
        // add path
        if (meta.getPathField() != null && meta.getPathField().length() > 0) {
            outputRowData[rowIndex++] = data.path;
        }
        // Add Size
        if (meta.getSizeField() != null && meta.getSizeField().length() > 0) {
            // Stored as-is instead of re-boxed, so a null size cannot raise a NullPointerException
            // (presumably a boxed Long on the step data - confirm).
            outputRowData[rowIndex++] = data.size;
        }
        // add Hidden
        if (meta.isHiddenField() != null && meta.isHiddenField().length() > 0) {
            // Was new Boolean(data.path), which parsed the path text as a boolean.
            // NOTE(review): assumes the step data exposes the file's 'hidden' flag - confirm.
            outputRowData[rowIndex++] = Boolean.valueOf(data.hidden);
        }
        // Add modification date
        if (meta.getLastModificationDateField() != null && meta.getLastModificationDateField().length() > 0) {
            outputRowData[rowIndex++] = data.lastModificationDateTime;
        }
        // Add Uri
        if (meta.getUriField() != null && meta.getUriField().length() > 0) {
            outputRowData[rowIndex++] = data.uriName;
        }
        // Add RootUri
        if (meta.getRootUriField() != null && meta.getRootUriField().length() > 0) {
            outputRowData[rowIndex++] = data.rootUriName;
        }
    }

    /**
     * Creates the JSON reader from the configured input fields and applies the
     * ignore-missing-path setting.
     */
    private void createReader() throws KettleException {
        data.reader = new FastJsonReader(meta.getInputFields(), meta.isDefaultPathLeafToNull(), log);
        data.reader.setIgnoreMissingPath(meta.isIgnoreMissingPath());
    }

    /**
     * Closes the current file (if any), clears the reader-related references held by the step data
     * and then delegates to the superclass.
     */
    @Override
    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (JsonInputMeta) smi;
        data = (JsonInputData) sdi;
        if (data.file != null) {
            IOUtils.closeQuietly(data.file);
        }
        data.inputs = null;
        data.reader = null;
        data.readerRowSet = null;
        data.repeatedFields = null;
        super.dispose(smi, sdi);
    }

    /**
     * Only to comply with super, does nothing good.
     *
     * @throws NotImplementedException everytime
     */
    @Override
    protected IBaseFileInputReader createReader(JsonInputMeta meta, JsonInputData data, FileObject file)
            throws Exception {
        throw new NotImplementedException();
    }
}