Java tutorial
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.mailinput;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.List;

import javax.mail.Header;
import javax.mail.Message;

import org.apache.commons.collections.iterators.ArrayIterator;
import org.apache.commons.lang.StringUtils;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.entries.getpop.MailConnection;
import org.pentaho.di.job.entries.getpop.MailConnectionMeta;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Read data from POP3/IMAP server and input data to the next steps.
 *
 * <p>
 * The step works folder by folder: {@link #init} opens the mail connection and (for a static folder) resolves the
 * folder list, {@link #processRow} emits one output row per message, and {@link #dispose} closes the connection.
 * When the folder name comes from an incoming field ("dynamic folder"), the folder list is rebuilt for every input
 * row and the input row values are copied in front of the mail fields.
 *
 * @author Samatar
 * @since 21-08-2009
 */
public class MailInput extends BaseStep implements StepInterface {
  private static Class<?> PKG = MailInputMeta.class; // for i18n purposes, needed by Translator2!!

  private MailInputMeta meta;
  private MailInputData data;

  private MessageParser instance = new MessageParser();

  public MailInput(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
      Trans trans) {
    super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
  }

  /**
   * Produces at most one output row (one mail message) per call.
   *
   * @return {@code true} if the step should be called again, {@code false} once there are no more messages or the
   *         configured row limit has been reached
   * @throws KettleException
   *           if a message could not be converted to a row
   */
  @Override
  public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    meta = (MailInputMeta) smi;
    data = (MailInputData) sdi;

    Object[] outputRowData = getOneRow();
    if (outputRowData == null) {
      // no more input to be expected...
      setOutputDone();
      return false;
    }

    if (isRowLevel()) {
      log.logRowlevel(toString(),
          BaseMessages.getString(PKG, "MailInput.Log.OutputRow", data.outputRowMeta.getString(outputRowData)));
    }
    putRow(data.outputRowMeta, outputRowData); // copy row to output rowset(s);

    if (data.rowlimit > 0 && data.rownr >= data.rowlimit) {
      // limit has been reached: stop now.
      setOutputDone();
      return false;
    }

    return true;
  }

  /**
   * Builds the list of folders to read and resets the per-folder-list counters ({@code folderenr},
   * {@code messagesCount}, {@code rownr}) on the step data.
   *
   * @param realIMAPFolder
   *          the (already variable-substituted) root folder name; may be empty, in which case the INBOX folder is
   *          used
   * @return the root folder, followed by its sub folders when "include sub folders" is enabled and the connection
   *         reports any; for the MBOX protocol without sub folders a single empty name
   * @throws KettleException
   *           if the sub folders could not be listed
   */
  public String[] getFolders(String realIMAPFolder) throws KettleException {
    data.folderenr = 0;
    data.messagesCount = 0;
    data.rownr = 0;

    if (meta.isIncludeSubFolders()) {
      String[] subFolders = data.mailConn.returnAllFolders(realIMAPFolder);
      if (subFolders != null && subFolders.length > 0) {
        // root folder first, then every sub folder
        String[] folderslist = new String[subFolders.length + 1];
        folderslist[0] = Const.NVL(realIMAPFolder, MailConnectionMeta.INBOX_FOLDER);
        System.arraycopy(subFolders, 0, folderslist, 1, subFolders.length);
        return folderslist;
      }
    }

    // Single folder only. mstor's default folder has no name
    return data.mailConn.getProtocol() == MailConnectionMeta.PROTOCOL_MBOX
        ? new String[] { "" }
        : new String[] { Const.NVL(realIMAPFolder, MailConnectionMeta.INBOX_FOLDER) };
  }

  /**
   * Pushes the configured search filters (sender, recipient, subject, received date and, for non-POP3 connections,
   * message flags) to the mail connection.
   *
   * @param beginDate
   *          first (or only) received date of the date condition; may be {@code null} when no date condition is set
   * @param endDate
   *          second received date, only used by the "between" condition
   */
  private void applySearch(Date beginDate, Date endDate) {
    // apply search term?
    String realSearchSender = environmentSubstitute(meta.getSenderSearchTerm());
    if (!Const.isEmpty(realSearchSender)) {
      // apply FROM
      data.mailConn.setSenderTerm(realSearchSender, meta.isNotTermSenderSearch());
    }
    String realSearchReceipient = environmentSubstitute(meta.getRecipientSearch());
    if (!Const.isEmpty(realSearchReceipient)) {
      // apply TO
      data.mailConn.setReceipientTerm(realSearchReceipient);
    }
    String realSearchSubject = environmentSubstitute(meta.getSubjectSearch());
    if (!Const.isEmpty(realSearchSubject)) {
      // apply Subject
      data.mailConn.setSubjectTerm(realSearchSubject, meta.isNotTermSubjectSearch());
    }

    // Received Date
    switch (meta.getConditionOnReceivedDate()) {
      case MailConnectionMeta.CONDITION_DATE_EQUAL:
        data.mailConn.setReceivedDateTermEQ(beginDate);
        break;
      case MailConnectionMeta.CONDITION_DATE_GREATER:
        data.mailConn.setReceivedDateTermGT(beginDate);
        break;
      case MailConnectionMeta.CONDITION_DATE_SMALLER:
        data.mailConn.setReceivedDateTermLT(beginDate);
        break;
      case MailConnectionMeta.CONDITION_DATE_BETWEEN:
        data.mailConn.setReceivedDateTermBetween(beginDate, endDate);
        break;
      default:
        break;
    }

    // set FlagTerm?
    if (!data.usePOP) {
      // POP3 does not support any flags.
      // but still use ones for IMAP and maybe for MBOX?
      switch (meta.getValueImapList()) {
        case MailConnectionMeta.VALUE_IMAP_LIST_NEW:
          data.mailConn.setFlagTermNew();
          break;
        case MailConnectionMeta.VALUE_IMAP_LIST_OLD:
          data.mailConn.setFlagTermOld();
          break;
        case MailConnectionMeta.VALUE_IMAP_LIST_READ:
          data.mailConn.setFlagTermRead();
          break;
        case MailConnectionMeta.VALUE_IMAP_LIST_UNREAD:
          data.mailConn.setFlagTermUnread();
          break;
        case MailConnectionMeta.VALUE_IMAP_LIST_FLAGGED:
          data.mailConn.setFlagTermFlagged();
          break;
        case MailConnectionMeta.VALUE_IMAP_LIST_NOT_FLAGGED:
          data.mailConn.setFlagTermNotFlagged();
          break;
        case MailConnectionMeta.VALUE_IMAP_LIST_DRAFT:
          data.mailConn.setFlagTermDraft();
          break;
        case MailConnectionMeta.VALUE_IMAP_LIST_NOT_DRAFT:
          data.mailConn.setFlagTermNotDraft();
          break;
        default:
          break;
      }
    }
  }

  /**
   * Build an empty row based on the meta-data...
   *
   * @return a freshly allocated row sized for the output row meta
   */
  private Object[] buildEmptyRow() {
    return RowDataUtil.allocateRowData(data.outputRowMeta.size());
  }

  /**
   * @return {@code true} when no folder has been opened yet or the current folder has no more messages
   */
  private boolean isFolderExhausted() {
    return data.folder == null || !data.folderIterator.hasNext();
  }

  /**
   * Reads the next message, opening further folders as needed, and converts it to an output row.
   *
   * @return the output row, or {@code null} when there are no more folders/messages
   * @throws KettleException
   *           if the message could not be read, or could not be parsed while "stop on error" is enabled
   */
  private Object[] getOneRow() throws KettleException {
    while (isFolderExhausted()) {
      if (!openNextFolder()) {
        return null;
      }
    }

    Object[] r = buildEmptyRow();
    if (meta.isDynamicFolder()) {
      // keep the incoming row values in front of the mail fields
      System.arraycopy(data.readrow, 0, r, 0, data.readrow.length);
    }

    try {
      Message message = data.folderIterator.next();

      if (isDebug()) {
        logDebug(BaseMessages.getString(PKG, "MailInput.Log.FetchingMessage", message.getMessageNumber()));
      }

      try {
        instance.parseToArray(r, message);
      } catch (Exception e) {
        String msg = e.getMessage();
        if (meta.isStopOnError()) {
          throw new KettleException(msg, e);
        } else {
          // tolerated: the (possibly partially filled) row is still emitted
          logError(msg, e);
        }
      }

      incrementLinesInput();
      data.rownr++;
    } catch (Exception e) {
      throw new KettleException("Error adding values to row!", e);
    }

    return r;
  }

  /**
   * Advances to the next folder of the current folder list and prepares {@code data.folderIterator} for it. With a
   * dynamic folder, a new input row is fetched (and the folder list rebuilt) whenever the current list is used up.
   *
   * @return {@code true} if a folder was opened, {@code false} when there is nothing left to read or an error
   *         occurred (in which case the error is logged and the transformation is stopped)
   */
  @SuppressWarnings("unchecked")
  private boolean openNextFolder() {
    try {
      if (!meta.isDynamicFolder()) {
        // static folders list
        // let's check if we fetched all values in list
        if (data.folderenr >= data.folders.length) {
          // We have fetched all folders
          if (isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "MailInput.Log.FinishedProcessing"));
          }
          return false;
        }
      } else {
        // dynamic folders
        if (first) {
          first = false;

          data.readrow = getRow(); // Get row from input rowset & set row busy!
          if (data.readrow == null) {
            if (isDetailed()) {
              logDetailed(BaseMessages.getString(PKG, "MailInput.Log.FinishedProcessing"));
            }
            return false;
          }

          data.inputRowMeta = getInputRowMeta();
          data.outputRowMeta = data.inputRowMeta.clone();
          meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);

          // Get total previous fields
          data.totalpreviousfields = data.inputRowMeta.size();

          if (Const.isEmpty(meta.getFolderField())) {
            logError(BaseMessages.getString(PKG, "MailInput.Error.DynamicFolderFieldMissing"));
            stopAll();
            setErrors(1);
            return false;
          }

          data.indexOfFolderField = data.inputRowMeta.indexOfValue(meta.getFolderField());
          if (data.indexOfFolderField < 0) {
            logError(BaseMessages.getString(PKG, "MailInput.Error.DynamicFolderUnreachable",
                meta.getFolderField()));
            stopAll();
            setErrors(1);
            return false;
          }

          // get folder
          String foldername = data.inputRowMeta.getString(data.readrow, data.indexOfFolderField);
          if (isDebug()) {
            logDebug(BaseMessages.getString(PKG, "MailInput.Log.FoldernameInStream", meta.getFolderField(),
                foldername));
          }
          data.folders = getFolders(foldername);
        } // end if first

        if (data.folderenr >= data.folders.length) {
          // we have fetched all values for input row
          // grab another row
          data.readrow = getRow(); // Get row from input rowset & set row busy!
          if (data.readrow == null) {
            if (isDetailed()) {
              logDetailed(BaseMessages.getString(PKG, "MailInput.Log.FinishedProcessing"));
            }
            return false;
          }
          // get folder
          String foldername = data.inputRowMeta.getString(data.readrow, data.indexOfFolderField);
          data.folders = getFolders(foldername);
        }
      }

      data.start = parseIntWithSubstitute(meta.getStart());
      data.end = parseIntWithSubstitute(meta.getEnd());

      // Get the current folder
      data.folder = data.folders[data.folderenr];

      // Move folder pointer ahead!
      data.folderenr++;

      // open folder
      if (!data.usePOP && !Const.isEmpty(data.folder)) {
        data.mailConn.openFolder(data.folder, false);
      } else {
        data.mailConn.openFolder(false);
      }

      boolean useBatch = meta.useBatch();
      // The "first mails" value is only parsed when batching is off, so an unparsable value keeps being ignored
      // in batch mode (as the original short-circuit evaluation did).
      int firstMails = 0;
      if (!useBatch) {
        String realFirstMails = environmentSubstitute(meta.getFirstMails());
        if (!Const.isEmpty(realFirstMails)) {
          firstMails = Integer.parseInt(realFirstMails);
        }
      }

      if (useBatch || firstMails > 0) {
        // get data by pieces
        Integer batchSize = useBatch ? meta.getBatchSize() : firstMails;
        // Integer.valueOf keeps both operands boxed: with a plain int literal the conditional would be typed int
        // and unbox data.start, throwing a NullPointerException when no start index is configured.
        Integer start = useBatch ? data.start : Integer.valueOf(1);
        Integer end = useBatch ? data.end : batchSize;
        data.folderIterator = new BatchFolderIterator(data.mailConn.getFolder(), batchSize, start, end); // TODO:args

        if (data.mailConn.getSearchTerm() != null) {
          // add search filter
          data.folderIterator = new SearchEnabledFolderIterator(data.folderIterator, data.mailConn.getSearchTerm());
        }
      } else {
        // fetch all
        data.mailConn.retrieveMessages();
        data.folderIterator = new ArrayIterator(data.mailConn.getMessages());
      }

      if (isDebug()) {
        logDebug(BaseMessages.getString(PKG, "MailInput.Log.MessagesInFolder", data.folder, data.messagesCount));
      }

    } catch (Exception e) {
      logError("Error opening folder " + data.folderenr + " " + data.folder + ": " + e.toString());
      logError(Const.getStackTracker(e));
      stopAll();
      setErrors(1);
      return false;
    }
    return true;
  }

  /**
   * Initializes the step: builds the output row meta (static folder only), resolves connection settings, validates
   * the received-date search terms, connects to the mail server and applies the search filters.
   *
   * <p>
   * Note that a failure while parsing the search dates or while connecting is logged and stops the transformation
   * via {@code stopAll()} / {@code setErrors(1)}, but this method still returns {@code true} in those cases.
   *
   * @return {@code false} if the base initialization or the output meta-data creation failed, {@code true} otherwise
   */
  @Override
  public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
    meta = (MailInputMeta) smi;
    data = (MailInputData) sdi;

    if (!super.init(smi, sdi)) {
      return false;
    }

    if (!meta.isDynamicFolder()) {
      try {
        // Create the output row meta-data
        data.outputRowMeta = new RowMeta();
        // get the metadata populated
        meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);
      } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "MailInput.ErrorInit", e.toString()));
        logError(Const.getStackTracker(e));
        return false;
      }
    }

    data.usePOP = meta.getProtocol().equals(MailConnectionMeta.PROTOCOL_STRING_POP3);

    String realserver = environmentSubstitute(meta.getServerName());
    if (meta.getProtocol().equals(MailConnectionMeta.PROTOCOL_STRING_MBOX)
        && StringUtils.startsWith(realserver, "file://")) {
      realserver = StringUtils.remove(realserver, "file://");
    }

    String realusername = environmentSubstitute(meta.getUserName());
    String realpassword = environmentSubstitute(meta.getPassword());
    int realport = Const.toInt(environmentSubstitute(meta.getPort()), -1);
    String realProxyUsername = environmentSubstitute(meta.getProxyUsername());

    if (!meta.isDynamicFolder()) {
      // Limit field has absolute priority
      String reallimitrow = environmentSubstitute(meta.getRowLimit());
      int limit = Const.toInt(reallimitrow, 0);
      if (limit == 0) {
        // no explicit limit: fall back to the protocol specific "first mails" setting
        limit = getReadFirst(meta.getProtocol());
      }
      data.rowlimit = limit;
    }

    Date beginDate = null;
    Date endDate = null;
    SimpleDateFormat df = new SimpleDateFormat(MailInputMeta.DATE_PATTERN);

    // check search terms
    // Received Date
    try {
      String realBeginDate;
      switch (meta.getConditionOnReceivedDate()) {
        case MailConnectionMeta.CONDITION_DATE_EQUAL:
        case MailConnectionMeta.CONDITION_DATE_GREATER:
        case MailConnectionMeta.CONDITION_DATE_SMALLER:
          realBeginDate = environmentSubstitute(meta.getReceivedDate1());
          if (Const.isEmpty(realBeginDate)) {
            throw new KettleException(
                BaseMessages.getString(PKG, "MailInput.Error.ReceivedDateSearchTermEmpty"));
          }
          beginDate = df.parse(realBeginDate);
          break;
        case MailConnectionMeta.CONDITION_DATE_BETWEEN:
          realBeginDate = environmentSubstitute(meta.getReceivedDate1());
          if (Const.isEmpty(realBeginDate)) {
            throw new KettleException(
                BaseMessages.getString(PKG, "MailInput.Error.ReceivedDatesSearchTermEmpty"));
          }
          beginDate = df.parse(realBeginDate);
          String realEndDate = environmentSubstitute(meta.getReceivedDate2());
          if (Const.isEmpty(realEndDate)) {
            throw new KettleException(
                BaseMessages.getString(PKG, "MailInput.Error.ReceivedDatesSearchTermEmpty"));
          }
          endDate = df.parse(realEndDate);
          break;
        default:
          break;
      }
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "MailInput.Error.SettingSearchTerms", e.getMessage()));
      setErrors(1);
      stopAll();
    }

    try {
      // create a mail connection object
      data.mailConn = new MailConnection(log,
          MailConnectionMeta.getProtocolFromString(meta.getProtocol(), MailConnectionMeta.PROTOCOL_IMAP),
          realserver, realport, realusername, realpassword, meta.isUseSSL(), meta.isUseProxy(),
          realProxyUsername);
      // connect
      data.mailConn.connect();
      // Need to apply search filters?
      applySearch(beginDate, endDate);

      if (!meta.isDynamicFolder()) {
        // pass static folder name
        String realIMAPFolder = environmentSubstitute(meta.getIMAPFolder());
        // return folders list
        // including sub folders if necessary
        data.folders = getFolders(realIMAPFolder);
      }
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "MailInput.Error.OpeningConnection", e.getMessage()));
      setErrors(1);
      stopAll();
    }

    data.nrFields = meta.getInputFields() != null ? meta.getInputFields().length : 0;

    return true;
  }

  /**
   * Returns the protocol specific "retrieve first N mails" setting, used as row limit when no explicit limit is set.
   * The setting is variable-substituted first, consistently with how the same value is read when opening a folder.
   *
   * @param protocol
   *          the protocol name as stored in the step meta
   * @return the configured number of mails, or 0 (no limit) when unset, unparsable or not applicable
   */
  private int getReadFirst(String protocol) {
    if (protocol.equals(MailConnectionMeta.PROTOCOL_STRING_POP3)) {
      return Const.toInt(environmentSubstitute(meta.getFirstMails()), 0);
    }
    if (protocol.equals(MailConnectionMeta.PROTOCOL_STRING_IMAP)) {
      return Const.toInt(environmentSubstitute(meta.getFirstIMAPMails()), 0);
    }
    // and we do not have this option for MBOX on UI.
    return 0;
  }

  /**
   * Closes the mail connection (best effort) and releases the step resources.
   */
  @Override
  public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
    meta = (MailInputMeta) smi;
    data = (MailInputData) sdi;

    if (data.mailConn != null) {
      try {
        data.mailConn.disconnect();
      } catch (Exception e) {
        // Deliberately not fatal: we are shutting down anyway. Keep a trace for troubleshooting.
        if (isDebug()) {
          logDebug("Error disconnecting from mail server: " + e.toString());
        }
      } finally {
        // drop the reference even if disconnect failed, so the connection is never reused
        data.mailConn = null;
      }
    }

    super.dispose(smi, sdi);
  }

  /**
   * Variable-substitutes the given text and parses it as an integer.
   *
   * @param toParse
   *          the raw (possibly variable-containing) text
   * @return the parsed value, or {@code null} when the text is empty or not a valid integer (the latter is logged)
   */
  private Integer parseIntWithSubstitute(String toParse) {
    toParse = environmentSubstitute(toParse);
    if (!StringUtils.isEmpty(toParse)) {
      try {
        return Integer.parseInt(toParse);
      } catch (NumberFormatException e) {
        log.logError(e.getLocalizedMessage());
      }
    }
    return null;
  }

  /**
   * Extracted message parse algorithm to be able to unit test separately
   */
  class MessageParser {

    /**
     * Fills the mail fields of the given row from the message. Field {@code i} of the step meta is written at index
     * {@code data.totalpreviousfields + i}.
     *
     * @param r
     *          the row to fill
     * @param message
     *          the message to read the values from
     * @return the same row instance {@code r}
     * @throws Exception
     *           if a value could not be read; the message names the offending field and the cause is preserved
     */
    Object[] parseToArray(Object[] r, Message message) throws Exception {

      // Execute for each Input field...
      for (int i = 0; i < data.nrFields; i++) {
        int index = data.totalpreviousfields + i;

        try {
          switch (meta.getInputFields()[i].getColumn()) {
            case MailInputField.COLUMN_MESSAGE_NR:
              r[index] = Long.valueOf(message.getMessageNumber());
              break;
            case MailInputField.COLUMN_SUBJECT:
              r[index] = message.getSubject();
              break;
            case MailInputField.COLUMN_SENDER:
              r[index] = StringUtils.join(message.getFrom(), ";");
              break;
            case MailInputField.COLUMN_REPLY_TO:
              r[index] = StringUtils.join(message.getReplyTo(), ";");
              break;
            case MailInputField.COLUMN_RECIPIENTS:
              r[index] = StringUtils.join(message.getAllRecipients(), ";");
              break;
            case MailInputField.COLUMN_DESCRIPTION:
              r[index] = message.getDescription();
              break;
            case MailInputField.COLUMN_BODY:
              r[index] = data.mailConn.getMessageBody(message);
              break;
            case MailInputField.COLUMN_RECEIVED_DATE:
              Date receivedDate = message.getReceivedDate();
              r[index] = receivedDate != null ? new Date(receivedDate.getTime()) : null;
              break;
            case MailInputField.COLUMN_SENT_DATE:
              Date sentDate = message.getSentDate();
              r[index] = sentDate != null ? new Date(sentDate.getTime()) : null;
              break;
            case MailInputField.COLUMN_CONTENT_TYPE:
              r[index] = message.getContentType();
              break;
            case MailInputField.COLUMN_FOLDER_NAME:
              r[index] = data.mailConn.getFolderName();
              break;
            case MailInputField.COLUMN_SIZE:
              r[index] = Long.valueOf(message.getSize());
              break;
            case MailInputField.COLUMN_FLAG_DRAFT:
              r[index] = Boolean.valueOf(data.mailConn.isMessageDraft(message));
              break;
            case MailInputField.COLUMN_FLAG_FLAGGED:
              r[index] = Boolean.valueOf(data.mailConn.isMessageFlagged(message));
              break;
            case MailInputField.COLUMN_FLAG_NEW:
              r[index] = Boolean.valueOf(data.mailConn.isMessageNew(message));
              break;
            case MailInputField.COLUMN_FLAG_READ:
              r[index] = Boolean.valueOf(data.mailConn.isMessageRead(message));
              break;
            case MailInputField.COLUMN_FLAG_DELETED:
              r[index] = Boolean.valueOf(data.mailConn.isMessageDeleted(message));
              break;
            case MailInputField.COLUMN_ATTACHED_FILES_COUNT:
              r[index] = Long.valueOf(data.mailConn.getAttachedFilesCount(message, null));
              break;
            case MailInputField.COLUMN_HEADER:
              String name = meta.getInputFields()[i].getName();
              // *only one name
              String[] arr = { name };
              // this code was before generic epoch
              Enumeration<?> en = message.getMatchingHeaders(arr);
              if (en == null) {
                r[index] = "";
                break;
              }
              List<String> headers = new ArrayList<String>();
              while (en.hasMoreElements()) {
                Header next = Header.class.cast(en.nextElement());
                headers.add(next.getValue());
              }
              // [PDI-6532] if there is no matching headers return empty String
              r[index] = headers.isEmpty() ? "" : StringUtils.join(headers, ";");
              break;
            case MailInputField.COLUMN_BODY_CONTENT_TYPE:
              r[index] = data.mailConn.getMessageBodyContentType(message);
              break;
            default:
              break;
          }
        } catch (Exception e) {
          String errMsg = "Error adding value for field " + meta.getInputFields()[i].getName();
          throw new Exception(errMsg, e);
        }
      }
      return r;
    }
  }
}