com.ephesoft.dcma.tablefinder.service.TableFinderServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.ephesoft.dcma.tablefinder.service.TableFinderServiceImpl.java

Source

/********************************************************************************* 
* Ephesoft is a Intelligent Document Capture and Mailroom Automation program 
* developed by Ephesoft, Inc. Copyright (C) 2015 Ephesoft Inc. 
* 
* This program is free software; you can redistribute it and/or modify it under 
* the terms of the GNU Affero General Public License version 3 as published by the 
* Free Software Foundation with the addition of the following permission added 
* to Section 15 as permitted in Section 7(a): FOR ANY PART OF THE COVERED WORK 
* IN WHICH THE COPYRIGHT IS OWNED BY EPHESOFT, EPHESOFT DISCLAIMS THE WARRANTY 
* OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 
* 
* This program is distributed in the hope that it will be useful, but WITHOUT 
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
* FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more 
* details. 
* 
* You should have received a copy of the GNU Affero General Public License along with 
* this program; if not, see http://www.gnu.org/licenses or write to the Free 
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
* 02110-1301 USA. 
* 
* You can contact Ephesoft, Inc. headquarters at 111 Academy Way, 
* Irvine, CA 92617, USA. or at email address info@ephesoft.com. 
* 
* The interactive user interfaces in modified source and object code versions 
* of this program must display Appropriate Legal Notices, as required under 
* Section 5 of the GNU Affero General Public License version 3. 
* 
* In accordance with Section 7(b) of the GNU Affero General Public License version 3, 
* these Appropriate Legal Notices must retain the display of the "Ephesoft" logo. 
* If the display of the logo is not reasonably feasible for 
* technical reasons, the Appropriate Legal Notices must display the words 
* "Powered by Ephesoft". 
********************************************************************************/

package com.ephesoft.dcma.tablefinder.service;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.script.ScriptException;

import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.ephesoft.dcma.batch.schema.Column;
import com.ephesoft.dcma.batch.schema.Coordinates;
import com.ephesoft.dcma.batch.schema.DataTable;
import com.ephesoft.dcma.batch.schema.HeaderRow;
import com.ephesoft.dcma.batch.schema.HocrPages.HocrPage;
import com.ephesoft.dcma.batch.schema.HocrPages.HocrPage.Spans.Span;
import com.ephesoft.dcma.batch.schema.Row;
import com.ephesoft.dcma.common.HocrUtil;
import com.ephesoft.dcma.common.LineDataCarrier;
import com.ephesoft.dcma.core.common.CurrencyCode;
import com.ephesoft.dcma.core.common.ExpressionEvaluator;
import com.ephesoft.dcma.core.common.TableColumnVO;
import com.ephesoft.dcma.core.exception.DCMAApplicationException;
import com.ephesoft.dcma.da.domain.TableColumnExtractionRule;
import com.ephesoft.dcma.da.domain.TableColumnsInfo;
import com.ephesoft.dcma.da.domain.TableExtractionRule;
import com.ephesoft.dcma.da.domain.TableInfo;
import com.ephesoft.dcma.da.domain.TableRuleInfo;
import com.ephesoft.dcma.tablefinder.constants.TableExtractionConstants;
import com.ephesoft.dcma.tablefinder.data.DataCarrier;
import com.ephesoft.dcma.tablefinder.share.ColumnCoordinates;
import com.ephesoft.dcma.tablefinder.share.ColumnHeaderExtractionDataCarrier;
import com.ephesoft.dcma.tablefinder.share.DataTableService;
import com.ephesoft.dcma.tablefinder.share.RegexValidationDataCarrier;
import com.ephesoft.dcma.tablefinder.share.TableExtractionAPI;
import com.ephesoft.dcma.tablefinder.share.TableExtractionAPIResult;
import com.ephesoft.dcma.tablefinder.share.TableExtractionResult;
import com.ephesoft.dcma.tablefinder.share.TableExtractionResultModifierUtility;
import com.ephesoft.dcma.tablefinder.share.TableRowFinderUtility;
import com.ephesoft.dcma.util.CurrencyUtil;
import com.ephesoft.dcma.util.EphesoftStringUtil;
import com.ephesoft.dcma.util.NumberUtil;

public class TableFinderServiceImpl implements TableFinderService {

    /**
     * LOGGER to print the logging information.
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(TableFinderServiceImpl.class);

    /**
     * Gap between words of a column.
     */
    private int gapBetweenColumnWords;

    /**
     * Returns the gap between words of a column.
     * 
     * @return int the current value of the {@code gapBetweenColumnWords} field
     */
    public int getGapBetweenColumnWords() {
        return this.gapBetweenColumnWords;
    }

    /**
     * Sets the gap between words of a column.
     * 
     * @param gapBetweenColumnWords int value stored in the {@code gapBetweenColumnWords} field
     */
    public void setGapBetweenColumnWords(final int gapBetweenColumnWords) {
        this.gapBetweenColumnWords = gapBetweenColumnWords;
    }

    /**
     * Represents the fuzzy match threshold value for applying fuzziness.
     */
    private float fuzzyMatchThresholdValue;

    /**
     * Returns the fuzzy match threshold value.
     * 
     * @return float the current value of the {@code fuzzyMatchThresholdValue} field
     */
    @Override
    public float getFuzzyMatchThresholdValue() {
        return this.fuzzyMatchThresholdValue;
    }

    /**
     * Sets the fuzzy match threshold value. Within this class the stored value is handed to the row search and the column
     * header lookup of the table extraction.
     * 
     * @param fuzzyMatchThresholdValue float value stored in the {@code fuzzyMatchThresholdValue} field
     */
    public void setFuzzyMatchThresholdValue(float fuzzyMatchThresholdValue) {
        this.fuzzyMatchThresholdValue = fuzzyMatchThresholdValue;
    }

    /**
     * Invalid characters that need to be ignored before applying a table rule to the extracted column value.
     */
    private String invalidRuleCharacters;

    /**
     * Finds the table data of a page by delegating to {@code extractFields}.
     * 
     * @param inputDataCarrier {@link List}<{@link TableInfo}> table definitions to extract data for
     * @param hocrPage {@link HocrPage} page on which the tables are searched
     * @param maxResults int forwarded unchanged to {@code extractFields}
     * @return {@link DataTable} the result of {@code extractFields}
     * @throws DCMAApplicationException propagated from {@code extractFields}
     */
    public DataTable findTableData(final List<TableInfo> inputDataCarrier, final HocrPage hocrPage,
            final int maxResults) throws DCMAApplicationException {
        return extractFields(inputDataCarrier, hocrPage, maxResults);
    }

    /**
     * Extracts the table data of a page by delegating to {@code readAllDataTables}.
     * 
     * @param tableInfoList {@link List}<{@link TableInfo}> table definitions to extract data for
     * @param hocrPage {@link HocrPage} page on which the tables are searched
     * @param maxResults int accepted for the caller's sake; this method does not use it
     * @return {@link DataTable} the result of {@code readAllDataTables}
     * @throws DCMAApplicationException propagated from {@code readAllDataTables}
     */
    private final DataTable extractFields(final List<TableInfo> tableInfoList, final HocrPage hocrPage,
            final int maxResults) throws DCMAApplicationException {
        return readAllDataTables(tableInfoList, hocrPage);
    }

    /**
     * Reads all data tables in the page.
     * <p>
     * For every non-null {@link TableInfo} the method names the data table, initializes its header row and rows, creates
     * the column headers and then runs each {@link TableExtractionRule} that has a non-empty start pattern. The rows
     * extracted by each rule are validated against the table rules and collected; finally
     * {@code DataTableService.addBestRuleRows} is asked to add rows from the collected results to the data table.
     * <p>
     * A single {@link DataTable} instance is shared by all entries of {@code tableInfoList}, so its name is overwritten
     * on each iteration.
     * 
     * @param tableInfoList {@link List}<{@link TableInfo}> table definitions; may be null, in which case an empty
     *            {@link DataTable} is returned
     * @param hocrPage {@link HocrPage} page on which the table rows are searched
     * @return {@link DataTable} never null
     * @throws DCMAApplicationException if a table name is null or empty, or in case an unusual behavior comes.
     */
    private DataTable readAllDataTables(final List<TableInfo> tableInfoList, final HocrPage hocrPage)
            throws DCMAApplicationException {

        final DataTable dataTable = new DataTable();
        DataTableService tableExtractionUtility = new DataTableService();
        if (null != tableInfoList) {
            for (final TableInfo tableInfo : tableInfoList) {
                if (null == tableInfo) {
                    LOGGER.info("Table info is null.");
                    continue;
                }
                final String tableName = tableInfo.getName();
                LOGGER.info(EphesoftStringUtil.concatenate("Extracting data for table : ", tableName));
                if (EphesoftStringUtil.isNullOrEmpty(tableName)) {
                    throw new DCMAApplicationException("Table name is null or empty.");
                }
                dataTable.setName(tableName);
                tableExtractionUtility.initializeHeaderRow(dataTable);
                tableExtractionUtility.initializeDataTableRows(dataTable);
                final List<TableColumnsInfo> tableColumnsInfoList = tableInfo.getTableColumnsInfo();
                if (CollectionUtils.isEmpty(tableColumnsInfoList)) {
                    LOGGER.error("Table Columns Info list is null or empty.");
                    continue;
                }
                HeaderRow headerRow = dataTable.getHeaderRow();
                HeaderRow.Columns columnsHeader = headerRow.getColumns();
                final List<Column> columnHeaderList = columnsHeader.getColumn();
                tableExtractionUtility.createColumnHeaders(columnHeaderList, tableColumnsInfoList);

                final List<TableExtractionRule> tableExtractionRuleList = tableInfo.getTableExtractionRules();
                if (CollectionUtils.isEmpty(tableExtractionRuleList)) {
                    LOGGER.error("Table Extraction Rule List is null or empty.");
                    continue;
                }

                // Getting attributes for extracting table data.
                final CurrencyCode tableCurrencyCode = tableInfo.getCurrencyCode();
                final String ruleOperatorDefinedForTable = tableInfo.getRuleOperator();
                final boolean isRemoveInvalidRows = tableInfo.isRemoveInvalidRows();
                final List<TableRuleInfo> tableValidationRulesList = tableInfo.getTableRuleInfo();
                LOGGER.debug(EphesoftStringUtil.concatenate("Currency for the table is: ", tableCurrencyCode,
                        "\n Rule Operator for table is: ", ruleOperatorDefinedForTable,
                        "\n Do we need to remove invalid rows?: ", isRemoveInvalidRows));
                List<LineDataCarrier> lineDataCarrierList = null;

                // Map to store the column header info against column name. It is created on demand by the utility and
                // cleared after every rule so that the same instance can be handed back in for the next rule.
                Map<String, DataCarrier> colHeaderInfoMap = null;

                // Maps whether a column is currency or not. If column field is currency then its currency code is stored.
                // Currently one locale is stored per table, so the value would be the same for all keys. This
                // representation would be helpful if currency support is ever needed at column level.
                Map<String, CurrencyCode> columnCurrencyMap = null;
                List<TableColumnVO> tableColumnVOList = null;
                int noOfTableColumns = tableColumnsInfoList.size();

                // One entry per rule that produced at least one row.
                List<TableExtractionResult> allTableRowsList = new ArrayList<TableExtractionResult>(
                        tableExtractionRuleList.size());
                for (TableExtractionRule tableExtractionRule : tableExtractionRuleList) {
                    if (null != tableExtractionRule) {
                        final String startPattern = tableExtractionRule.getStartPattern();
                        if (EphesoftStringUtil.isNullOrEmpty(startPattern)) {

                            // The message is concatenated up front: passing the parts as separate SLF4J arguments
                            // without "{}" placeholders would drop the rule name and the trailing text from the log.
                            LOGGER.error(EphesoftStringUtil.concatenate("Rule: ", tableExtractionRule.getRuleName(),
                                    "'s start pattern is null or empty."));
                            continue;
                        }
                        final String endPattern = tableExtractionRule.getEndPattern();
                        TableExtractionAPI tableExtractionAPI = tableExtractionUtility
                                .getTableExtractionAPI(tableExtractionRule.getTableAPI());
                        final List<TableColumnExtractionRule> tableColumnExtractionRuleList = tableExtractionRule
                                .getTableColumnExtractionRules();

                        // getting all table column info for extraction.
                        tableColumnVOList = tableExtractionUtility.getTableColumnData(tableColumnsInfoList,
                                tableColumnVOList, noOfTableColumns, tableColumnExtractionRuleList);

                        // Searching all rows of the page.
                        lineDataCarrierList = TableRowFinderUtility.searchAllRowOfTables(hocrPage, startPattern,
                                endPattern, fuzzyMatchThresholdValue);

                        if (CollectionUtils.isNotEmpty(lineDataCarrierList)) {
                            boolean colHeaderValidationRequired = tableExtractionAPI
                                    .isColHeaderValidationRequired();
                            boolean colCoordValidationRequired = tableExtractionAPI.isColCoordValidationRequired();

                            // Get extracted column header value mapped with column headers.
                            if (colHeaderValidationRequired) {
                                colHeaderInfoMap = tableExtractionUtility.getColumnHeaderMap(colHeaderInfoMap,
                                        tableColumnVOList);

                                // call method to populate the columnHeaderInfoMap.
                                tableExtractionUtility.setColumnHeaderInfo(lineDataCarrierList, colHeaderInfoMap,
                                        tableColumnVOList, fuzzyMatchThresholdValue);
                            }
                            boolean isTableColumnListDirty = tableExtractionUtility
                                    .sortTableColumnsInOrderOfOccurance(colHeaderInfoMap, tableColumnVOList,
                                            colHeaderValidationRequired, colCoordValidationRequired);
                            final List<Row> rowList = addDataTablesValues(lineDataCarrierList, tableColumnVOList,
                                    colHeaderInfoMap, tableExtractionAPI);
                            if (CollectionUtils.isNotEmpty(rowList)) {
                                columnCurrencyMap = tableExtractionUtility.getColumnCurrencyMap(tableCurrencyCode,
                                        columnCurrencyMap, noOfTableColumns, tableColumnExtractionRuleList);

                                // Multiple rule validation implementation.
                                applyTableValidationRulesOnRows(ruleOperatorDefinedForTable,
                                        tableValidationRulesList, isRemoveInvalidRows, columnCurrencyMap, rowList);
                                if (null != columnCurrencyMap) {
                                    columnCurrencyMap.clear();
                                }
                                TableExtractionResult extractionResult = new TableExtractionResult(rowList,
                                        tableExtractionRule.getRuleName());
                                allTableRowsList.add(extractionResult);
                            }

                            // Data extracted for columns in rows is in a different order from the UI displayed table
                            // columns. Assign values to the correct column.
                            // NOTE(review): this walks every result collected so far, including those of earlier
                            // rules - confirm that sortTableRowsInUiOrder is safe to apply repeatedly.
                            if (isTableColumnListDirty) {
                                for (TableExtractionResult list : allTableRowsList) {
                                    if (null != list) {
                                        tableExtractionUtility.sortTableRowsInUiOrder(tableColumnsInfoList,
                                                list.getRowList());
                                    }
                                }
                            }

                            // Reset the per-rule working collections before the next rule is processed.
                            if (null != colHeaderInfoMap) {
                                colHeaderInfoMap.clear();
                            }
                            lineDataCarrierList.clear();
                            if (null != tableColumnVOList) {
                                tableColumnVOList.clear();
                            }
                        }
                    }
                }

                // Find the table with maximum valid data.
                tableExtractionUtility.addBestRuleRows(dataTable, allTableRowsList);
            }
        }
        return dataTable;
    }

    /**
     * Extracts table data.
     * <p>
     * Every entry of {@code lineDataCarrierList} is turned into a candidate {@link Row} holding one {@link Column} per
     * entry of {@code tableColumnList}. Depending on the flags exposed by {@code tableExtractionAPI}, each column goes
     * through column header extraction, column coordinate extraction and regex validation; the outcome of the three
     * checks is evaluated with an {@link ExpressionEvaluator} built from the upper-cased extraction API string. A row is
     * kept when at least one of its columns passes that evaluation. A kept row in which a multiline anchor column has no
     * value is merged into the previously kept row instead of being added as a new row.
     * 
     * @param lineDataCarrierList {@link List}<{@link LineDataCarrier}> candidate lines of the table
     * @param tableColumnList {@link List}<{@link TableColumnVO}> columns to extract for every line
     * @param colHeaderInfoMap {@link Map}<{@link String}, {@link DataCarrier}> column header info keyed by column name;
     *            only read when column header validation is required, may be null or empty
     * @param tableExtractionAPI {@link TableExtractionAPI} tells which validations are required
     * @return {@link List}<{@link Row}> the extracted rows; null when any of {@code lineDataCarrierList},
     *         {@code tableColumnList} or {@code tableExtractionAPI} is null/empty, or when no line produced a row
     * @throws DCMAApplicationException declared by the signature; presumably raised by the utility calls - TODO confirm
     */
    private List<Row> addDataTablesValues(final List<LineDataCarrier> lineDataCarrierList,
            final List<TableColumnVO> tableColumnList, final Map<String, DataCarrier> colHeaderInfoMap,
            final TableExtractionAPI tableExtractionAPI) throws DCMAApplicationException {
        List<Row> rowList = null;
        if (CollectionUtils.isNotEmpty(lineDataCarrierList) && CollectionUtils.isNotEmpty(tableColumnList)
                && null != tableExtractionAPI) {

            /** isRowValidForAllMandatoryColumns is a boolean variable used in context of multiline spanning column data in table. */
            boolean isRowValidForAllMandatoryColumns;

            /**
             * Previous row to merge with next row if mandatory is set for the column. It starts as a placeholder row
             * whose columns are never set here; it is replaced by the first row added to rowList.
             */
            Row previousRow = new Row();
            previousRow.setIsRuleValid(false);
            previousRow.setMannualExtraction(false);
            boolean isRowAvaliable = false;
            boolean isRowValidForAllRequiredColumns = true;

            // Becomes false once a row has been added to rowList; it is never set back to true in this method.
            boolean allRowFinishTasksDone = true;
            final String tableExtractionAPIString = tableExtractionAPI.getTableExtractionAPI();
            final ExpressionEvaluator<Boolean> expressionEvaluator = new ExpressionEvaluator<Boolean>(
                    tableExtractionAPIString.toUpperCase());
            int lineIndex = TableExtractionConstants.START_INDEX;
            LineDataCarrier lineDataCarrier = null;
            boolean columnHeaderValidationRequired = tableExtractionAPI.isColHeaderValidationRequired();
            boolean columnCoordinateValidationRequired = tableExtractionAPI.isColCoordValidationRequired();
            boolean regexValidationRequired = tableExtractionAPI.isRegexValidationRequired();

            // Created lazily and then reused (re-populated) for every column of every line.
            ColumnHeaderExtractionDataCarrier columnHeaderExtractionDataCarrier = null;
            while (lineIndex < lineDataCarrierList.size()) {
                lineDataCarrier = lineDataCarrierList.get(lineIndex);
                final String pageID = lineDataCarrier.getPageID();
                final String rowData = lineDataCarrier.getLineRowData();
                final List<Span> spanList = lineDataCarrier.getSpanList();
                LOGGER.info(EphesoftStringUtil.concatenate("Row Data : ", rowData));

                // Reset the per-line flags.
                isRowValidForAllRequiredColumns = true;
                isRowValidForAllMandatoryColumns = true;
                final Row row = new Row();
                row.setIsRuleValid(false);
                row.setMannualExtraction(false);

                // By default setting rule validation as true (this overrides the false assigned just above).
                row.setIsRuleValid(true);
                Row.Columns columnsRow = TableRowFinderUtility.getRowColumns(lineDataCarrier, row);
                final List<Column> columnRowList = columnsRow.getColumn();
                isRowAvaliable = false;
                for (final TableColumnVO tableColumn : tableColumnList) {
                    LOGGER.info(EphesoftStringUtil.concatenate("Extracting column data for column = ",
                            tableColumn.getColumnName()));

                    // Search for all the table row data one by one. The column starts out as invalid with zero
                    // confidence and is added to the row before any validation runs.
                    final Column column = new Column();
                    column.setValid(false);
                    column.setValidationRequired(false);
                    column.setConfidence(0.0f);
                    column.setForceReview(false);
                    column.setOcrConfidence(0.0f);
                    column.setOcrConfidenceThreshold(0.0f);

                    TableRowFinderUtility.setColumnProperties(pageID, column, null, 0);
                    column.setName(tableColumn.getColumnName());
                    columnRowList.add(column);
                    DataCarrier colHeaderDataCarrier = null;
                    final Integer indexOfTableColumn = tableColumnList.indexOf(tableColumn);
                    boolean isRegexValidationPassed = false;
                    boolean isColHeaderValidationPassed = false;
                    boolean isColCoordValidationPassed = false;
                    boolean furtherValidationRequired = true;
                    Coordinates valueCoordinates = null;

                    // This confidence parameter for column header extraction mechanism indicates whether result obtained from this
                    // extraction matches 100% to the validation pattern. If it doesn't match, we search to look for extraction results
                    // by further algorithms (if applicable).
                    boolean isColumnHeaderExtractionConfidenceMaximum = false;

                    // This confidence parameter for column coordinate extraction mechanism indicates whether result obtained from this
                    // extraction matches 100% to the validation pattern. If it doesn't match, we search to look for extraction results
                    // by further algorithms (if applicable).
                    boolean isColumnCoordinateExtractionConfidenceMaximum = false;

                    // Stage 1: column header extraction.
                    if (columnHeaderValidationRequired) {
                        if (null != colHeaderInfoMap && !colHeaderInfoMap.isEmpty()) {
                            colHeaderDataCarrier = colHeaderInfoMap.get(tableColumn.getColumnName());
                        }
                        if (null == columnHeaderExtractionDataCarrier) {
                            columnHeaderExtractionDataCarrier = new ColumnHeaderExtractionDataCarrier();
                        }
                        columnHeaderExtractionDataCarrier.setColHeaderDataCarrier(colHeaderDataCarrier);
                        columnHeaderExtractionDataCarrier.setColumn(column);
                        columnHeaderExtractionDataCarrier.setLineDataCarrier(lineDataCarrier);
                        columnHeaderExtractionDataCarrier.setSpanList(spanList);
                        columnHeaderExtractionDataCarrier.setTableColumn(tableColumn);
                        TableExtractionAPIResult tableExtractionAPIResult = TableRowFinderUtility
                                .runColumnHeaderExtraction(columnHeaderExtractionDataCarrier,
                                        gapBetweenColumnWords);
                        valueCoordinates = tableExtractionAPIResult.getValueCoordinates();
                        isColumnHeaderExtractionConfidenceMaximum = tableExtractionAPIResult.isConfidenceMaximum();
                        isColHeaderValidationPassed = tableExtractionAPIResult.isValidationPassed();
                    }

                    // NOTE(review): judging by the call further below, the boolean arguments of isDataValid appear to be
                    // (regex, column header, column coordinate) - confirm against TableRowFinderUtility. Read that way,
                    // further validation is skipped when the header result alone already satisfies the expression, or
                    // when the expression cannot be satisfied even if regex and coordinate validation both pass.
                    if (TableRowFinderUtility.isDataValid(expressionEvaluator, false,
                            isColumnHeaderExtractionConfidenceMaximum, false)
                            || !TableRowFinderUtility.isDataValid(expressionEvaluator, true,
                                    isColHeaderValidationPassed, true)) {
                        LOGGER.info("No further validation required.....");
                        furtherValidationRequired = false;
                    }

                    // Stage 2: column coordinate extraction.
                    if (furtherValidationRequired && columnCoordinateValidationRequired) {
                        LOGGER.info("Applying Column Coordinates Validation for table extraction....");
                        if (TableRowFinderUtility.isDataValid(expressionEvaluator, false, false, true)) {
                            TableExtractionAPIResult tableExtractionAPIResult = TableRowFinderUtility
                                    .runColumnCoordinateExtraction(pageID, tableColumn, column, spanList);
                            valueCoordinates = tableExtractionAPIResult.getValueCoordinates();
                            isColumnCoordinateExtractionConfidenceMaximum = tableExtractionAPIResult
                                    .isConfidenceMaximum();
                            isColCoordValidationPassed = tableExtractionAPIResult.isValidationPassed();
                        } else if (isColumnHeaderExtractionConfidenceMaximum && valueCoordinates != null) {

                            // The header extraction already produced a value at maximum confidence: instead of running
                            // the coordinate extraction again, only check the value's x-range against the column's
                            // configured x-coordinates.
                            ColumnCoordinates columnCoordinates = TableRowFinderUtility
                                    .getXColumncoordinates(tableColumn);
                            if (TableRowFinderUtility.isColumnValidWithColCoord(valueCoordinates.getX0().intValue(),
                                    valueCoordinates.getX1().intValue(), columnCoordinates.getX0Coordinate(),
                                    columnCoordinates.getX1coordinate())) {
                                isColCoordValidationPassed = true;
                                isColumnCoordinateExtractionConfidenceMaximum = true;
                                column.setValid(Boolean.TRUE);
                                LOGGER.info("Getting rectangle coordinates for the value. ");
                                valueCoordinates = HocrUtil
                                        .getRectangleCoordinates(column.getCoordinatesList().getCoordinates());
                            }
                        } else {
                            TableExtractionAPIResult tableExtractionAPIResult = TableRowFinderUtility
                                    .runColumnCoordinateExtraction(pageID, tableColumn, column, spanList);
                            valueCoordinates = tableExtractionAPIResult.getValueCoordinates();
                            isColumnCoordinateExtractionConfidenceMaximum = tableExtractionAPIResult
                                    .isConfidenceMaximum();
                            isColCoordValidationPassed = tableExtractionAPIResult.isValidationPassed();
                        }

                        // Checking if column header validation and/or column coordinate validation results are confident enough with
                        // respect to table extraction API needed.
                        if ((TableRowFinderUtility.isDataValid(expressionEvaluator, false,
                                isColumnHeaderExtractionConfidenceMaximum,
                                isColumnCoordinateExtractionConfidenceMaximum)
                                || !TableRowFinderUtility.isDataValid(expressionEvaluator, true,
                                        isColHeaderValidationPassed, isColCoordValidationPassed))) {
                            LOGGER.info("No further validation required....");
                            furtherValidationRequired = false;
                        }
                    }

                    // Stage 3: regex validation, fed with the coordinates found by the earlier stages (may be null).
                    if (furtherValidationRequired && regexValidationRequired) {
                        RegexValidationDataCarrier regexvalidationDataCarrier = new RegexValidationDataCarrier();
                        regexvalidationDataCarrier.setColumn(column);
                        regexvalidationDataCarrier.setColumnCoordinates(valueCoordinates);
                        regexvalidationDataCarrier.setIndexOfTableColumn(indexOfTableColumn);
                        regexvalidationDataCarrier.setLineDataCarrier(lineDataCarrier);
                        regexvalidationDataCarrier.setPageID(pageID);
                        regexvalidationDataCarrier.setRowData(rowData);
                        regexvalidationDataCarrier.setSpanList(spanList);
                        regexvalidationDataCarrier.setTableColumn(tableColumn);
                        isRegexValidationPassed = TableRowFinderUtility.runRegexValidation(expressionEvaluator,
                                columnHeaderValidationRequired, columnCoordinateValidationRequired, columnRowList,
                                colHeaderDataCarrier, regexvalidationDataCarrier);
                    }

                    // Final verdict for this column: one valid column is enough to keep the row; an invalid column is
                    // reset through setColumnProperties with the EMPTY constant.
                    if (TableRowFinderUtility.isDataValid(expressionEvaluator, isRegexValidationPassed,
                            isColHeaderValidationPassed, isColCoordValidationPassed)) {
                        isRowAvaliable = true;
                        LOGGER.info("Data valid with respect to all the validations..");
                    } else {
                        TableRowFinderUtility.setColumnProperties(pageID, column, TableExtractionConstants.EMPTY,
                                0);
                    }
                    column.setName(tableColumn.getColumnName());
                    LOGGER.info("Checking if row contains valid data for mandatory columns.....");

                    // A multiline anchor column without a value marks this line as a continuation of the previous row.
                    if (tableColumn.isMultilineAnchor()
                            && (column.getValue() == null || column.getValue().isEmpty())) {
                        isRowValidForAllMandatoryColumns = false;
                    }
                }

                if (isRowAvaliable) {
                    LOGGER.info("Row is available.");
                    if (null == rowList) {
                        rowList = new ArrayList<Row>();
                    }

                    /**
                     * This branch executes if the current row has data to be merged with the last row added to rowList,
                     * because content is missing in at least one mandatory column.
                     */
                    if (!isRowValidForAllMandatoryColumns && null != previousRow
                            && previousRow.getColumns() != null) {
                        final List<Column> previousRowColumnsList = previousRow.getColumns().getColumn();
                        LOGGER.info("Merging rows of multiline data...");
                        TableExtractionResultModifierUtility.mergeMultilineRows(previousRowColumnsList,
                                columnRowList);
                        TableExtractionResultModifierUtility.setMergedRowCoordinates(previousRow, row);
                    } else {

                        // Note: previousRow.getColumns() == null if for first row
                        // The previously added row is finished off before the current row is added.
                        isRowValidForAllRequiredColumns = TableExtractionResultModifierUtility
                                .finishTasksForPreviousRow(tableColumnList, rowList,
                                        isRowValidForAllRequiredColumns, previousRow, lineDataCarrier);
                        if (!isRowValidForAllRequiredColumns) {
                            LOGGER.debug(
                                    "Removed previous row from rowList as required column data was missing from it.");
                        }
                        allRowFinishTasksDone = false;
                        LOGGER.info("Adding a new row to the row list...");
                        rowList.add(row);
                        previousRow = row;
                    }
                }
                lineIndex++;
            }

            // following executes for last row in extracted table data. lineDataCarrier still refers to the last line
            // that was processed by the loop above.
            if (!allRowFinishTasksDone) {
                isRowValidForAllRequiredColumns = TableExtractionResultModifierUtility.finishTasksForPreviousRow(
                        tableColumnList, rowList, isRowValidForAllRequiredColumns, previousRow, lineDataCarrier);
                if (!isRowValidForAllRequiredColumns) {
                    LOGGER.debug("Removed previous row from rowList as required column data was missing from it.");
                }
            }
        }
        return rowList;
    }

    /**
     * Apply table row validation of each row of list.
     * 
     * @param ruleOperatorDefinedForTable {@link String}
     * @param listofTableRules {@link List}<{@link TableRuleInfo}>
     * @param isRemoveInvalidRows boolean
     * @param columnCurrencyMap {@link Map<{@link String}, {@link CurrencyCode}>
     * @param rowList {@link List}<{@link Row}>
     */
    private void applyTableValidationRulesOnRows(final String ruleOperatorDefinedForTable,
            final List<TableRuleInfo> listofTableRules, final boolean isRemoveInvalidRows,
            final Map<String, CurrencyCode> columnCurrencyMap, List<Row> rowList) {
        // Without rows or without rules there is nothing to validate; rows keep their current rule-valid flag.
        if (rowList == null || rowList.isEmpty() || listofTableRules == null || listofTableRules.isEmpty()) {
            return;
        }

        // Collapse the configured invalid characters (split on TableExtractionConstants.SEMI_COLON) into one
        // character sequence that is stripped from column values before a rule is evaluated.
        StringBuilder invalidCharactesString = null;
        if (!EphesoftStringUtil.isNullOrEmpty(invalidRuleCharacters)) {
            final String[] inValidCharcters = invalidRuleCharacters.split(TableExtractionConstants.SEMI_COLON);
            if (inValidCharcters.length > 0) {
                invalidCharactesString = new StringBuilder();
                for (final String invalidCharacter : inValidCharcters) {
                    invalidCharactesString.append(invalidCharacter);
                }
            }
        }

        // "and": every rule must pass. Any other operator value is treated as "or": one passing rule is enough.
        final boolean isAndOperator = TableExtractionConstants.AND_OPERATOR
                .equalsIgnoreCase(ruleOperatorDefinedForTable);

        // Rows that failed validation; only removed from rowList when isRemoveInvalidRows is true.
        final List<Row> listToBeDeleted = new ArrayList<Row>(rowList.size());

        for (final Row tableRow : rowList) {
            // Neutral element of the combination: "and" starts valid, "or" starts invalid.
            boolean isRowValid = isAndOperator;
            for (final TableRuleInfo tableRule : listofTableRules) {
                final boolean isCurrentRuleValid = isRuleValid(tableRule.getRule(), tableRow,
                        invalidCharactesString, columnCurrencyMap);
                if (isCurrentRuleValid != isAndOperator) {
                    // First failing rule settles "and"; first passing rule settles "or".
                    isRowValid = isCurrentRuleValid;
                    break;
                }
            }

            // Record the outcome exactly once per row. Recording it per rule could queue the same row several
            // times, and a later passing rule would then remove only one of those entries, so a valid row
            // would still be deleted.
            if (isRowValid) {
                setRowForValidRule(tableRow, listToBeDeleted);
            } else {
                setRowForInvalidRule(tableRow, listToBeDeleted);
            }
        }

        // Removes invalid rows based on rule defined if this property has been set as checked.
        if (isRemoveInvalidRows) {
            rowList.removeAll(listToBeDeleted);
        }
    }

    /**
     * API to set row property isRuleValid true when rule validation passes and remove the row from the list of rows to be
     * deleted.
     * 
     * @param tableRow
     * @param listToBeDeleted
     */
    private void setRowForValidRule(Row tableRow, List<Row> listToBeDeleted) {
        // Flag the row as having passed rule validation (null rows are ignored by setAllRowValid).
        setAllRowValid(tableRow);
        // Take the row off the deletion candidates. List.remove(Object) removes at most one matching entry,
        // so a row that was queued more than once stays queued.
        listToBeDeleted.remove(tableRow);
    }

    /**
     * API to set row property isRuleValid false when rule validation fails and add the row to the list of rows to be deleted
     * (the caller only applies that list when the isRemoveInvalidRows property is checked).
     * 
     * @param tableRow
     * @param listToBeDeleted
     */
    private void setRowForInvalidRule(Row tableRow, List<Row> listToBeDeleted) {
        // Flag the row as having failed rule validation (null rows are ignored by setAllRowInvalid).
        setAllRowInvalid(tableRow);
        // Queue the row as a deletion candidate; this method itself never removes anything from the table.
        listToBeDeleted.add(tableRow);
    }

    /**
     * Returns true if rule defined for table is valid otherwise returns false.
     * 
     * @param ruleDefinedForTable {@link String} The rule defined for a table.
     * @param row {@link Row} The row of a table.
     * @param columnCurrencyMap {@link Map}<{@link String}, {@link CurrencyCode}> is used to map the currency codes corresponding to
     *            the different column names. If null currency extraction is not performed.
     * @return true if rule defined for table is valid otherwise returns false. Exceptions raised while evaluating the rule are
     *         logged and reported as false rather than propagated.
     */
    private boolean isRuleValid(final String ruleDefinedForTable, final Row row,
            final StringBuilder invalidCharactesString, final Map<String, CurrencyCode> columnCurrencyMap) {
        boolean isValid = false;
        try {
            if (!EphesoftStringUtil.isNullOrEmpty(ruleDefinedForTable)) {
                final Map<String, Integer> relationalOperators = createRelationalOperatorsMap();
                final String selectedOperator = getSelectedRelationalOperator(ruleDefinedForTable,
                        relationalOperators);

                // A rule without any known relational operator cannot be evaluated; guard explicitly instead of
                // letting indexOf(null) throw and relying on the catch-all below.
                int indexOfRuleDefined = -1;
                if (selectedOperator == null) {
                    LOGGER.error(EphesoftStringUtil.concatenate("No relational operator found in table rule: ",
                            ruleDefinedForTable));
                } else {
                    indexOfRuleDefined = ruleDefinedForTable.indexOf(selectedOperator);
                }

                if (indexOfRuleDefined != -1) {
                    // LHS is everything before the operator, RHS everything after it. The end index is the
                    // operator position itself: using "position - 1" would drop the last LHS character and
                    // throw when the operator is at index 0. Surrounding blanks are trimmed later.
                    final String ruleDefinedLHS = ruleDefinedForTable.substring(0, indexOfRuleDefined);
                    final String ruleDefinedRHS = ruleDefinedForTable
                            .substring(indexOfRuleDefined + selectedOperator.length());
                    if (!EphesoftStringUtil.isNullOrEmpty(ruleDefinedLHS)
                            && !EphesoftStringUtil.isNullOrEmpty(ruleDefinedRHS)) {
                        final ExpressionEvaluator<Double> expressionEvaluatorLHS = createRuleSideEvaluator(
                                ruleDefinedLHS, row, invalidCharactesString, columnCurrencyMap);
                        // The RHS is only looked at when the LHS could be prepared.
                        if (expressionEvaluatorLHS != null) {
                            final ExpressionEvaluator<Double> expressionEvaluatorRHS = createRuleSideEvaluator(
                                    ruleDefinedRHS, row, invalidCharactesString, columnCurrencyMap);
                            if (expressionEvaluatorRHS != null) {
                                isValid = checkExpressionsForSelectedInequality(relationalOperators,
                                        selectedOperator, expressionEvaluatorLHS, expressionEvaluatorRHS);
                            }
                        }
                    }
                }
            }
        } catch (final Exception exception) {
            // Any failure while parsing or evaluating the rule means the row does not satisfy it.
            LOGGER.error(EphesoftStringUtil.concatenate("Exception occurred while validating rule for a table row",
                    exception.getMessage()));
        }
        return isValid;
    }

    /**
     * Builds the expression evaluator for one side (LHS or RHS) of a table rule.
     * 
     * @param ruleSide {@link String} text of one side of the rule, must not be null.
     * @param row {@link Row} the row whose column values are bound into the expression.
     * @param invalidCharactesString {@link StringBuilder} characters stripped from column values, may be null.
     * @param columnCurrencyMap {@link Map}<{@link String}, {@link CurrencyCode}> currency codes by column name, may be null.
     * @return the evaluator, or null when the side has no tokens, a referenced column value is unusable, or no column value
     *         could be bound.
     */
    private ExpressionEvaluator<Double> createRuleSideEvaluator(final String ruleSide, final Row row,
            final StringBuilder invalidCharactesString, final Map<String, CurrencyCode> columnCurrencyMap) {
        final String[] columns = EphesoftStringUtil.splitString(ruleSide.trim(),
                TableExtractionConstants.AMPERSAND_STRING);
        if (columns == null || columns.length == 0) {
            return null;
        }
        final StringBuilder ruleModifiedForExpressionEvaluator = new StringBuilder();
        final Map<String, Object> columnValuesMap = getColumnValuesMap(row,
                getColumnNamesMap(ruleModifiedForExpressionEvaluator, columns), invalidCharactesString,
                columnCurrencyMap);
        // getColumnValuesMap stores an entry under TABLE_RULE_KEY when a column value is not usable.
        if (columnValuesMap == null || columnValuesMap.get(TableExtractionConstants.TABLE_RULE_KEY) != null) {
            return null;
        }
        return getColumnsExpressionEvaluator(ruleModifiedForExpressionEvaluator.toString(), columnValuesMap);
    }

    /**
     * Returns the relational operator used in the rule.
     * 
     * @param ruleDefinedForTable {@link String} the table rule.
     * @param relationalOperators {@link Map} map of all the relational operators.
     * @return {@link String} the relational operator used in the rule.
     */
    private String getSelectedRelationalOperator(final String ruleDefinedForTable,
            Map<String, Integer> relationalOperators) {
        String selectedOperator = null;
        if (ruleDefinedForTable != null && relationalOperators != null) {
            int selectedIndex = -1;
            for (final String operator : relationalOperators.keySet()) {
                if (operator == null || operator.length() == 0) {
                    continue;
                }
                final int operatorIndex = ruleDefinedForTable.indexOf(operator);
                if (operatorIndex == -1) {
                    continue;
                }
                // Prefer the operator that occurs first in the rule; at the same position prefer the longer
                // one. This makes the choice independent of the map's iteration order when one operator is a
                // substring of another or the rule contains several operators.
                final boolean isFirstMatch = selectedOperator == null;
                final boolean isEarlier = !isFirstMatch && operatorIndex < selectedIndex;
                final boolean isLongerAtSamePosition = !isFirstMatch && operatorIndex == selectedIndex
                        && operator.length() > selectedOperator.length();
                if (isFirstMatch || isEarlier || isLongerAtSamePosition) {
                    selectedOperator = operator;
                    selectedIndex = operatorIndex;
                }
            }
        }
        // null when the rule is null or contains none of the operators.
        return selectedOperator;
    }

    /**
     * Creates a map of relational operators that may occur in an expression.
     * 
     * @return {@link Map} map of relational operators.
     */
    private Map<String, Integer> createRelationalOperatorsMap() {
        // The position of an operator in this array is the code stored for it in the map; those codes
        // (0..3) are the ones checkExpressionsForSelectedInequality switches on.
        final String[] operatorsByCode = { TableExtractionConstants.CHAR_EQUAL, TableExtractionConstants.NOT_EQUAL,
                TableExtractionConstants.GREATER_OR_EQUAL, TableExtractionConstants.LESS_OR_EQUAL };
        final Map<String, Integer> operatorCodes = new HashMap<String, Integer>();
        for (int code = 0; code < operatorsByCode.length; code++) {
            operatorCodes.put(operatorsByCode[code], code);
        }
        return operatorCodes;
    }

    /**
     * Checks the passed expressions for relational inequalities like '==','<=','>=','!='.
     * 
     * @param relationalOperators {@link Map} map of relational operators.
     * @param selectedOperator {@link String} selected operator.
     * @param expressionEvaluatorLHS {@link ExpressionEvaluator} expression on the left hand side of equality.
     * @param expressionEvaluatorRHS {@link ExpressionEvaluator} expression on the right hand side of equality.
     * @return {@link boolean} true if the inequality is satisfied, false otherwise.
     */
    private boolean checkExpressionsForSelectedInequality(Map<String, Integer> relationalOperators,
            String selectedOperator, final ExpressionEvaluator<Double> expressionEvaluatorLHS,
            final ExpressionEvaluator<Double> expressionEvaluatorRHS) throws ClassCastException, ScriptException {
        // Both sides are always evaluated, left first, before any null check is made.
        final Double leftResult = expressionEvaluatorLHS.eval();
        final Double rightResult = expressionEvaluatorRHS.eval();
        if (selectedOperator == null || leftResult == null || rightResult == null) {
            return false;
        }

        // Rounding is required because the results provided by the engine are not formatted.
        final double leftValue = NumberUtil.getRoundedValue(leftResult);
        final double rightValue = NumberUtil.getRoundedValue(rightResult);

        // Codes are the values stored in the relational operators map for each operator.
        final int operatorCode = relationalOperators.get(selectedOperator);
        if (operatorCode == 0) {
            return leftValue == rightValue;
        }
        if (operatorCode == 1) {
            return leftValue != rightValue;
        }
        if (operatorCode == 2) {
            return leftValue >= rightValue;
        }
        if (operatorCode == 3) {
            return leftValue <= rightValue;
        }
        // Unknown code: the inequality is treated as not satisfied.
        return false;
    }

    /**
     * Returns the instance of {@link ExpressionEvaluator} for the rule defined for a particular table and table columns.
     * 
     * @param ruleDefined {@link String} Rule defined for the table.
     * @param columnsValuesMap {@link Map} contains the map for the table column value.
     * @return The instance of {@link ExpressionEvaluator}.
     */
    private ExpressionEvaluator<Double> getColumnsExpressionEvaluator(final String ruleDefined,
            final Map<String, Object> columnsValuesMap) {
        // No evaluator is created when there is nothing to bind.
        if (columnsValuesMap == null || columnsValuesMap.isEmpty()) {
            return null;
        }
        final ExpressionEvaluator<Double> evaluator = new ExpressionEvaluator<Double>(ruleDefined);
        // Bind every key/value pair of the map into the evaluator, in the map's iteration order.
        for (final Map.Entry<String, Object> binding : columnsValuesMap.entrySet()) {
            evaluator.putValue(binding.getKey(), binding.getValue());
        }
        return evaluator;
    }

    /**
     * Modifies the column name of the table and creates the modified rule used by expression evaluator and returns the table columns
     * name map.
     * 
     * @param ruleModifiedForExpressionEvaluator {@link StringBuilder} The modified rule used by expression evaluator.
     * @param columns {@link String}[] The array of table columns.
     * @return {@link Map}<{@link String}, {@link String}> The table columns name map.
     */
    private Map<String, String> getColumnNamesMap(final StringBuilder ruleModifiedForExpressionEvaluator,
            final String[] columns) {
        if (columns == null) {
            return null;
        }
        final Map<String, String> namesByPlaceholder = new LinkedHashMap<String, String>();
        int position = 0;
        for (final String token : columns) {
            if (token != null) {
                // Tokens at odd positions are column names: each is replaced in the expression by a
                // generated "ID<position>" placeholder and remembered under that placeholder.
                // Tokens at even positions are copied into the expression unchanged.
                if (position % 2 != 0) {
                    final String placeholder = EphesoftStringUtil.concatenate("ID", position);
                    namesByPlaceholder.put(placeholder, token.trim());
                    ruleModifiedForExpressionEvaluator.append(placeholder);
                } else {
                    ruleModifiedForExpressionEvaluator.append(token);
                }
            }
            position++;
        }
        return namesByPlaceholder;
    }

    /**
     * Returns the column values map.
     * 
     * @param row {@link Row} The row of table.
     * @param columnNamesMap {@link Map}<{@link String}, {@link String}> The columns name map.
     * @param invalidCharactesString {@link StringBuilder}
     * @param columnCurrencyMap {@link Map}<{@link String}, {@link CurrencyCode}>
     * @return {@link Map}<{@link String}, {@link Object}> The column values map.
     */
    private Map<String, Object> getColumnValuesMap(final Row row, final Map<String, String> columnNamesMap,
            final StringBuilder invalidCharactesString, final Map<String, CurrencyCode> columnCurrencyMap) {
        if (row == null || columnNamesMap == null) {
            return null;
        }
        final Map<String, Object> valuesByKey = new LinkedHashMap<String, Object>();
        for (final Map.Entry<String, String> columnEntry : columnNamesMap.entrySet()) {
            final String columnName = columnEntry.getValue();

            // Currency handling is only attempted when a currency map was supplied.
            CurrencyCode columnCurrency = null;
            if (columnCurrencyMap != null) {
                columnCurrency = columnCurrencyMap.get(columnName);
            }

            String columnValue = getColumnValue(row, columnName, columnCurrency);

            // Strip the configured invalid characters, if any, before the numeric check.
            if (invalidCharactesString != null
                    && !EphesoftStringUtil.isNullOrEmpty(invalidCharactesString.toString())) {
                columnValue = removeCharacters(columnValue, invalidCharactesString.toString().toCharArray());
            }

            final boolean isUsableValue = !EphesoftStringUtil.isNullOrEmpty(columnValue)
                    && columnValue.matches(TableExtractionConstants.TABLE_RULE_REGEX);
            if (!isUsableValue) {
                // Marker entry telling the caller that a value was missing or did not match the rule
                // regex; remaining columns are not processed.
                valuesByKey.put(TableExtractionConstants.TABLE_RULE_KEY, false);
                break;
            }
            valuesByKey.put(columnEntry.getKey(), Double.valueOf(columnValue));
        }
        return valuesByKey;
    }

    /**
     * Returns the value of the columnName passed for the table row. If <code>currencyCode</code> is not <code>NULL</code>, then
     * currencyExtraction rules are applied to the extracted value corresponding to the code.
     * 
     * @param row {@link Row} row from which the values are to be extracted.
     * @param columnPassed {@link String} name of the column whose value is to be extracted.
     * @param currencyCode {@link CurrencyCode} implies the rules for currency extraction. If null currency extraction is not
     *            performed.
     * @return {@link String} Value extracted corresponding to the column.
     */
    private String getColumnValue(final Row row, final String columnPassed, final CurrencyCode currencyCode) {
        // Start from the raw column value; it is kept as-is unless a currency conversion succeeds.
        String resolvedValue = getColumnValue(row, columnPassed);
        if (null != currencyCode) {
            final Double parsedCurrency = CurrencyUtil.getDoubleValue(resolvedValue,
                    currencyCode.getRepresentationValue());
            if (null != parsedCurrency) {
                resolvedValue = parsedCurrency.toString();
            }
        }
        LOGGER.info(EphesoftStringUtil.concatenate("Extracted value for ", columnPassed, " is ", resolvedValue));
        return resolvedValue;
    }

    private String removeCharacters(final String strToBeModified, final char[] charsToBeReplaced) {
        // Null is returned when there is no text to process or no character set was given.
        if (EphesoftStringUtil.isNullOrEmpty(strToBeModified) || charsToBeReplaced == null) {
            return null;
        }
        final String unwantedCharacters = new String(charsToBeReplaced);
        final StringBuilder filtered = new StringBuilder(strToBeModified.length());
        for (final char current : strToBeModified.toCharArray()) {
            if (unwantedCharacters.indexOf(current) >= 0) {
                // An unwanted character is replaced by TableExtractionConstants.EMPTY.
                filtered.append(TableExtractionConstants.EMPTY);
            } else {
                filtered.append(current);
            }
        }
        return filtered.toString();
    }

    /**
     * Returns the value of the columnName passed for the table row.
     * 
     * @param row {@link Row} The row of the table.
     * @param columnPassed {@link String} The column name passed.
     * @return {@link String} The value of column name passed.
     */
    private String getColumnValue(final Row row, final String columnPassed) {
        String columnValue = null;
        // Both arguments are dereferenced below, so both must be present ("&&", not "||").
        if (row != null && columnPassed != null) {
            final Row.Columns columns = row.getColumns();
            if (columns != null) {
                final List<Column> columnList = columns.getColumn();
                if (columnList != null && !columnList.isEmpty()) {
                    // Trim once instead of once per column.
                    final String requestedName = columnPassed.trim();
                    for (final Column column : columnList) {
                        // Case-insensitive name match; the first matching column wins.
                        if (column != null && requestedName.equalsIgnoreCase(column.getName())) {
                            columnValue = column.getValue();
                            break;
                        }
                    }
                }
            }
        }
        // null when either argument is null or no column with that name exists.
        return columnValue;
    }

    /**
     * Sets all the row of a table to invalid.
     * 
     * @param row {@link Row} The row of a table.
     */
    private void setAllRowInvalid(final Row row) {
        // A null row is silently ignored.
        if (row != null) {
            // Mark the row as having failed rule validation.
            row.setIsRuleValid(false);
        }
    }

    /**
     * Sets all the row of a table to valid.
     * 
     * @param row {@link Row} The row of a table.
     */
    private void setAllRowValid(final Row row) {
        // A null row is silently ignored.
        if (row != null) {
            // Mark the row as having passed rule validation.
            row.setIsRuleValid(true);
        }
    }

    /**
     * @return the invalidRuleCharacters
     */
    public String getInvalidRuleCharacters() {
        // Raw configured value. During rule validation it is split on TableExtractionConstants.SEMI_COLON and
        // the resulting characters are stripped from column values before they are parsed as numbers.
        return invalidRuleCharacters;
    }

    /**
     * @param invalidRuleCharacters the invalidRuleCharacters to set
     */
    public void setInvalidRuleCharacters(final String invalidRuleCharacters) {
        // Stored as given; a null or empty value means no characters are stripped during rule validation.
        this.invalidRuleCharacters = invalidRuleCharacters;
    }
}