org.talend.dataprep.api.dataset.row.DataSetRow.java Source code

Java tutorial

Introduction

Here is the source code for org.talend.dataprep.api.dataset.row.DataSetRow.java

Source

// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// https://github.com/Talend/data-prep/blob/master/LICENSE
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================

package org.talend.dataprep.api.dataset.row;

import static java.util.stream.Collectors.joining;
import static java.util.stream.StreamSupport.stream;
import static org.talend.dataprep.api.dataset.row.FlagNames.INTERNAL_PROPERTY_PREFIX;
import static org.talend.dataprep.api.dataset.row.FlagNames.TDP_INVALID;

import java.io.Serializable;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang.StringUtils;
import org.talend.dataprep.api.dataset.ColumnMetadata;
import org.talend.dataprep.api.dataset.RowMetadata;
import org.talend.dataprep.api.type.Type;

/**
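 * A DataSetRow is a row of a dataset. By default, values in a data set row are kept <b>alphabetically</b> ordered by
 * key (column id); rows returned by {@link #order(List)} follow the requested column order instead.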
 */
public class DataSetRow implements Cloneable, Serializable {

    /**
     * Ready-made filter for {@link #toArray(Predicate[])}: it accepts every entry except the one whose key is
     * {@link FlagNames#TDP_ID}.
     * <p>
     * Usage:<br/>
     * <code>
     *      String[] filteredValues = row.toArray(DataSetRow.SKIP_TDP_ID);
     * </code>
     * </p>
     */
    public static final Predicate<Map.Entry<String, String>> SKIP_TDP_ID = entry -> {
        final String key = entry.getKey();
        return !FlagNames.TDP_ID.equals(key);
    };

    /** Metadata information (columns...) about this DataSetRow. */
    private RowMetadata rowMetadata;

    /**
     * Values of the dataset row, keyed by column id. A {@link TreeMap} by default, so iteration follows the natural
     * ordering of the keys.
     */
    private Map<String, String> values = new TreeMap<>();

    /** True if this row is deleted. */
    private boolean deleted;

    /** The previous state of this row, set by {@link #diff(DataSetRow)} and read by {@link #values()} to compute the diff. */
    private DataSetRow oldValue;

    /** Row id, exposed through {@link #getTdpId()} and {@link #setTdpId(Long)}. */
    private Long rowId;

    /** Ids of the columns marked as invalid in this row: a structure to speed up invalid related operations. */
    private final Set<String> invalidColumnIds = new HashSet<>();

    /**
     * Creates an empty row (no value, not deleted) described by the given metadata.
     *
     * @param rowMetadata the metadata describing the columns of this row.
     */
    public DataSetRow(RowMetadata rowMetadata) {
        deleted = false;
        this.rowMetadata = rowMetadata;
    }

    /**
     * Constructor with values.
     *
     * @param values the row value.
     */
    public DataSetRow(RowMetadata rowMetadata, Map<String, ?> values) {
        this(rowMetadata);
        values.forEach((k, v) -> set(k, String.valueOf(v)));
    }

    public DataSetRow(Map<String, String> values) {
        for (Map.Entry<String, String> entry : values.entrySet()) {
            set(entry.getKey(), entry.getValue());
        }
        List<ColumnMetadata> columns = values.keySet().stream() //
                .map(columnName -> ColumnMetadata.Builder.column().name(columnName).type(Type.STRING).build()) //
                .collect(Collectors.toList());
        rowMetadata = new RowMetadata(columns);
    }

    /**
     * Gives access to the metadata attached to this row.
     *
     * @return The {@link RowMetadata metadata} that describes the current values.
     */
    public RowMetadata getRowMetadata() {
        return this.rowMetadata;
    }

    /**
     * Replaces the metadata attached to this row. Values are left untouched.
     *
     * @param rowMetadata the new {@link RowMetadata metadata} of this row.
     */
    public void setRowMetadata(final RowMetadata rowMetadata) {
        this.rowMetadata = rowMetadata;
    }

    /**
     * Set an entry in the dataset row. Two keys get a special treatment:
     * <ul>
     * <li>{@link FlagNames#TDP_INVALID}: <code>value</code> is read as a comma separated list of column ids, each
     * non empty id is marked as invalid (a <code>null</code> value or empty ids are ignored),</li>
     * <li>{@link FlagNames#TDP_ID}: <code>value</code> is parsed as a long and becomes the row id.</li>
     * </ul>
     * Any other key is stored as a regular value.
     *
     * @param id - the key
     * @param value - the value
     * @return this row, to chain calls.
     * @throws NumberFormatException if <code>id</code> is {@link FlagNames#TDP_ID} and <code>value</code> is not a
     * parsable long.
     */
    public DataSetRow set(final String id, final String value) {
        if (TDP_INVALID.equals(id)) {
            if (value != null) {
                // "".split(",") returns one empty token and "a,,b" contains an empty one: skip them so that an empty
                // column id is never registered as invalid.
                for (String invalidColumnId : value.split(",")) {
                    if (!invalidColumnId.isEmpty()) {
                        invalidColumnIds.add(invalidColumnId);
                    }
                }
            }
        } else if (FlagNames.TDP_ID.equals(id)) {
            setTdpId(Long.parseLong(value));
        } else {
            values.put(id, value);
        }

        return this;
    }

    /**
     * Reads a value of this row. Ids starting with {@link FlagNames#INTERNAL_PROPERTY_PREFIX} are looked up in
     * {@link #getInternalValues()}, all other ids in the regular values.
     *
     * @param id the column id.
     * @return - the value as string
     */
    public String get(final String id) {
        final boolean internalProperty = StringUtils.startsWith(id, INTERNAL_PROPERTY_PREFIX);
        return internalProperty ? getInternalValues().get(id) : values.get(id);
    }

    /**
     * Tells whether this row is flagged as deleted.
     *
     * @return <code>true</code> if the row is deleted, <code>false</code> otherwise.
     */
    public boolean isDeleted() {
        return deleted;
    }

    /**
     * Flags (or unflags) this row as deleted.
     *
     * @param deleted <code>true</code> to mark the row as deleted.
     */
    public void setDeleted(final boolean deleted) {
        this.deleted = deleted;
    }

    /**
     * Registers the previous state of this row; {@link #values()} compares against it to compute the diff flags.
     *
     * @param oldRow - the original row
     */
    public void diff(final DataSetRow oldRow) {
        oldValue = oldRow;
    }

    /**
     * Builds the values of this row, including the non empty internal values (invalid column ids). When an old row
     * was registered with {@link #diff(DataSetRow)}, diff information is added:
     * <ul>
     * <li>row flag NEW : row is deleted in the old row but not in this one,</li>
     * <li>row flag DELETE : row is not deleted in the old row but is deleted in this one,</li>
     * <li>a map of per column flags (NEW / DELETE / UPDATE) stored under {@link FlagNames#DIFF_KEY} when at least
     * one column was added, removed or has a different value.</li>
     * </ul>
     * <p>
     * Note that this method modifies the row: internal values are synchronized into the row values, and columns only
     * present in the old row are put back in this row so that their value can be displayed.
     * </p>
     *
     * @return a new map with the values of this row and, if applicable, the diff flags.
     */
    public Map<String, Object> values() {

        final Map<String, Object> result = new LinkedHashMap<>(values.size() + 1);

        // Synchronize the internal values (invalid column ids) with the row values. An empty internal value removes
        // the key: without this, a marker written by a previous call would remain in the values (and in every later
        // result) even after all columns have been unset as invalid.
        getInternalValues().forEach((key, value) -> {
            if (StringUtils.isEmpty(value)) {
                values.remove(key);
            } else {
                values.put(key, value);
            }
        });

        // if not old value, no diff to compute
        if (this.oldValue == null) {
            result.putAll(values);
            return result;
        }

        // row is no more deleted : we write row values with the *NEW* flag
        if (oldValue.isDeleted() && !isDeleted()) {
            result.put(FlagNames.ROW_DIFF_KEY, Flag.NEW.getValue());
            result.putAll(values);
        }
        // row has been deleted : we write row values with the *DELETED* flag
        else if (!oldValue.isDeleted() && isDeleted()) {
            result.put(FlagNames.ROW_DIFF_KEY, Flag.DELETE.getValue());
            result.putAll(oldValue.values());
        }

        // In every case (no early return above), write the new values and get the diff for each value, then write
        // the DIFF_KEY property.

        final Map<String, Object> diff = new HashMap<>();
        final Map<String, Object> originalValues = oldValue.values();

        // compute the new value (column is not found in old value)
        values.entrySet().forEach(entry -> {
            if (!originalValues.containsKey(entry.getKey())) {
                diff.put(entry.getKey(), Flag.NEW.getValue());
            }
        });

        // compute the deleted values (column is deleted)
        // NOTE(review): the casts to String below assume the old row only exposes String values, i.e. that the old
        // row has no old row of its own (its DIFF_KEY entry would be a Map) - confirm with callers.
        originalValues.entrySet().forEach(entry -> {
            if (!values.containsKey(entry.getKey())) {
                diff.put(entry.getKey(), Flag.DELETE.getValue());
                // put back the original entry so that the value can be displayed
                set(entry.getKey(), (String) entry.getValue());
            }
        });

        // compute the update values (column is still here but value is different)
        values.entrySet().forEach(entry -> {
            if (originalValues.containsKey(entry.getKey())) {
                final Object originalValue = originalValues.get(entry.getKey());
                if (!StringUtils.equals(entry.getValue(), (String) originalValue)) {
                    diff.put(entry.getKey(), Flag.UPDATE.getValue());
                }
            }
        });

        result.putAll(values);
        if (!diff.isEmpty()) {
            result.put(FlagNames.DIFF_KEY, diff);
        }

        return result;
    }

    /**
     * Same as {@link #values()}, with the row id added under the {@link FlagNames#TDP_ID} key when the row has one.
     *
     * @return the values of this row (and diff flags if any), plus the row id if it is not <code>null</code>.
     */
    public Map<String, Object> valuesWithId() {
        final Map<String, Object> valuesAndId = values();
        final Long tdpId = getTdpId();
        if (tdpId == null) {
            return valuesAndId;
        }
        valuesAndId.put(FlagNames.TDP_ID, tdpId);
        return valuesAndId;
    }

    /**
     * Resets this row to the state it had when created: no value, no invalid column, no row id, no old row to diff
     * against and {@link #isDeleted()} returning <code>false</code>. The row metadata is kept.
     */
    public void clear() {
        values.clear();
        invalidColumnIds.clear();
        rowId = null;
        oldValue = null;
        deleted = false;
    }

    /**
     * Copies this row: values, invalid column ids, deleted flag and row id are copied into a new row that refers to
     * the same row metadata instance. The old row registered with {@link #diff(DataSetRow)} is not copied.
     *
     * @see Cloneable#clone()
     */
    @Override
    public DataSetRow clone() {
        final DataSetRow copy = new DataSetRow(rowMetadata, values);
        copy.invalidColumnIds.addAll(invalidColumnIds);
        copy.setDeleted(isDeleted());
        copy.setTdpId(rowId);
        return copy;
    }

    /**
     * Determine if the row should be written: always when an old row exists and is not deleted, otherwise only when
     * this row is not deleted.
     *
     * @return <code>true</code> if the row should be written.
     */
    public boolean shouldWrite() {
        if (oldValue != null && !oldValue.isDeleted()) {
            return true;
        }
        return !isDeleted();
    }

    /**
     * Two rows are equal when they are of the same class and share the same deleted flag, values and row id.
     *
     * @see Objects#equals(Object, Object)
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final DataSetRow other = (DataSetRow) o;
        if (deleted != other.deleted) {
            return false;
        }
        return Objects.equals(values, other.values) && Objects.equals(rowId, other.rowId);
    }

    /**
     * Hash code computed from the deleted flag and the values, with the same formula as
     * <code>Objects.hash(deleted, values)</code>.
     *
     * @see Objects#hash(Object...)
     */
    @Override
    public int hashCode() {
        int hash = 31 + Boolean.hashCode(deleted);
        hash = 31 * hash + Objects.hashCode(values);
        return hash;
    }

    /**
     * @return a description of this row with its metadata, values, deleted flag, old row and row id.
     */
    @Override
    public String toString() {
        final StringBuilder description = new StringBuilder("DataSetRow{");
        description.append("rowMetadata=").append(rowMetadata);
        description.append(", values=").append(values);
        description.append(", deleted=").append(deleted);
        description.append(", oldValue=").append(oldValue);
        description.append(", rowId=").append(rowId);
        description.append('}');
        return description.toString();
    }

    /**
     * Order values of this data set row according to <code>columns</code>. Unless <code>columns</code> is empty (in
     * which case this row itself is returned), the result is a new row that shares the row metadata of this row and
     * holds one value per given column, in the order of the columns. Deleted flag, row id, invalid column ids and
     * old row are not copied to the new row.
     *
     * @param columns The columns to be used to order values.
     * @return A data set row with values ordered following <code>columns</code>.
     * @throws IllegalArgumentException if <code>columns</code> is <code>null</code> or contains fewer columns than
     * this row has values.
     */
    public DataSetRow order(List<ColumnMetadata> columns) {
        if (columns == null) {
            throw new IllegalArgumentException("Columns cannot be null.");
        }
        if (columns.isEmpty()) {
            return this;
        }
        final int valueCount = values.size();
        final int columnCount = columns.size();
        if (columnCount < valueCount) {
            throw new IllegalArgumentException("Expected " + valueCount + " columns but got " + columnCount);
        }

        // insertion order of the LinkedHashMap is the order of the columns
        final Map<String, String> reordered = new LinkedHashMap<>();
        columns.stream() //
                .map(ColumnMetadata::getId) //
                .forEach(columnId -> reordered.put(columnId, values.get(columnId)));

        final DataSetRow orderedRow = new DataSetRow(rowMetadata);
        orderedRow.values = reordered;
        return orderedRow;
    }

    /**
     * Order values of this data set row according to the columns of its own row metadata. See
     * {@link #order(List)} for the details of what the returned row contains.
     *
     * @return A data set row with values ordered following the columns of its row metadata.
     */
    public DataSetRow order() {
        final List<ColumnMetadata> ownColumns = getRowMetadata().getColumns();
        return order(ownColumns);
    }

    /**
     * Removes the column <code>id</code> from the row metadata, then removes the value with the same id if this row
     * holds one.
     *
     * @param id the id of the value to be removed
     * @return <tt>true</tt> if this row contained a value for <code>id</code> (it has been removed), <tt>false</tt>
     * otherwise.
     */
    public boolean deleteColumnById(String id) {
        rowMetadata.deleteColumnById(id);

        final boolean hadValue = values.containsKey(id);
        if (hadValue) {
            values.remove(id);
        }
        return hadValue;
    }

    /**
     * Returns the values of the current row as an array of Strings, in the iteration order of the values. A
     * <code>null</code> value is rendered as the string "null".
     *
     * @param filters An optional set of {@link Predicate filters}; only the entries accepted by all of them are kept.
     * See {@link #SKIP_TDP_ID} for example.
     * @return The current row as array of String eventually with filtered out columns depending on filter.
     */
    @SafeVarargs
    public final String[] toArray(Predicate<Map.Entry<String, String>>... filters) {
        // Combine all filters into a single one
        Predicate<Map.Entry<String, String>> acceptedByAll = entry -> true;
        for (Predicate<Map.Entry<String, String>> filter : filters) {
            acceptedByAll = acceptedByAll.and(filter);
        }
        // Get as string array the selected columns
        return values.entrySet().stream() //
                .filter(acceptedByAll) //
                .map(entry -> String.valueOf(entry.getValue())) //
                .toArray(String[]::new);
    }

    /**
     * @return the id of this row, <code>null</code> if no id was set.
     */
    public Long getTdpId() {
        return this.rowId;
    }

    /**
     * @param tdpId the new id of this row.
     */
    public void setTdpId(final Long tdpId) {
        rowId = tdpId;
    }

    /**
     * @return <code>true</code> if row has no value / or / only contains empty strings / or / null strings.
     * <code>false</code> otherwise.
     */
    public boolean isEmpty() {
        // allMatch() is true on an empty stream, which covers the "no value at all" case
        return values.values().stream().allMatch(StringUtils::isEmpty);
    }

    /**
     * Wraps this row in an 'unmodifiable' view.
     *
     * @return A {@link DataSetRow} as 'unmodifiable': all previously set values cannot change (changes would be
     * silently ignored), setting a new column will set empty string (value will be discarded).
     */
    public DataSetRow unmodifiable() {
        final DataSetRow readOnlyView = new UnmodifiableDataSetRow(this);
        return readOnlyView;
    }

    /**
     * Counterpart of {@link #unmodifiable()}.
     *
     * @return the modifiable version of this row: this row itself.
     */
    public DataSetRow modifiable() {
        final DataSetRow modifiableRow = this;
        return modifiableRow;
    }

    /**
     * Builds a new row that only keeps the values (and column metadata) whose id is one of the
     * <code>filteredColumns</code>. This row and its metadata are not modified: the new row works on a clone of the
     * row metadata. Invalid column ids are copied to the new row.
     *
     * @param filteredColumns the columns to keep.
     * @return a new row restricted to the given columns.
     */
    public DataSetRow filter(List<ColumnMetadata> filteredColumns) {
        final Set<String> keptColumnIds = new HashSet<>();
        for (ColumnMetadata column : filteredColumns) {
            keptColumnIds.add(column.getId());
        }

        final RowMetadata metadataCopy = rowMetadata.clone();
        final Map<String, String> keptValues = new LinkedHashMap<>(values);
        // iterate on the original values, remove from the copies
        for (String columnId : values.keySet()) {
            if (!keptColumnIds.contains(columnId)) {
                keptValues.remove(columnId);
                metadataCopy.deleteColumnById(columnId);
            }
        }

        final DataSetRow filteredRow = new DataSetRow(metadataCopy, keptValues);
        filteredRow.invalidColumnIds.addAll(invalidColumnIds);
        return filteredRow;
    }

    /**
     * Check if a column has an invalid value in this row. The column id must exactly match one of the ids marked as
     * invalid.
     *
     * @param columnId A column id in the line.
     * @return <code>true</code> if column is marked as invalid in row, <code>false</code> otherwise or if column does
     * not exist (or <code>columnId</code> is <code>null</code>).
     */
    public boolean isInvalid(String columnId) {
        // Read through get() rather than the invalidColumnIds field, so that subclasses overriding get() (such as the
        // unmodifiable wrapper) report the invalid columns of the row they expose.
        final String currentInvalidColumnIds = get(TDP_INVALID);
        if (columnId == null || StringUtils.isEmpty(currentInvalidColumnIds)) {
            return false;
        }
        // Compare whole ids: a substring search on the joined string would report "001" as invalid when only "0010"
        // is, and would accept the empty string for any row.
        return Arrays.asList(currentInvalidColumnIds.split(",")).contains(columnId);
    }

    /**
     * Adds <code>columnId</code> to the columns marked as invalid in this row.
     *
     * @param columnId A column id in the line.
     * @see #unsetInvalid(String)
     */
    public void setInvalid(final String columnId) {
        this.invalidColumnIds.add(columnId);
    }

    /**
     * Removes <code>columnId</code> from the columns marked as invalid in this row.
     *
     * @param columnId A column id in the line.
     * @see #setInvalid(String)
     */
    public void unsetInvalid(final String columnId) {
        this.invalidColumnIds.remove(columnId);
    }

    /**
     * Builds a new map holding a single entry: {@link FlagNames#TDP_INVALID} associated with the comma separated
     * list of the invalid column ids (empty string when no column is invalid).
     *
     * @return All technical/internal values in this line (values not meant to be displayed as is).
     * @see FlagNames
     */
    public Map<String, String> getInternalValues() {
        final String joinedInvalidIds = String.join(",", invalidColumnIds);
        final Map<String, String> internals = new HashMap<>(1);
        internals.put(TDP_INVALID, joinedInvalidIds);
        return internals;
    }

    /**
     * A wrapper implementation of {@link DataSetRow} that prevents changes on previous values and set empty string for
     * all new columns. This implementation allows modification on {@link RowMetadata}.
     *
     * @see #set(String, String)
     */
    private static class UnmodifiableDataSetRow extends DataSetRow {

        private final DataSetRow delegate;

        private final boolean deleted;

        private UnmodifiableDataSetRow(DataSetRow delegate) {
            super(delegate.rowMetadata);
            this.delegate = delegate;
            deleted = delegate.isDeleted();
        }

        @Override
        public RowMetadata getRowMetadata() {
            return delegate.getRowMetadata();
        }

        /**
         * This method prevents changes on previous values and set empty string for all new columns.
         *
         * @param id - the key A column name.
         * @param value - the value The value to be set for column name.
         * @return This data set row for chaining calls.
         */
        @Override
        public DataSetRow set(String id, String value) {
            if (delegate.get(id) == null) {
                return delegate.set(id, StringUtils.EMPTY);
            }
            return this;
        }

        @Override
        public String get(String id) {
            return delegate.get(id);
        }

        @Override
        public boolean isDeleted() {
            return deleted;
        }

        @Override
        public void setDeleted(boolean deleted) {
            // UnmodifiableDataSetRow means unmodifiable
        }

        @Override
        public void diff(DataSetRow oldRow) {
            delegate.diff(oldRow);
        }

        @Override
        public Map<String, Object> values() {
            return Collections.unmodifiableMap(delegate.values());
        }

        @Override
        public Map<String, Object> valuesWithId() {
            return Collections.unmodifiableMap(delegate.valuesWithId());
        }

        @Override
        public void clear() {
            // UnmodifiableDataSetRow means unmodifiable
        }

        @Override
        public DataSetRow clone() { // NOSONAR
            return this;
        }

        @Override
        public boolean shouldWrite() {
            return delegate.shouldWrite();
        }

        @Override
        public boolean equals(Object o) { // NOSONAR
            return delegate.equals(o);
        }

        @Override
        public int hashCode() {
            return delegate.hashCode();
        }

        @Override
        public String toString() {
            return delegate.toString();
        }

        @Override
        public DataSetRow order(List<ColumnMetadata> columns) {
            return delegate.order(columns);
        }

        @Override
        public Long getTdpId() {
            return delegate.getTdpId();
        }

        @Override
        public void setTdpId(Long tdpId) {
            // UnmodifiableDataSetRow means unmodifiable
        }

        @Override
        public DataSetRow unmodifiable() {
            return this;
        }

        @Override
        public DataSetRow modifiable() {
            return delegate;
        }
    }
}