org.talend.dataprep.api.dataset.RowMetadata.java Source code

Java tutorial

Introduction

Here is the source code for org.talend.dataprep.api.dataset.RowMetadata.java

Source

// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// https://github.com/Talend/data-prep/blob/master/LICENSE
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================

package org.talend.dataprep.api.dataset;

import java.io.Serializable;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;

import javax.annotation.Nonnull;

import org.apache.avro.Schema;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataprep.api.dataset.json.ColumnContextDeserializer;
import org.talend.dataprep.api.dataset.row.Flag;
import org.talend.dataprep.api.dataset.row.RowMetadataUtils;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;

/**
 * Models metadata information for a row of a data set.
 * More of a DI Schema Metadata albeit it contains indirectly data quality reports and statistics from its dataset.
 * <p>
 * Columns are kept in an ordered list. A column added without an id receives a generated one, formatted with the
 * {@code "0000"} pattern from an internal counter that always stays above the highest numeric id seen so far.
 */
public class RowMetadata implements Serializable {

    /** Serialization UID. */
    private static final long serialVersionUID = 1L;

    /** Class logger. */
    private static final Logger LOGGER = LoggerFactory.getLogger(RowMetadata.class);

    /** Pattern used to format generated column ids and to parse the ids of incoming columns. */
    private static final String COLUMN_ID_PATTERN = "0000";

    /** List of row metadata. */
    @JsonProperty("columns")
    @JsonDeserialize(using = ColumnContextDeserializer.class)
    private final List<ColumnMetadata> columns = new ArrayList<>();

    /** Numeric value of the id that will be given to the next column added without an id. */
    private int nextId = 0;

    /**
     * Default empty constructor.
     */
    public RowMetadata() {
        // nothing special here
    }

    /**
     * Creates a row metadata holding the given columns.
     *
     * @param columns the list of column metadata.
     */
    public RowMetadata(List<ColumnMetadata> columns) {
        setColumns(columns);
    }

    /**
     * @return The metadata of this row's columns, as an unmodifiable view.
     */
    public List<ColumnMetadata> getColumns() {
        return Collections.unmodifiableList(columns);
    }

    /**
     * Returns true if this data set metadata is compatible with {@code other} (same number of columns, and each
     * column is compatible with the column at the same position in {@code other}) and false otherwise.
     *
     * @param other the specified row metadata
     * @return true if this row metadata is compatible with the specified one and false otherwise
     */
    public boolean compatible(RowMetadata other) {
        if (other == null) {
            return false;
        }
        final List<ColumnMetadata> otherColumns = other.getColumns();
        if (columns.size() != otherColumns.size()) {
            return false;
        }
        for (int i = 0; i < columns.size(); i++) {
            if (!columns.get(i).compatible(otherColumns.get(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Replaces all the columns of this row metadata with the given ones and resets the id counter.
     *
     * @param columnMetadata the metadata to set.
     * @throws NullPointerException if {@code columnMetadata} is null; this row metadata is left untouched in that
     * case.
     */
    public void setColumns(List<ColumnMetadata> columnMetadata) {
        // Snapshot before clearing: the argument may be a view of this row's own list (e.g. the result of
        // getColumns()), which clear() would otherwise empty before it is read.
        final List<ColumnMetadata> newColumns = new ArrayList<>(columnMetadata);
        columns.clear();
        nextId = 0;
        newColumns.forEach(this::addColumn);
    }

    /**
     * Appends the given column at the end of this row's columns, generating an id for it if it has none.
     *
     * @param columnMetadata the column to add.
     * @return the added column.
     */
    public ColumnMetadata addColumn(ColumnMetadata columnMetadata) {
        return addColumn(columnMetadata, columns.size());
    }

    /**
     * Removes the column metadata corresponding to the specified id from the columns of this row metadata, and returns
     * the deleted column metadata if it is present. If the specified id does not correspond to a column metadata, the
     * row metadata remains unchanged and {@code null} is returned.
     *
     * @param id the specified id corresponding to a column metadata to be deleted
     * @return the deleted column metadata if the column metadata corresponding to the specified id is in this row
     * metadata and {@code null} otherwise
     */
    public ColumnMetadata deleteColumnById(String id) {
        final int position = indexOfId(id);
        // remove by position so that exactly the column located by its id is removed
        return position < 0 ? null : columns.remove(position);
    }

    /**
     * Inserts the given column at the given index. A column with a blank id gets the next generated id; otherwise
     * the id counter is moved past the numeric value of the column id when that id can be parsed.
     *
     * @param columnMetadata the column to insert.
     * @param index the position where to insert the column.
     * @return the inserted column.
     */
    private ColumnMetadata addColumn(ColumnMetadata columnMetadata, int index) {
        final String columnIdFromMetadata = columnMetadata.getId();
        final DecimalFormat columnIdFormat = newColumnIdFormat();
        if (StringUtils.isBlank(columnIdFromMetadata)) {
            columnMetadata.setId(columnIdFormat.format(nextId));
            nextId++;
        } else {
            try {
                final int columnId = columnIdFormat.parse(columnIdFromMetadata).intValue();
                final int possibleNextId = columnId + 1;
                if (possibleNextId > nextId) {
                    nextId = possibleNextId;
                }
            } catch (ParseException e) {
                // the column is still added, only the id counter is left as it was
                LOGGER.error("Unable to parse column id from metadata '{}'", columnIdFromMetadata, e);
            }
        }
        columns.add(index, columnMetadata);
        return columnMetadata;
    }

    /**
     * Creates the format used for column ids. A new instance is created on each call because
     * {@link DecimalFormat} is not safe for concurrent use, and the symbols are pinned to {@link Locale#ROOT} so
     * that generated ids do not depend on the default locale of the JVM.
     *
     * @return a new column id format.
     */
    private static DecimalFormat newColumnIdFormat() {
        return new DecimalFormat(COLUMN_ID_PATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
    }

    /**
     * @return the row size.
     */
    public int size() {
        return columns.size();
    }

    /**
     * @param wantedId the wanted column id.
     * @return return the wanted columnMetadata or null if not found (or if {@code wantedId} is null).
     */
    public ColumnMetadata getById(String wantedId) {
        final int position = indexOfId(wantedId);
        return position < 0 ? null : columns.get(position);
    }

    /**
     * Finds the position of the first column that has the given id.
     *
     * @param id the wanted column id, may be null.
     * @return the position of the first matching column, or -1 if {@code id} is null or no column has this id.
     */
    private int indexOfId(String id) {
        // defensive programming
        if (id == null) {
            return -1;
        }
        for (int i = 0; i < columns.size(); i++) {
            if (id.equals(columns.get(i).getId())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compute the diff from the given reference to this and update the diffFlag on each columnMetadata.
     * <ul>
     * <li>columns of this row that are not in the reference are flagged {@link Flag#NEW},</li>
     * <li>columns whose name, domain or type differs from the reference are flagged {@link Flag#UPDATE},</li>
     * <li>columns of the reference that are not in this row are flagged {@link Flag#DELETE} and inserted into this
     * row at the position they have in the reference. The reference column instance itself is flagged and
     * inserted, no copy is made.</li>
     * </ul>
     *
     * @param reference the starting point to compute the diff.
     */
    public void diff(RowMetadata reference) {

        // process the new and the updated columns
        for (ColumnMetadata column : columns) {
            final ColumnMetadata referenceColumn = reference.getById(column.getId());
            if (referenceColumn == null) {
                column.setDiffFlagValue(Flag.NEW.getValue());
            } else if (columnHasChanged(column, referenceColumn)) {
                column.setDiffFlagValue(Flag.UPDATE.getValue());
            }
        }

        // process the deleted columns (add the deleted ones)
        // iterate over a snapshot so that inserting into this row can never disturb the iteration
        final List<ColumnMetadata> referenceColumns = new ArrayList<>(reference.getColumns());
        for (int position = 0; position < referenceColumns.size(); position++) {
            final ColumnMetadata referenceColumn = referenceColumns.get(position);
            if (getById(referenceColumn.getId()) == null) {
                referenceColumn.setDiffFlagValue(Flag.DELETE.getValue());
                // bound the index: this row may hold fewer columns than the reference position
                columns.add(Math.min(position, columns.size()), referenceColumn);
            }
        }

    }

    /**
     * Change detection between column and its reference (before the transformation)
     *
     * @param column The column metadata
     * @param reference The column reference
     * @return True if the name, domain or type has changed
     */
    private boolean columnHasChanged(final ColumnMetadata column, final ColumnMetadata reference) {
        return !Objects.equals(column.getName(), reference.getName()) //
                || !Objects.equals(column.getDomain(), reference.getDomain()) //
                || !Objects.equals(column.getType(), reference.getType());
    }

    @Override
    public String toString() {
        return "RowMetadata{" + "columns=" + columns + '}';
    }

    /**
     * Two row metadata are equal when their column lists are equal. The id counter is not taken into account.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        // instanceof (rather than getClass().isInstance(o)) keeps the comparison symmetric with subclasses
        if (!(o instanceof RowMetadata)) {
            return false;
        }
        final RowMetadata that = (RowMetadata) o;
        return Objects.equals(columns, that.columns);
    }

    @Override
    public int hashCode() {
        return Objects.hash(columns);
    }

    /**
     * Replaces the first column that has the given id with the given column. Does nothing if no column has this id.
     *
     * @param columnId the id of the column to replace.
     * @param column the column to put in place of the existing one.
     */
    public void update(@Nonnull String columnId, @Nonnull ColumnMetadata column) {
        final int updatePos = indexOfId(columnId);
        if (updatePos >= 0) {
            columns.set(updatePos, column);
        }
    }

    /**
     * Insert a new column in this metadata right after the existing <code>columnId</code>. If no column with
     * <code>columnId</code> is to be found, append new column at the end of this row's columns.
     *
     * @param columnId A non null column id. Empty string is allowed, in this case, column will be appended at the end
     * of existing columns.
     * @param column A non null column to insert in this row's metadata.
     * @return The column id of the newly inserted column.
     */
    public String insertAfter(@Nonnull String columnId, @Nonnull ColumnMetadata column) {
        final int position = indexOfId(columnId);
        final int insertIndex = position < 0 ? columns.size() : position + 1;
        addColumn(column, insertIndex);
        return column.getId();
    }

    /**
     * Creates a copy of this row metadata. Each column is copied through the {@code ColumnMetadata} builder and the
     * id counter is carried over to the copy.
     *
     * @return a copy of this row metadata.
     */
    @Override
    public RowMetadata clone() {
        // also copy the columns !
        final List<ColumnMetadata> copyColumns = new ArrayList<>(columns.size());
        for (ColumnMetadata col : columns) {
            copyColumns.add(ColumnMetadata.Builder.column().copy(col).build());
        }
        final RowMetadata clone = new RowMetadata(copyColumns);
        // the constructor recomputes the counter from the copied ids, restore the exact value of the original
        clone.nextId = nextId;
        return clone;
    }

    /**
     * @return the Avro schema built from this row metadata by {@link RowMetadataUtils#toSchema(RowMetadata)}.
     */
    public Schema toSchema() {
        return RowMetadataUtils.toSchema(this);
    }
}