org.apache.tajo.engine.planner.physical.ColumnPartitionedTableStoreExec.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tajo.engine.planner.physical.ColumnPartitionedTableStoreExec.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 *
 */
package org.apache.tajo.engine.planner.physical;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.tajo.catalog.CatalogUtil;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.catalog.partition.PartitionDesc;
import org.apache.tajo.catalog.statistics.StatisticsUtil;
import org.apache.tajo.catalog.statistics.TableStats;
import org.apache.tajo.datum.Datum;
import org.apache.tajo.engine.planner.logical.StoreTableNode;
import org.apache.tajo.engine.planner.PlannerUtil;
import org.apache.tajo.storage.Appender;
import org.apache.tajo.storage.StorageManagerFactory;
import org.apache.tajo.storage.StorageUtil;
import org.apache.tajo.storage.Tuple;
import org.apache.tajo.worker.TaskAttemptContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.apache.tajo.catalog.proto.CatalogProtos.PartitionsType;

/**
 * This class is a physical operator to store at column partitioned table.
 */
public class ColumnPartitionedTableStoreExec extends UnaryPhysicalExec {
    private static Log LOG = LogFactory.getLog(ColumnPartitionedTableStoreExec.class);

    private final TableMeta meta;
    private final StoreTableNode plan;
    private Tuple tuple;
    private Path storeTablePath;
    private final Map<String, Appender> appenderMap = new HashMap<String, Appender>();
    private int[] partitionColumnIndices;
    private String[] partitionColumnNames;

    public ColumnPartitionedTableStoreExec(TaskAttemptContext context, StoreTableNode plan, PhysicalExec child)
            throws IOException {
        super(context, plan.getInSchema(), plan.getOutSchema(), child);
        this.plan = plan;

        // set table meta
        if (this.plan.hasOptions()) {
            meta = CatalogUtil.newTableMeta(plan.getStorageType(), plan.getOptions());
        } else {
            meta = CatalogUtil.newTableMeta(plan.getStorageType());
        }

        // Rewrite a output schema because we don't have to store field values
        // corresponding to partition key columns.
        if (plan.getPartitions() != null && plan.getPartitions().getPartitionsType() == PartitionsType.COLUMN) {
            rewriteColumnPartitionedTableSchema();
        }

        // Find column index to name subpartition directory path
        if (this.plan.getPartitions() != null) {
            if (this.plan.getPartitions().getColumns() != null) {
                partitionColumnIndices = new int[plan.getPartitions().getColumns().size()];
                partitionColumnNames = new String[partitionColumnIndices.length];
                Schema columnPartitionSchema = plan.getPartitions().getSchema();
                for (int i = 0; i < columnPartitionSchema.getColumnNum(); i++) {
                    Column targetColumn = columnPartitionSchema.getColumn(i);
                    for (int j = 0; j < plan.getInSchema().getColumns().size(); j++) {
                        Column inputColumn = plan.getInSchema().getColumn(j);
                        if (inputColumn.getColumnName().equals(targetColumn.getColumnName())) {
                            partitionColumnIndices[i] = j;
                            partitionColumnNames[i] = targetColumn.getColumnName();
                        }
                    }
                }
            }
        }
    }

    /**
     * This method rewrites an input schema of column-partitioned table because
     * there are no actual field values in data file in a column-partitioned table.
     * So, this method removes partition key columns from the input schema.
     */
    private void rewriteColumnPartitionedTableSchema() {
        PartitionDesc partitionDesc = plan.getPartitions();
        Schema columnPartitionSchema = (Schema) partitionDesc.getSchema().clone();
        String qualifier = plan.getTableName();

        outSchema = PlannerUtil.rewriteColumnPartitionedTableSchema(partitionDesc, columnPartitionSchema, outSchema,
                qualifier);
    }

    public void init() throws IOException {
        super.init();

        storeTablePath = context.getOutputPath();
        FileSystem fs = storeTablePath.getFileSystem(context.getConf());
        if (!fs.exists(storeTablePath.getParent())) {
            fs.mkdirs(storeTablePath.getParent());
        }
    }

    private Appender getAppender(String partition) throws IOException {
        Appender appender = appenderMap.get(partition);

        if (appender == null) {
            Path dataFile = getDataFile(partition);
            FileSystem fs = dataFile.getFileSystem(context.getConf());

            if (fs.exists(dataFile.getParent())) {
                LOG.info("Path " + dataFile.getParent() + " already exists!");
            } else {
                fs.mkdirs(dataFile.getParent());
                LOG.info("Add subpartition path directory :" + dataFile.getParent());
            }

            if (fs.exists(dataFile)) {
                LOG.info("File " + dataFile + " already exists!");
                FileStatus status = fs.getFileStatus(dataFile);
                LOG.info("File size: " + status.getLen());
            }

            appender = StorageManagerFactory.getStorageManager(context.getConf()).getAppender(meta, outSchema,
                    dataFile);
            appender.enableStats();
            appender.init();
            appenderMap.put(partition, appender);
        } else {
            appender = appenderMap.get(partition);
        }
        return appender;
    }

    private Path getDataFile(String partition) {
        return StorageUtil.concatPath(storeTablePath.getParent(), partition, storeTablePath.getName());
    }

    /* (non-Javadoc)
     * @see PhysicalExec#next()
     */
    @Override
    public Tuple next() throws IOException {
        StringBuilder sb = new StringBuilder();
        while ((tuple = child.next()) != null) {
            // set subpartition directory name
            sb.delete(0, sb.length());
            if (partitionColumnIndices != null) {
                for (int i = 0; i < partitionColumnIndices.length; i++) {
                    Datum datum = tuple.get(partitionColumnIndices[i]);
                    if (i > 0)
                        sb.append("/");
                    sb.append(partitionColumnNames[i]).append("=");
                    sb.append(datum.asChars());
                }
            }

            // add tuple
            Appender appender = getAppender(sb.toString());
            appender.addTuple(tuple);
        }

        List<TableStats> statSet = new ArrayList<TableStats>();
        for (Map.Entry<String, Appender> entry : appenderMap.entrySet()) {
            Appender app = entry.getValue();
            app.flush();
            app.close();
            statSet.add(app.getStats());
        }

        // Collect and aggregated statistics data
        TableStats aggregated = StatisticsUtil.aggregateTableStat(statSet);
        context.setResultStats(aggregated);

        return null;
    }

    @Override
    public void rescan() throws IOException {
        // nothing to do
    }
}