com.facebook.hiveio.output.HiveApiOutputCommitter.java Source code

Introduction

Here is the source code for com.facebook.hiveio.output.HiveApiOutputCommitter.java, a Hadoop OutputCommitter that delegates task-level work to a base committer and, when the job commits, registers the written data with the Hive metastore (or moves it under the table root for unpartitioned tables).

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.hiveio.output;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.facebook.hiveio.common.FileSystems;
import com.facebook.hiveio.common.HadoopUtils;
import com.facebook.hiveio.common.HiveUtils;
import com.google.common.base.Preconditions;

import java.io.IOException;
import java.util.List;

/**
 * OutputCommitter for Hive output.
 *
 * Delegates the standard setup, task and cleanup hooks to a base Hadoop
 * committer. On job commit it additionally registers the written data with
 * the Hive metastore (for partitioned tables) or moves it under the table
 * root (for unpartitioned tables), and writes a _SUCCESS marker if needed.
 */
class HiveApiOutputCommitter extends OutputCommitter {
    /** Logger */
    private static final Logger LOG = LoggerFactory.getLogger(HiveApiOutputCommitter.class);

    /** Profile ID to use */
    private final String profileId;

    /** Base Hadoop output committer */
    private final OutputCommitter baseCommitter;

    /**
     * Constructor
     *
     * @param baseCommitter Base Hadoop committer
     * @param profileId Profile ID
     */
    public HiveApiOutputCommitter(OutputCommitter baseCommitter, String profileId) {
        this.baseCommitter = baseCommitter;
        this.profileId = profileId;
    }

    @Override
    public void setupJob(JobContext jobContext) throws IOException {
        baseCommitter.setupJob(jobContext);
    }

    @Override
    public void commitJob(JobContext jobContext) throws IOException {
        baseCommitter.commitJob(jobContext);

        Configuration conf = jobContext.getConfiguration();
        OutputConf outputConf = new OutputConf(conf, profileId);
        HiveOutputDescription outputDesc = outputConf.readOutputDescription();
        OutputInfo outputInfo = outputConf.readOutputTableInfo();
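        // Partitioned output gets a new partition registered in the metastore;
        // unpartitioned output is moved directly under the table root.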
        if (outputInfo.hasPartitionInfo()) {
            registerPartitions(conf, outputDesc, outputInfo);
        } else {
            noPartitionsCopyData(conf, outputInfo);
        }

        writeSuccessFile(conf);
    }

    /**
     * Write a _SUCCESS marker file to the job's output directory if required
     *
     * @param conf Configuration
     * @throws IOException I/O errors
     */
    private static void writeSuccessFile(Configuration conf) throws IOException {
        if (!HadoopUtils.needSuccessMarker(conf)) {
            return;
        }
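        // Follow Hadoop's convention of dropping an empty _SUCCESS marker into
        // the output directory once the job's output is in place.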
        Path outputPath = HadoopUtils.getOutputPath(conf);
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            Path successPath = new Path(outputPath, "_SUCCESS");
            if (!fs.exists(successPath)) {
                LOG.info("Writing success file to {}", successPath);
                fs.create(successPath).close();
            }
        }
    }

    /**
     * Register the partition for the new data we wrote.
     *
     * @param conf Configuration
     * @param outputDesc Output description from user
     * @param outputInfo Internal output information
     * @throws IOException I/O errors
     */
    private void registerPartitions(Configuration conf, HiveOutputDescription outputDesc, OutputInfo outputInfo)
            throws IOException {
        String dbName = outputDesc.getTableDesc().getDatabaseName();
        String tableName = outputDesc.getTableDesc().getTableName();

        ThriftHiveMetastore.Iface client;
        Table hiveTable;
        try {
            client = outputDesc.metastoreClient(conf);
            hiveTable = client.get_table(dbName, tableName);
            // CHECKSTYLE: stop IllegalCatch
        } catch (Exception e) {
            // CHECKSTYLE: resume IllegalCatch
            throw new IOException(e);
        }

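        // Describe the new partition for the target table, using the partition
        // values from the output description and the recorded table parameters.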
        Partition partition = new Partition();
        partition.setDbName(dbName);
        partition.setTableName(tableName);
        partition.setParameters(outputInfo.getTableParams());
        List<String> partitionValues = HiveUtils.orderedPartitionValues(hiveTable.getPartitionKeys(),
                outputDesc.getPartitionValues());
        partition.setValues(partitionValues);

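        // Copy the table's storage descriptor and point it at the data this job
        // wrote, along with the serializer parameters and column layout used.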
        StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd());
        sd.setParameters(outputInfo.getSerializerParams());
        sd.setLocation(outputInfo.getFinalOutputPath());
        sd.setCols(outputInfo.getColumnInfo());
        partition.setSd(sd);

        LOG.info("Registering partition with values {} located at {}", partitionValues,
                outputInfo.getFinalOutputPath());
        try {
            client.add_partition(partition);
            // CHECKSTYLE: stop IllegalCatch
        } catch (Exception e) {
            // CHECKSTYLE: resume IllegalCatch
            throw new IOException(e);
        }
    }

    /**
     * Table has no partitions, just copy data
     *
     * @param conf Configuration
     * @param outputInfo OutputInfo
     * @throws IOException I/O errors
     */
    private void noPartitionsCopyData(Configuration conf, OutputInfo outputInfo) throws IOException {
        Preconditions.checkArgument(!outputInfo.hasPartitionInfo());
        Path tablePath = new Path(outputInfo.getTableRoot());
        Path writePath = new Path(outputInfo.getPartitionPath());
        FileSystem tableFs = tablePath.getFileSystem(conf);
        FileSystem writePathFs = writePath.getFileSystem(conf);
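        // The data is moved with a single FileSystem instance, so fail fast if
        // the table root and the write path are on different filesystems.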
        if (!tableFs.getUri().equals(writePathFs.getUri())) {
            LOG.error("Table's root path fs {} is not the same as its partition path fs {}", tableFs.getUri(),
                    writePathFs.getUri());
            throw new IllegalStateException("Table's root path fs " + tableFs.getUri()
                    + " is not the same as its partition path fs " + writePathFs.getUri());
        }
        LOG.info("No partitions, copying data from {} to {}", writePath, tablePath);
        FileSystems.move(tableFs, writePath, writePath, tablePath);
        tableFs.delete(writePath, true);
    }

    @Override
    @Deprecated
    public void cleanupJob(JobContext jobContext) throws IOException {
        baseCommitter.cleanupJob(jobContext);
    }

    @Override
    public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException {
        baseCommitter.abortJob(jobContext, state);
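        // Besides the base committer's abort handling, remove any partial
        // output left under the job's output directory.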
        HadoopUtils.deleteOutputDir(jobContext.getConfiguration());
    }

    @Override
    public void setupTask(TaskAttemptContext taskContext) throws IOException {
        baseCommitter.setupTask(taskContext);
    }

    @Override
    public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
        return baseCommitter.needsTaskCommit(taskContext);
    }

    @Override
    public void commitTask(TaskAttemptContext taskContext) throws IOException {
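        // Ensure the work output directory is set in the task's configuration
        // before delegating the actual commit to the base committer.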
        HadoopUtils.setWorkOutputDir(taskContext);
        baseCommitter.commitTask(taskContext);
    }

    @Override
    public void abortTask(TaskAttemptContext taskContext) throws IOException {
        baseCommitter.abortTask(taskContext);
    }
}
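
Example Usage

HiveApiOutputCommitter is package-private, so jobs normally obtain it through the output format in the same package rather than constructing it directly. The sketch below is a hypothetical illustration (the class name, profile ID, and choice of base committer are assumptions; the real HiveApiOutputFormat wiring may differ) of how an output format could wrap a base committer with HiveApiOutputCommitter so that job commit also registers the data in Hive.

package com.facebook.hiveio.output;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Hypothetical output format showing one way HiveApiOutputCommitter could be
 * wired in; it is a sketch, not the library's actual HiveApiOutputFormat.
 */
public class ExampleHiveOutputFormat extends OutputFormat<Object, Object> {
    /** Profile ID under which the output configuration was stored (assumed name) */
    private static final String PROFILE_ID = "example-profile";

    @Override
    public RecordWriter<Object, Object> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Record writing is out of scope for this sketch.
        throw new UnsupportedOperationException("sketch only");
    }

    @Override
    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
        // No output specification checks in this sketch.
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Use a plain FileOutputCommitter as the base committer (an arbitrary
        // choice for the sketch), assuming the job has an output path set.
        Path outputPath = FileOutputFormat.getOutputPath(context);
        OutputCommitter baseCommitter = new FileOutputCommitter(outputPath, context);
        return new HiveApiOutputCommitter(baseCommitter, PROFILE_ID);
    }
}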