org.apache.kylin.job.cube.GarbageCollectionStep.java Source code


Introduction

Here is the source code for org.apache.kylin.job.cube.GarbageCollectionStep.java, the job step that cleans up intermediate resources (the intermediate Hive table after a cube build, and obsolete HBase tables and HDFS paths after a cube merge) once they are no longer needed.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.job.cube;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.job.cmd.ShellCmdOutput;
import org.apache.kylin.job.exception.ExecuteException;
import org.apache.kylin.job.execution.AbstractExecutable;
import org.apache.kylin.job.execution.ExecutableContext;
import org.apache.kylin.job.execution.ExecuteResult;
import org.apache.kylin.metadata.realization.IRealizationConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

/**
 * Drop the resources that are no longer needed, including the intermediate Hive table (after cube build),
 * obsolete HBase tables (after cube merge), and the HDFS paths left behind by the old segments.
 */
public class GarbageCollectionStep extends AbstractExecutable {

    private static final String OLD_HTABLES = "oldHTables";

    private static final String OLD_HIVE_TABLE = "oldHiveTable";

    private static final String OLD_HDFS_PATHS = "oldHdfsPaths";

    private static final String OLD_HDFS_PATHS_ON_HBASE_CLUSTER = "oldHdfsPathsOnHBaseCluster";

    private static final Logger logger = LoggerFactory.getLogger(GarbageCollectionStep.class);

    private StringBuffer output;

    public GarbageCollectionStep() {
        super();
        output = new StringBuffer();
    }

    @Override
    protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {

        try {
            dropHBaseTable(context);
            dropHiveTable(context);
            dropHdfsPath(context);
            dropHdfsPathOnHBaseCluster(context);
        } catch (IOException e) {
            logger.error("job:" + getId() + " execute finished with exception", e);
            output.append("\n").append(e.getLocalizedMessage());
            return new ExecuteResult(ExecuteResult.State.ERROR, output.toString());
        }

        return new ExecuteResult(ExecuteResult.State.SUCCEED, output.toString());
    }

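    /**
     * Drops the intermediate Hive table left over from the cube build by
     * shelling out a "hive -e" command through the CLI command executor.
     */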
    private void dropHiveTable(ExecutableContext context) throws IOException {
        final String hiveTable = this.getOldHiveTable();
        if (StringUtils.isNotEmpty(hiveTable)) {
            final String dropSQL = "USE " + KylinConfig.getInstanceFromEnv().getHiveDatabaseForIntermediateTable()
                    + ";" + " DROP TABLE IF EXISTS " + hiveTable + ";";
            final String dropHiveCMD = "hive -e \"" + dropSQL + "\"";
            logger.info("executing: " + dropHiveCMD);
            ShellCmdOutput shellCmdOutput = new ShellCmdOutput();
            context.getConfig().getCliCommandExecutor().execute(dropHiveCMD, shellCmdOutput);
            logger.debug("Dropped Hive table " + hiveTable + " \n");
            output.append(shellCmdOutput.getOutput() + " \n");
            output.append("Dropped Hive table " + hiveTable + " \n");
        }

    }

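    /**
     * Drops the obsolete HBase tables produced before a cube merge. A table is
     * only deleted when its host tag matches this Kylin instance's metadata URL
     * prefix, so tables owned by other Kylin deployments are skipped.
     */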
    private void dropHBaseTable(ExecutableContext context) throws IOException {
        List<String> oldTables = getOldHTables();
        if (oldTables != null && oldTables.size() > 0) {
            String metadataUrlPrefix = KylinConfig.getInstanceFromEnv().getMetadataUrlPrefix();
            Configuration conf = HBaseConfiguration.create();
            HBaseAdmin admin = null;
            try {
                admin = new HBaseAdmin(conf);
                for (String table : oldTables) {
                    if (admin.tableExists(table)) {
                        HTableDescriptor tableDescriptor = admin.getTableDescriptor(Bytes.toBytes(table));
                        String host = tableDescriptor.getValue(IRealizationConstants.HTableTag);
                        if (metadataUrlPrefix.equalsIgnoreCase(host)) {
                            if (admin.isTableEnabled(table)) {
                                admin.disableTable(table);
                            }
                            admin.deleteTable(table);
                            logger.debug("Dropped HBase table " + table);
                            output.append("Dropped HBase table " + table + " \n");
                        } else {
                            logger.debug("Skipped HBase table " + table);
                            output.append("Skipped HBase table " + table + " \n");
                        }
                    }
                }

            } finally {
                if (admin != null)
                    try {
                        admin.close();
                    } catch (IOException e) {
                        logger.error(e.getLocalizedMessage());
                    }
            }
        }
    }

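    /**
     * Recursively deletes each of the given HDFS paths on the supplied FileSystem.
     * A trailing "*" wildcard is stripped first so that the enclosing directory
     * itself is removed.
     */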
    private void dropHdfsPathOnCluster(List<String> oldHdfsPaths, FileSystem fileSystem) throws IOException {
        if (oldHdfsPaths != null && oldHdfsPaths.size() > 0) {
            logger.debug("Drop HDFS path on FileSystem: " + fileSystem.getUri());
            output.append("Drop HDFS path on FileSystem: \"" + fileSystem.getUri() + "\" \n");
            for (String path : oldHdfsPaths) {
                if (path.endsWith("*"))
                    path = path.substring(0, path.length() - 1);

                Path oldPath = new Path(path);
                if (fileSystem.exists(oldPath)) {
                    fileSystem.delete(oldPath, true);
                    logger.debug("Dropped HDFS path: " + path);
                    output.append("Dropped HDFS path  \"" + path + "\" \n");
                } else {
                    logger.debug("HDFS path does not exist: " + path);
                    output.append("HDFS path does not exist: \"" + path + "\" \n");
                }
            }
        }
    }

    private void dropHdfsPath(ExecutableContext context) throws IOException {
        List<String> oldHdfsPaths = this.getOldHdfsPaths();
        FileSystem fileSystem = FileSystem.get(HadoopUtil.getCurrentConfiguration());
        dropHdfsPathOnCluster(oldHdfsPaths, fileSystem);
    }

    private void dropHdfsPathOnHBaseCluster(ExecutableContext context) throws IOException {
        List<String> oldHdfsPaths = this.getOldHdfsPathsOnHBaseCluster();
        FileSystem fileSystem = FileSystem.get(HadoopUtil.getCurrentHBaseConfiguration());
        dropHdfsPathOnCluster(oldHdfsPaths, fileSystem);
    }

    public void setOldHTables(List<String> tables) {
        setArrayParam(OLD_HTABLES, tables);
    }

    private List<String> getOldHTables() {
        return getArrayParam(OLD_HTABLES);
    }

    public void setOldHdfsPaths(List<String> paths) {
        setArrayParam(OLD_HDFS_PATHS, paths);
    }

    private List<String> getOldHdfsPaths() {
        return getArrayParam(OLD_HDFS_PATHS);
    }

    public void setOldHdfsPathsOnHBaseCluster(List<String> paths) {
        setArrayParam(OLD_HDFS_PATHS_ON_HBASE_CLUSTER, paths);
    }

    private List<String> getOldHdfsPathsOnHBaseCluster() {
        return getArrayParam(OLD_HDFS_PATHS_ON_HBASE_CLUSTER);
    }

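    /**
     * Job parameters are persisted as plain strings, so list-valued parameters
     * are stored comma-joined and split back into a list when read.
     */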
    private void setArrayParam(String paramKey, List<String> paramValues) {
        setParam(paramKey, StringUtils.join(paramValues, ","));
    }

    private List<String> getArrayParam(String paramKey) {
        final String ids = getParam(paramKey);
        if (ids != null) {
            final String[] splitted = StringUtils.split(ids, ",");
            ArrayList<String> result = Lists.newArrayListWithExpectedSize(splitted.length);
            for (String id : splitted) {
                result.add(id);
            }
            return result;
        } else {
            return Collections.emptyList();
        }
    }

    public void setOldHiveTable(String hiveTable) {
        setParam(OLD_HIVE_TABLE, hiveTable);
    }

    private String getOldHiveTable() {
        return getParam(OLD_HIVE_TABLE);
    }

}
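
Example

For reference, below is a minimal sketch of how a cleanup step like this could be wired up by a job builder. The table names and paths are hypothetical placeholders; only the public setters shown in the listing above are assumed.

import java.util.Arrays;

import org.apache.kylin.job.cube.GarbageCollectionStep;

public class GarbageCollectionStepExample {

    public static void main(String[] args) {
        GarbageCollectionStep gcStep = new GarbageCollectionStep();

        // Hypothetical HBase tables left behind by the segments that were merged.
        gcStep.setOldHTables(Arrays.asList("KYLIN_SEG_20150101", "KYLIN_SEG_20150201"));

        // Hypothetical cuboid output directories on the Hadoop cluster.
        gcStep.setOldHdfsPaths(Arrays.asList("/kylin/kylin_metadata/kylin-job-1/*"));

        // Hypothetical HFile staging paths on a separate HBase cluster.
        gcStep.setOldHdfsPathsOnHBaseCluster(Arrays.asList("/kylin/hbase/kylin-job-1/*"));

        // Hypothetical intermediate Hive table created during the cube build.
        gcStep.setOldHiveTable("kylin_intermediate_test_cube_job_1");

        // In the real job engine this step would be appended to a chained
        // executable so that doWork() runs only after the new segment is ready.
    }
}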