test.transfer.parse.LoadSemanticAnalyzer.java Source code


Introduction

Here is the source code for test.transfer.parse.LoadSemanticAnalyzer.java. The class analyzes a Hive LOAD DATA statement: it validates the source path and the target table or partition, then compiles the statement into copy and move tasks.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package test.transfer.parse;

import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.antlr.runtime.tree.Tree;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.IndexUpdater;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.CopyWork;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MoveWork;

import com.testyun.odps.common.ErrorMsg;

/**
 * LoadSemanticAnalyzer.
 *
 * Compiles a LOAD DATA statement into the copy and move tasks that stage
 * the source files and place them into the target table or partition.
 */
public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {

    private boolean isLocal;
    private boolean isOverWrite;

    public LoadSemanticAnalyzer(HiveConf conf) throws SemanticException {
        super(conf);
    }

    @Override
    @SuppressWarnings("nls")
    public void translate(ASTNode ast) throws SemanticException {
        // no translation step is needed for LOAD statements; this hook is a no-op
    }

    public static FileStatus[] matchFilesOrDir(FileSystem fs, Path path) throws IOException {
        FileStatus[] srcs = fs.globStatus(path);
        // a glob that resolves to exactly one directory is expanded into its children
        if (srcs != null && srcs.length == 1 && srcs[0].isDir()) {
            srcs = fs.listStatus(srcs[0].getPath());
        }
        return srcs;
    }
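
    // Usage sketch (hypothetical paths): a glob matching several files is
    // returned as-is, while a glob resolving to a single directory is
    // expanded one level into its children:
    //   FileStatus[] files = matchFilesOrDir(fs, new Path("/staging/part-*"));
    //   FileStatus[] children = matchFilesOrDir(fs, new Path("/staging/dir"));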

    private URI initializeFromURI(String fromPath) throws IOException, URISyntaxException {
        URI fromURI = new Path(fromPath).toUri();

        String fromScheme = fromURI.getScheme();
        String fromAuthority = fromURI.getAuthority();
        String path = fromURI.getPath();

        // generate an absolute path relative to the current directory (local)
        // or the hdfs home directory
        if (!path.startsWith("/")) {
            if (isLocal) {
                path = new Path(System.getProperty("user.dir"), path).toString();
            } else {
                path = new Path(new Path("/user/" + System.getProperty("user.name")), path).toString();
            }
        }

        // set correct scheme and authority
        if (StringUtils.isEmpty(fromScheme)) {
            if (isLocal) {
                // file for local
                fromScheme = "file";
            } else {
                // use default values from fs.default.name
                URI defaultURI = FileSystem.get(conf).getUri();
                fromScheme = defaultURI.getScheme();
                fromAuthority = defaultURI.getAuthority();
            }
        }

        // if scheme is specified but not authority then use the default authority
        if (fromScheme.equals("hdfs") && StringUtils.isEmpty(fromAuthority)) {
            URI defaultURI = FileSystem.get(conf).getUri();
            fromAuthority = defaultURI.getAuthority();
        }

        LOG.debug(fromScheme + "@" + fromAuthority + "@" + path);
        return new URI(fromScheme, fromAuthority, path, null, null);
    }
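
    // For example (illustrative values only): with LOCAL and a relative path
    // "data/in.txt" this yields file:<user.dir>/data/in.txt, while without
    // LOCAL and with no scheme the result defaults to fs.default.name, e.g.
    // hdfs://<namenode>/user/<user.name>/data/in.txt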

    private void applyConstraints(URI fromURI, URI toURI, Tree ast, boolean isLocal) throws SemanticException {
        if (!fromURI.getScheme().equals("file") && !fromURI.getScheme().equals("pangu")
                && !fromURI.getScheme().equals("hdfs")) {
            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
                    "only \"file\" or \"hdfs\" or \"pangu\" file systems accepted"));
        }

        // local mode implies that scheme should be "file"
        // we can change this going forward
        if (isLocal && !fromURI.getScheme().equals("file")) {
            throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast,
                    "Source file system should be \"file\" if \"local\" is specified"));
        }

        try {
            FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf),
                    new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath()));

            if (srcs == null || srcs.length == 0) {
                throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "No files matching path " + fromURI));
            }

            for (FileStatus oneSrc : srcs) {
                if (oneSrc.isDir()) {
                    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
                            "source contains directory: " + oneSrc.getPath().toString()));
                }
            }
        } catch (IOException e) {
            // report any I/O problem while matching source files as an invalid path
            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
        }

        // only in 'local' mode do we copy stuff from one place to another;
        // the rejection of a differing scheme/authority in other cases is
        // currently disabled (the original check is kept below for reference)
        if (!isLocal && (!StringUtils.equals(fromURI.getScheme(), toURI.getScheme())
                || !StringUtils.equals(fromURI.getAuthority(), toURI.getAuthority()))) {
            /*String reason = "Move from: " + fromURI.toString() + " to: "
                + toURI.toString() + " is not valid. "
                + "Please check that values for params \"default.fs.name\" and "
                + "\"hive.metastore.warehouse.dir\" do not conflict.";
            throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));*/
        }
    }
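
    // Illustrative rejections (hypothetical inputs): a LOCAL load from an
    // "hdfs" URI fails the scheme check above, and a source whose matched
    // files include a sub-directory fails the per-file check.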

    @Override
    public void analyzeInternal(ASTNode ast) throws SemanticException {
        isLocal = false;
        isOverWrite = false;
        Tree fromTree = ast.getChild(0);
        Tree tableTree = ast.getChild(1);
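
        // AST shapes: two children is a plain "LOAD DATA INPATH ... INTO TABLE ...",
        // three children means either LOCAL or OVERWRITE was given, and four
        // children means both were given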

        if (ast.getChildCount() == 4) {
            isLocal = true;
            isOverWrite = true;
        }

        if (ast.getChildCount() == 3) {
            if (ast.getChild(2).getText().equalsIgnoreCase("local")) {
                isLocal = true;
            } else {
                isOverWrite = true;
            }
        }

        // initialize load path
        URI fromURI;
        try {
            String fromPath = stripQuotes(fromTree.getText());
            fromURI = initializeFromURI(fromPath);
        } catch (IOException e) {
            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
        } catch (URISyntaxException e) {
            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
        }

        // initialize destination table/partition
        tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);

        if (ts.tableHandle.isOffline()) {
            throw new SemanticException(ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + ts.tableName));
        }

        if (ts.tableHandle.isView()) {
            throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
        }
        if (ts.tableHandle.isNonNative()) {
            throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
        }
        URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation() : ts.tableHandle.getDataLocation();

        List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
        if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
            throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
        }
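
        // e.g. loading into a table partitioned by a (hypothetical) column ds
        // without a PARTITION (ds=...) clause is rejected here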

        // make sure the arguments make sense
        applyConstraints(fromURI, toURI, fromTree, isLocal);

        Task<? extends Serializable> rTask = null;

        // create copy work
        if (isLocal) {
            // if the local keyword is specified we will always make a copy; this
            // might seem redundant when the hive warehouse is also located in the
            // local file system, but that's just a test case
            String copyURIStr = ctx.getExternalTmpFileURI(toURI);
            URI copyURI = URI.create(copyURIStr);
            rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr), conf);
            fromURI = copyURI;
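            // the final move below will read from this staged copy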
        }

        // create final load/move work

        String loadTmpPath = ctx.getExternalTmpFileURI(toURI);
        Map<String, String> partSpec = ts.getPartSpec();
        if (partSpec == null) {
            partSpec = new LinkedHashMap<String, String>();
            outputs.add(new WriteEntity(ts.tableHandle));
        } else {
            try {
                Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
                if (part != null) {
                    if (part.isOffline()) {
                        throw new SemanticException(
                                ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(ts.tableName + ":" + part.getName()));
                    }
                    outputs.add(new WriteEntity(part));
                } else {
                    outputs.add(new WriteEntity(ts.tableHandle));
                }
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
        }

        LoadTableDesc loadTableWork = new LoadTableDesc(fromURI.toString(), loadTmpPath,
                Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite);

        if (rTask != null) {
            rTask.addDependentTask(
                    TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true), conf));
        } else {
            rTask = TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true), conf);
        }

        rootTasks.add(rTask);
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
            IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
            try {
                List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
                for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
                    // LOAD DATA produces either a copy followed by a move, or just a
                    // move; the index update must always depend on the move
                    if (rTask.getChildren() == null || rTask.getChildren().size() == 0) {
                        rTask.addDependentTask(updateTask);
                    } else {
                        ((Task<? extends Serializable>) rTask.getChildren().get(0)).addDependentTask(updateTask);
                    }
                }
            } catch (HiveException e) {
                console.printInfo("WARNING: could not auto-update stale indexes, indexes are not out of sync");
            }
        }
    }
}
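
Usage

For context, a driver would typically feed this analyzer along the following
lines. This is a minimal sketch, not part of the file above: it assumes Hive's
ParseDriver as the parser entry point and assumes the custom
BaseSemanticAnalyzer has wired up the inherited ctx, db, and conf fields before
analyzeInternal runs, as Hive's own driver does.

HiveConf conf = new HiveConf();
LoadSemanticAnalyzer analyzer = new LoadSemanticAnalyzer(conf);

// parse a LOAD DATA statement into an AST (throws ParseException on bad syntax)
ASTNode ast = new ParseDriver()
        .parse("LOAD DATA LOCAL INPATH '/tmp/kv1.txt' OVERWRITE INTO TABLE dst");

// populates rootTasks with a CopyWork task (because of LOCAL) chained to the
// MoveWork task that performs the final load
analyzer.analyzeInternal(ast);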