org.apache.hadoop.hive.ql.exec.SkewJoinHandler.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.ql.exec.SkewJoinHandler.java, the runtime helper that Hive's common join operators use to detect skewed join keys and spill their rows into per-table directories for a follow-up job (HIVE-964).

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * At runtime, the join writes the big keys of one table into a corresponding
 * directory, and the matching keys of all other tables into separate
 * directories (one per table). The directories look like:
 * <ul>
 * <li>
 * dir-T1-bigkeys (containing big keys in T1), dir-T2-keys (containing keys
 * that are big in T1), dir-T3-keys (containing keys that are big in T1), ...
 * <li>
 * dir-T1-keys (containing keys that are big in T2), dir-T2-bigkeys
 * (containing big keys in T2), dir-T3-keys (containing keys that are big in
 * T2), ...
 * <li>
 * dir-T1-keys (containing keys that are big in T3), dir-T2-keys (containing
 * keys that are big in T3), dir-T3-bigkeys (containing big keys in T3), ...
 * </ul>
 *
 * <p>
 * For each skew key, all values are first written to a local tmp file. When
 * the current group ends, the local tmp file is uploaded to HDFS. Right now,
 * one file is used per skew key.
 *
 * <p>
 * For more information, see https://issues.apache.org/jira/browse/HIVE-964.
 *
 */
public class SkewJoinHandler {

    protected static final Log LOG = LogFactory.getLog(SkewJoinHandler.class.getName());

    public int currBigKeyTag = -1;

    private int rowNumber = 0;
    private int currTag = -1;

    private int skewKeyDefinition = -1;
    private Map<Byte, StructObjectInspector> skewKeysTableObjectInspector = null;
    private Map<Byte, SerDe> tblSerializers = null;
    private Map<Byte, TableDesc> tblDesc = null;

    private Map<Byte, Boolean> bigKeysExistingMap = null;

    private LongWritable skewjoinFollowupJobs;

    private final boolean noOuterJoin;
    Configuration hconf = null;
    List<Object> dummyKey = null;
    String taskId;

    private final CommonJoinOperator<? extends OperatorDesc> joinOp;
    private final int numAliases;
    private final JoinDesc conf;

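    /**
     * Binds this handler to the join operator whose skewed key groups it
     * will spill for processing by a follow-up job.
     */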
    public SkewJoinHandler(CommonJoinOperator<? extends OperatorDesc> joinOp) {
        this.joinOp = joinOp;
        numAliases = joinOp.numAliases;
        conf = joinOp.getConf();
        noOuterJoin = joinOp.noOuterJoin;
    }

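    /**
     * Builds the per-alias machinery: for each table alias it assembles the
     * key and value ObjectInspectors, instantiates the SerDe described by
     * the skew-key table descriptor, and rewires the join operator's
     * RowContainers to use them. If any SerDe fails to initialize, skew
     * join handling is disabled for this operator.
     */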
    public void initiliaze(Configuration hconf) {
        this.hconf = hconf;
        JoinDesc desc = joinOp.getConf();
        skewKeyDefinition = desc.getSkewKeyDefinition();
        skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(numAliases);
        tblDesc = desc.getSkewKeysValuesTables();
        tblSerializers = new HashMap<Byte, SerDe>(numAliases);
        bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
        taskId = Utilities.getTaskId(hconf);

        int[][] filterMap = desc.getFilterMap();
        for (int i = 0; i < numAliases; i++) {
            Byte alias = conf.getTagOrder()[i];
            List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
            StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias];
            StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
            List<? extends StructField> keyFields = ((StructObjectInspector) sf.getFieldObjectInspector())
                    .getAllStructFieldRefs();
            int keyFieldSize = keyFields.size();
            for (int k = 0; k < keyFieldSize; k++) {
                skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector());
            }
            TableDesc joinKeyDesc = desc.getKeyTableDesc();
            List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc.getProperties());
            StructObjectInspector structTblKeyInpector = ObjectInspectorFactory
                    .getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);

            try {
                SerDe serializer = (SerDe) ReflectionUtils.newInstance(tblDesc.get(alias).getDeserializerClass(),
                        null);
                SerDeUtils.initializeSerDe(serializer, null, tblDesc.get(alias).getProperties(), null);
                tblSerializers.put((byte) i, serializer);
            } catch (SerDeException e) {
                LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
                joinOp.handleSkewJoin = false;
                break;
            }

            boolean hasFilter = filterMap != null && filterMap[i] != null;
            TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias, joinOp.spillTableDesc, conf, !hasFilter);
            List<String> valColNames = new ArrayList<String>();
            if (valTblDesc != null) {
                valColNames = Utilities.getColumnNames(valTblDesc.getProperties());
            }
            StructObjectInspector structTblValInpector = ObjectInspectorFactory
                    .getStandardStructObjectInspector(valColNames, joinOp.joinValuesStandardObjectInspectors[i]);

            StructObjectInspector structTblInpector = ObjectInspectorFactory
                    .getUnionStructObjectInspector(Arrays.asList(structTblValInpector, structTblKeyInpector));
            skewKeysTableObjectInspector.put((byte) i, structTblInpector);
        }

        // Reset each RowContainer's SerDe, ObjectInspector, and TableDesc.
        for (int i = 0; i < numAliases; i++) {
            Byte alias = conf.getTagOrder()[i];
            RowContainer<ArrayList<Object>> rc = (RowContainer) joinOp.storage[i];
            if (rc != null) {
                rc.setSerDe(tblSerializers.get((byte) i), skewKeysTableObjectInspector.get((byte) i));
                rc.setTableDesc(tblDesc.get(alias));
            }
        }
    }

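    /**
     * Called at the end of a key group. If the group was flagged as skewed,
     * copies the buffered rows of the big-key table, and of every other
     * table, from local storage to their DFS directories, then clears the
     * skew flag for the next group.
     */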
    void endGroup() throws IOException, HiveException {
        if (skewKeyInCurrentGroup) {

            Path specPath = conf.getBigKeysDirMap().get((byte) currBigKeyTag);
            RowContainer<ArrayList<Object>> bigKey = (RowContainer) joinOp.storage[currBigKeyTag];
            Path outputPath = getOperatorOutputPath(specPath);
            FileSystem destFs = outputPath.getFileSystem(hconf);
            bigKey.copyToDFSDirecory(destFs, outputPath);

            for (int i = 0; i < numAliases; i++) {
                if (((byte) i) == currBigKeyTag) {
                    continue;
                }
                RowContainer<ArrayList<Object>> values = (RowContainer) joinOp.storage[i];
                if (values != null) {
                    specPath = conf.getSmallKeysDirMap().get((byte) currBigKeyTag).get((byte) i);
                    values.copyToDFSDirecory(destFs, getOperatorOutputPath(specPath));
                }
            }
        }
        skewKeyInCurrentGroup = false;
    }

    boolean skewKeyInCurrentGroup = false;

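    /**
     * Invoked once per row. Resets the row counter whenever a new group or a
     * new table tag starts; once the per-tag row count reaches the configured
     * skew-key threshold (and the tag is not the last, always-streamable
     * table), flags the current group as skewed.
     */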
    public void handleSkew(int tag) throws HiveException {

        if (joinOp.newGroupStarted || tag != currTag) {
            rowNumber = 0;
            currTag = tag;
        }

        if (joinOp.newGroupStarted) {
            currBigKeyTag = -1;
            joinOp.newGroupStarted = false;
            dummyKey = (List<Object>) joinOp.getGroupKeyObject();
            skewKeyInCurrentGroup = false;

            for (int i = 0; i < numAliases; i++) {
                RowContainer<ArrayList<Object>> rc = (RowContainer) joinOp.storage[i];
                if (rc != null) {
                    rc.setKeyObject(dummyKey);
                }
            }
        }

        rowNumber++;
        if (currBigKeyTag == -1 && (tag < numAliases - 1) && rowNumber >= skewKeyDefinition) {
            // This is the first time we see a big key. If the key is not in the
            // last table (the last table can always be streamed), we treat it as
            // a skew key from now on.
            currBigKeyTag = tag;
            updateSkewJoinJobCounter(tag);
            // Right now we assume the group key is a List. This may change in
            // the future.
            if (!(dummyKey instanceof List)) {
                throw new RuntimeException("Bug in handling skew key in a separate job.");
            }

            skewKeyInCurrentGroup = true;
            bigKeysExistingMap.put(Byte.valueOf((byte) currBigKeyTag), Boolean.TRUE);
        }
    }

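    /**
     * On a normal close, flushes the final group and moves this task's
     * outputs to their final paths. On abort, deletes whatever big-key and
     * small-key output the task has already written.
     */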
    public void close(boolean abort) throws HiveException {
        if (!abort) {
            try {
                endGroup();
                commit();
            } catch (IOException e) {
                throw new HiveException(e);
            }
        } else {
            for (int bigKeyTbl = 0; bigKeyTbl < numAliases; bigKeyTbl++) {

                // if we did not see a skew key in this table, continue to the
                // next table; the map may have no entry for this tag, so guard
                // against unboxing a null
                Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte) bigKeyTbl));
                if (existing == null || !existing) {
                    continue;
                }

                try {
                    Path specPath = conf.getBigKeysDirMap().get((byte) bigKeyTbl);
                    Path bigKeyPath = getOperatorOutputPath(specPath);
                    FileSystem fs = bigKeyPath.getFileSystem(hconf);
                    delete(bigKeyPath, fs);
                    for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) {
                        if (((byte) smallKeyTbl) == bigKeyTbl) {
                            continue;
                        }
                        specPath = conf.getSmallKeysDirMap().get((byte) bigKeyTbl).get((byte) smallKeyTbl);
                        delete(getOperatorOutputPath(specPath), fs);
                    }
                } catch (IOException e) {
                    throw new HiveException(e);
                }
            }
        }
    }

    private void delete(Path operatorOutputPath, FileSystem fs) {
        try {
            fs.delete(operatorOutputPath, true);
        } catch (IOException e) {
            LOG.error(e);
        }
    }

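    /**
     * For every table in which a skew key was observed, renames this task's
     * temporary big-key and small-key outputs to their final names.
     */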
    private void commit() throws IOException {
        for (int bigKeyTbl = 0; bigKeyTbl < numAliases; bigKeyTbl++) {

            // if we did not see a skew key in this table, continue to the next
            // table; checking the map avoids an extra FileSystem.exists() call
            Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte) bigKeyTbl));
            if (existing == null || !existing) {
                continue;
            }

            Path specPath = conf.getBigKeysDirMap().get(Byte.valueOf((byte) bigKeyTbl));
            commitOutputPathToFinalPath(specPath, false);
            for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) {
                if (smallKeyTbl == bigKeyTbl) {
                    continue;
                }
                specPath = conf.getSmallKeysDirMap().get(Byte.valueOf((byte) bigKeyTbl))
                        .get(Byte.valueOf((byte) smallKeyTbl));
                // the file may not exist, and we just ignore this
                commitOutputPathToFinalPath(specPath, true);
            }
        }
    }

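    /**
     * Renames the task's temporary output under specPath to its final name.
     * A missing output is skipped when ignoreNonExisting is set; a failed
     * rename always raises an IOException.
     */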
    private void commitOutputPathToFinalPath(Path specPath, boolean ignoreNonExisting) throws IOException {
        Path outPath = getOperatorOutputPath(specPath);
        Path finalPath = getOperatorFinalPath(specPath);
        FileSystem fs = outPath.getFileSystem(hconf);
        if (ignoreNonExisting && !fs.exists(outPath)) {
            return;
        }
        if (!fs.rename(outPath, finalPath)) {
            throw new IOException("Unable to rename output to: " + finalPath);
        }
    }

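    // In-flight output location for this task (temp-named file under specPath's temp dir).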
    private Path getOperatorOutputPath(Path specPath) throws IOException {
        return new Path(Utilities.toTempPath(specPath), Utilities.toTempPath(taskId));
    }

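    // Final location the in-flight output is renamed to on commit (plain taskId name).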
    private Path getOperatorFinalPath(Path specPath) throws IOException {
        return new Path(Utilities.toTempPath(specPath), taskId);
    }

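    // Installs the counter that tracks how many skew-join follow-up jobs will run.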
    public void setSkewJoinJobCounter(LongWritable skewjoinFollowupJobs) {
        this.skewjoinFollowupJobs = skewjoinFollowupJobs;
    }

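    // Bumps the follow-up-jobs counter; the tag parameter is currently unused.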
    public void updateSkewJoinJobCounter(int tag) {
        this.skewjoinFollowupJobs.set(this.skewjoinFollowupJobs.get() + 1);
    }

}
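
Example

The handler is driven by a join operator on the reduce side: initiliaze() is
called once, handleSkew(tag) once per buffered row, endGroup() at every key
group boundary, and close() at the end of the task. The sketch below shows
that calling protocol only; SkewJoinHandlerSketch, drive(), and its arguments
are hypothetical stand-ins rather than real Hive wiring (the class sits in
the same package so it can reach the package-private endGroup()).

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.io.LongWritable;

// Hypothetical driver showing the expected call order against the class above.
public class SkewJoinHandlerSketch {

    static void drive(CommonJoinOperator<? extends OperatorDesc> joinOp,
            Configuration hconf, int[] rowTags) throws HiveException, IOException {
        SkewJoinHandler handler = new SkewJoinHandler(joinOp);
        handler.setSkewJoinJobCounter(new LongWritable(0)); // follow-up-job counter
        handler.initiliaze(hconf); // note: the method name is spelled this way in the source

        for (int tag : rowTags) {
            // One call per buffered row; once the per-tag row count reaches the
            // configured threshold, the current group is flagged as skewed.
            handler.handleSkew(tag);
        }

        // At a group boundary, the buffered rows of a skewed group are copied
        // from local storage to their DFS directories.
        handler.endGroup();

        // Commit this task's outputs; close(true) would delete them instead.
        handler.close(false);
    }
}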