com.aliyun.odps.mapred.bridge.utils.ValidatorFactory.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.mapred.bridge.utils.ValidatorFactory.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.bridge.utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringUtils;

import com.aliyun.odps.Column;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.Table;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.data.VolumeInfo;
import com.aliyun.odps.mapred.bridge.ErrorCode;
import com.aliyun.odps.mapred.bridge.MetaExplorer;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;
import com.aliyun.odps.pipeline.Pipeline;
import com.aliyun.odps.pipeline.Pipeline.TransformNode;

public class ValidatorFactory {

    private static boolean between(long i, long l, long h) {
        if (i > h || i < l) {
            return false;
        }
        return true;
    }

    private static boolean validateColumns(String[] columns, Column[] schema, StringBuilder errorMsg) {
        Set<String> schemaColums = new HashSet<String>();
        for (int i = 0; i < schema.length; ++i) {
            schemaColums.add(schema[i].getName());
        }
        for (int i = 0; i < columns.length; ++i) {
            if (!schemaColums.contains(columns[i])) {
                errorMsg.append("Can't find column " + columns[i] + " from key schema.");
                return false;
            }
        }
        return true;
    }

    static void throwException(ErrorCode e) throws OdpsException {
        throw new OdpsException(e.code, e.msg);
    }

    static void throwException(ErrorCode e, String info) throws OdpsException {
        throw new OdpsException(e.code, e.msg + ":" + info);
    }

    public static void throwException(ErrorCode e, Throwable cause) throws OdpsException {
        throw new OdpsException(e.code, e.msg, cause);

    }

    static class CompositeValidator implements Validator {

        List<Validator> validators = new ArrayList<Validator>();

        public void addValidator(Validator v) {
            validators.add(v);
        }

        @Override
        public void validate() throws OdpsException {
            for (Validator v : validators) {
                v.validate();
            }
        }
    }

    /**
     * Meta Validator.
     *
     * Validate the input/output tables available.
     */
    static class InputOutputValidator implements Validator {

        private JobConf job;
        private MetaExplorer explorer;

        public InputOutputValidator(JobConf job, MetaExplorer explorer) {
            this.job = job;
            this.explorer = explorer;
        }

        /**
         * Validate if table
         *
         * 1. Exists
         * 2. Not a view
         * 3. Partition column exists (if applicable)
         * 4. Column exists (if applicable)
         *
         * @param table
         *     The table
         * @param distinctTables
         *     Cache tables in the same group.
         * @throws OdpsException
         */
        private void validateTable(TableInfo table, Map<String, Table> distinctTables) throws OdpsException {

            Table tableDesc = distinctTables.get(table.getProjectName() + "." + table.getTableName());
            if (tableDesc == null) {
                // check if table exists
                if (!explorer.existsTable(table.getProjectName(), table.getTableName())) {
                    throwException(ErrorCode.TABLE_NOT_FOUND, table.toString());
                }

                // view is now unsupported
                tableDesc = explorer.getTable(table.getProjectName(), table.getTableName());
                if (tableDesc.isVirtualView()) {
                    throwException(ErrorCode.VIEW_TABLE, table.toString());
                }
                distinctTables.put(table.getProjectName() + "." + table.getTableName(), tableDesc);
            }

            // Check if part spec is valid
            Map<String, String> partSpec = table.getPartSpec();
            if (partSpec != null) {
                List<Column> cols = tableDesc.getSchema().getPartitionColumns();
                List<String> colNames = new ArrayList<String>();
                for (Column c : cols) {
                    colNames.add(c.getName());
                }
                for (String key : partSpec.keySet()) {
                    if (!colNames.contains(key.toLowerCase())) {
                        throwException(ErrorCode.PARTITION_COLUMN_NOT_FOUND, key);
                    }
                }
            }

            // Check if select columns exist in source table.
            if (table.getCols() != null) {
                List<Column> schema = tableDesc.getSchema().getColumns();
                HashSet<String> names = new HashSet<String>(Arrays.asList(table.getCols()));
                names.removeAll(Arrays.asList(SchemaUtils.getNames(schema.toArray(new Column[schema.size()]))));
                if (!names.isEmpty()) {
                    throwException(ErrorCode.COLUMN_NOT_FOUND, StringUtils.join(names, ","));
                }
            }
        }

        /**
         * Validate if volume exists and is not duplicated with other volume in the same group
         *
         * @param volume
         *     Volume
         * @param distinctVolumes
         *     Cache volumes in the same group
         * @throws OdpsException
         */
        private void validateVolume(VolumeInfo volume, Set<String> distinctVolumes) throws OdpsException {
            String volumeDesc = volume.getProjectName() + "." + volume.getVolumeName() + "." + volume.getPartSpec();
            if (distinctVolumes.contains(volumeDesc)) {
                throwException(ErrorCode.DUPLICATED_VOLUME_FOUND, volume.toString());
            }
            distinctVolumes.add(volumeDesc);
            // Check if volume spec is valid
            if (volume.getProjectName() == null) {
                volume.setProjectName(explorer.getDefaultProject());
            }
            if (volume.getVolumeName() == null || volume.getVolumeName().isEmpty()) {
                throwException(ErrorCode.MALFORMED_VOLUME_SPEC, "volume name not specified");
            }
            if (volume.getPartSpec() == null || volume.getPartSpec().isEmpty()) {
                throwException(ErrorCode.MALFORMED_VOLUME_SPEC, "volume partition not specified");
            }

            // Check if volume exists
            if (!explorer.existsVolume(volume.getProjectName(), volume.getVolumeName())) {
                throwException(ErrorCode.VOLUME_NOT_FOUND, volume.toString());
            }
            // Check if label is valid
            if (!between(volume.getLabel().length(), 3, 32)
                    || !volume.getLabel().matches("[A-Z,a-z,0-9,_,#,\\.,\\-]*")) {
                throwException(ErrorCode.MALFORMED_VOLUME_SPEC, "illegal label " + volume.getLabel()
                        + ", expecting [A-Z,a-z,0-9,_,#,\\.,\\-]* with length of [3,32]");
            }
        }

        private void validateTables(TableInfo[] tables) throws OdpsException {
            Map<String, Table> distinctOutputTables = new HashMap<String, Table>();
            Set<String> labelNames = new HashSet<String>();
            for (TableInfo table : tables) {
                validateTable(table, distinctOutputTables);
                if (labelNames.contains(table.getLabel())) {
                    throwException(ErrorCode.OUTPUT_LABEL_NOT_UNIQUE, table.getLabel());
                }
                labelNames.add(table.getLabel());
            }
        }

        private void validateVolumes(VolumeInfo[] volumes) throws OdpsException {
            Set<String> distinctVolumes = new HashSet<String>();
            Set<String> labelNames = new HashSet<String>();
            for (VolumeInfo volume : volumes) {
                validateVolume(volume, distinctVolumes);
                if (labelNames.contains(volume.getLabel())) {
                    throwException(ErrorCode.VOLUME_LABEL_NOT_UNIQUE, volume.getLabel());
                }
                labelNames.add(volume.getLabel());
            }
        }

        @Override
        public void validate() throws OdpsException {
            // Validate inputs
            TableInfo[] tables = InputUtils.getTables(job);
            if (tables != null && tables.length > 0) {
                if (tables.length > 1024) {
                    throwException(ErrorCode.TOO_MANY_INPUT_TABLE, "Expecting no more than 1024 partitions. ");
                }
                Map<String, Table> distinctInputTables = new HashMap<String, Table>();
                for (TableInfo table : tables) {
                    validateTable(table, distinctInputTables);
                    if (distinctInputTables.size() > 64) {
                        throwException(ErrorCode.TOO_MANY_INPUT_TABLE,
                                "Expecting no more than 64 distinct tables. ");
                    }
                }
            }

            // Validate outputs
            tables = OutputUtils.getTables(job);
            if (tables != null && tables.length > 0) {
                validateTables(tables);
            }

            // Validate input volumes
            VolumeInfo[] volumes = InputUtils.getVolumes(job);
            if (volumes != null && volumes.length > 0) {
                if (volumes.length > 256) {
                    throwException(ErrorCode.TOO_MANY_INPUT_VOLUME, "Expecting no more than 256 partitions. ");
                }
                validateVolumes(volumes);
            }

            // Validate output volumes
            volumes = OutputUtils.getVolumes(job);
            if (volumes != null && volumes.length > 0) {
                if (volumes.length > 256) {
                    throwException(ErrorCode.TOO_MANY_OUTPUT_VOLUME, "Expecting no more than 256 partitions. ");
                }
                validateVolumes(volumes);
            }
        }

    }

    /**
     * Config Validator.
     *
     * Validate the job configuration.
     */
    static class ConfigValidator implements Validator {

        private JobConf job;

        public ConfigValidator(JobConf job) {
            this.job = job;
        }

        @Override
        public void validate() throws OdpsException {
            if (job.get("odps.mapred.map.class") == null) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "Mapper class not specified.");
            }

            if (InputUtils.getTables(job) == null && !between(job.getNumMapTasks(), 0, 99999)) {
                throwException(ErrorCode.ILLEGAL_CONFIG,
                        "Map tasks " + job.getNumMapTasks() + " out of bound, should be in [0, 99999] range.");
            }

            if (!between(job.getNumReduceTasks(), 0, 99999)) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "Reduce tasks " + job.getNumReduceTasks()
                        + " out of bound, should be in [0, 99999] range.");
            }

            if (job.getNumReduceTasks() != 0 && job.get("odps.mapred.reduce.class") == null) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "Reducer class not specified.");
            }

            if (job.getNumReduceTasks() != 0 && !validateSchema(job.getMapOutputKeySchema())) {
                throwException(ErrorCode.ILLEGAL_CONFIG,
                        "Malformed map output key schema:" + job.get("odps.mapred.mapoutput.key.schema"));
            }
            if (job.getNumReduceTasks() != 0 && !validateSchema(job.getMapOutputValueSchema())) {
                throwException(ErrorCode.ILLEGAL_CONFIG,
                        "Malformed map output value schema:" + job.get("odps.mapred.mapoutput.value.schema"));
            }

            if (!between(job.getMemoryForJVM(), 256, 12 * 1024)) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "Memory for jvm " + job.getMemoryForJVM()
                        + " out of bound, should be in [256, 12288] range.");
            }
            if (job.getOutputKeySortColumns().length != job.getOutputKeySortOrder().length) {
                throwException(ErrorCode.ILLEGAL_CONFIG,
                        "Key sort columns length should match key sort order length. Sort columns are "
                                + Arrays.toString(job.getOutputKeySortColumns()) + " but sort order is "
                                + Arrays.toString(job.getOutputKeySortOrder()));
            }
            StringBuilder errorMsg = new StringBuilder();
            if (!validatePartitionColumns(job, errorMsg)) {
                throwException(ErrorCode.ILLEGAL_CONFIG,
                        "Key partition columns should be inside of output key columns. " + errorMsg);
            }
            if (!between(job.getFunctionTimeout(), 1, 3600)) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "Function timeout " + job.getFunctionTimeout()
                        + " out of bound, should be in [1, 3600] range.");
            }
            if (!between(job.getInstancePriority(), 0, 9)) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "Instance priority" + job.getInstancePriority()
                        + " out of bound, should be in [0, 9] range.");
            }
            //      if (!between((long)(job.getCombinerCacheSpillPercent()*100), 0, 100)) {
            //        throwException(ErrorCode.ILLEGAL_CONFIG, "Combiner Cache Spill Percent" + job.getCombinerCacheSpillPercent()
            //                                                 + " out of bound, should be in [0, 1] range.");
            //      }
        }

        private boolean validatePartitionColumns(JobConf job, StringBuilder errorMsg) {
            if (job.getNumReduceTasks() > 0 && job.getPartitionerClass() == null) {
                return validateColumns(job.getPartitionColumns(), job.getMapOutputKeySchema(), errorMsg);
            }

            return true;
        }

        private boolean validateSchema(Column[] schema) {
            if (schema == null || schema.length < 1) {
                return false;
            }
            return true;
        }
    }

    /**
     * Resource Validator.
     *
     * Validate the resources.
     */
    static class ResourceValidator implements Validator {

        private JobConf job;
        private MetaExplorer explorer;

        public ResourceValidator(JobConf job, MetaExplorer explorer) {
            this.job = job;
            this.explorer = explorer;
        }

        @Override
        public void validate() throws OdpsException {
            // Check if resource exist
            String[] res = job.getResources();
            if (res == null || res.length <= 0) {
                return;
            }
            if (res.length > 256) {
                throwException(ErrorCode.TOO_MANY_RESOURCE_ITEMS, "Acturally " + String.valueOf(res.length));
            }
        }
    }

    static class PipelineValidtor implements Validator {

        private Pipeline pipeline;

        public PipelineValidtor(JobConf job) {
            this.pipeline = Pipeline.fromJobConf(job);
        }

        @Override
        public void validate() throws OdpsException {
            if (pipeline == null) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "Pipeline not specified.");
            }
            if (pipeline.getNodeNum() == 0) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "Pipeline is empty.");
            }
            if (pipeline.getFirstNode().getType().equals("reduce")) {
                throwException(ErrorCode.ILLEGAL_CONFIG, "First operator of pipeline must be mapper");
            }
            for (int i = 0; i < pipeline.getNodeNum(); i++) {
                if (i > 0 && pipeline.getNode(i).getType().equals("map")) {
                    throwException(ErrorCode.ILLEGAL_CONFIG, "Operators after first node must be reducer");
                }
                if (i < pipeline.getNodeNum() - 1) {
                    if (pipeline.getNode(i).getOutputKeySchema() == null) {
                        throwException(ErrorCode.ILLEGAL_CONFIG, "Operator ouput key schema not set");
                    }
                    if (pipeline.getNode(i).getOutputValueSchema() == null) {
                        throwException(ErrorCode.ILLEGAL_CONFIG, "Operator ouput value schema not set");
                    }
                }
            }
            StringBuilder errorMsg = new StringBuilder();
            if (!validatePartitionColumns(pipeline, errorMsg)) {
                throwException(ErrorCode.ILLEGAL_CONFIG,
                        "Key partition columns should be inside of output key columns. " + errorMsg);
            }
        }

        private boolean validatePartitionColumns(Pipeline pipeline, StringBuilder errorMsg) {
            TransformNode node = null;
            for (int i = 0; i < pipeline.getNodeNum() - 1; ++i) {
                node = pipeline.getNode(i);
                if (node.getPartitionerClass() == null
                        && !validateColumns(node.getPartitionColumns(), node.getOutputKeySchema(), errorMsg)) {
                    return false;
                }
            }

            return true;
        }

    }

    public static Validator getValidator(JobConf job, MetaExplorer explorer) {
        CompositeValidator validator = new CompositeValidator();
        if (Pipeline.fromJobConf(job) == null) {
            validator.addValidator(new ConfigValidator(job));
        } else {
            validator.addValidator(new PipelineValidtor(job));
        }
        validator.addValidator(new InputOutputValidator(job, explorer));
        validator.addValidator(new ResourceValidator(job, explorer));
        return validator;

    }

}