com.aliyun.openservices.tablestore.hive.TableStoreInputFormat.java Source code

Introduction

Here is the source code for com.aliyun.openservices.tablestore.hive.TableStoreInputFormat.java. The class implements the classic org.apache.hadoop.mapred.InputFormat interface with PrimaryKeyWritable keys and RowWritable values, and it bridges Hive queries to Alibaba Cloud Tablestore by translating Hive's job configuration and delegating split computation and row reading to the mapreduce-level classes in com.aliyun.openservices.tablestore.hadoop.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.aliyun.openservices.tablestore.hive;

import java.util.List;
import java.util.ArrayList;
import java.io.IOException;

import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.RecordReader;
import com.aliyun.openservices.tablestore.hadoop.PrimaryKeyWritable;
import com.aliyun.openservices.tablestore.hadoop.RowWritable;
import com.aliyun.openservices.tablestore.hadoop.TableStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.shims.ShimLoader;

import com.alicloud.openservices.tablestore.SyncClientInterface;
import com.alicloud.openservices.tablestore.model.TableMeta;
import com.alicloud.openservices.tablestore.model.PrimaryKeySchema;
import com.alicloud.openservices.tablestore.model.PrimaryKey;
import com.alicloud.openservices.tablestore.model.PrimaryKeyColumn;
import com.alicloud.openservices.tablestore.model.PrimaryKeyValue;
import com.alicloud.openservices.tablestore.model.Direction;
import com.alicloud.openservices.tablestore.model.RangeRowQueryCriteria;
import com.alicloud.openservices.tablestore.model.DescribeTableRequest;
import com.alicloud.openservices.tablestore.model.DescribeTableResponse;
import com.alicloud.openservices.tablestore.core.utils.Preconditions;

public class TableStoreInputFormat implements InputFormat<PrimaryKeyWritable, RowWritable> {
    private static final Logger logger = LoggerFactory.getLogger(TableStoreInputFormat.class);

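    /**
     * Describes the target table, builds a full-range scan criteria over it, and
     * delegates split computation to the mapreduce-level TableStoreInputFormat;
     * each resulting split is wrapped as a mapred TableStoreInputSplit bound to
     * the job's first input path.
     */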
    @Override
    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
        Configuration dest = translateConfig(job);
        SyncClientInterface ots = null;
        String columns = job.get(TableStoreConsts.COLUMNS_MAPPING);
        if (columns == null) {
            columns = job.get(serdeConstants.LIST_COLUMNS);
        }
        logger.debug("columns to get: {}", columns);
        List<org.apache.hadoop.mapreduce.InputSplit> splits;
        try {
            ots = TableStore.newOtsClient(dest);
            TableMeta meta = fetchTableMeta(ots, job.get(TableStoreConsts.TABLE_NAME));
            RangeRowQueryCriteria criteria = fetchCriteria(meta, columns);
            com.aliyun.openservices.tablestore.hadoop.TableStoreInputFormat.addCriteria(dest, criteria);
            splits = com.aliyun.openservices.tablestore.hadoop.TableStoreInputFormat.getSplits(dest, ots);
        } finally {
            if (ots != null) {
                ots.shutdown();
                ots = null;
            }
        }
        InputSplit[] res = new InputSplit[splits.size()];
        JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(new Job(job));
        Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
        int i = 0;
        for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
            res[i] = new TableStoreInputSplit(
                    (com.aliyun.openservices.tablestore.hadoop.TableStoreInputSplit) split, tablePaths[0]);
            ++i;
        }
        return res;
    }

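    /**
     * Copies the Tablestore credential and endpoint settings from the Hive job
     * configuration into a fresh Configuration keyed the way the underlying
     * tablestore-hadoop connector expects.
     */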
    private static Configuration translateConfig(Configuration from) {
        Configuration to = new Configuration();
        {
            com.aliyun.openservices.tablestore.hadoop.Credential cred = new com.aliyun.openservices.tablestore.hadoop.Credential(
                    from.get(TableStoreConsts.ACCESS_KEY_ID), from.get(TableStoreConsts.ACCESS_KEY_SECRET),
                    from.get(TableStoreConsts.SECURITY_TOKEN));
            TableStore.setCredential(to, cred);
        }
        {
            String endpoint = from.get(TableStoreConsts.ENDPOINT);
            String instance = from.get(TableStoreConsts.INSTANCE);
            com.aliyun.openservices.tablestore.hadoop.Endpoint ep;
            if (instance == null) {
                ep = new com.aliyun.openservices.tablestore.hadoop.Endpoint(endpoint);
            } else {
                ep = new com.aliyun.openservices.tablestore.hadoop.Endpoint(endpoint, instance);
            }
            TableStore.setEndpoint(to, ep);
        }
        return to;
    }

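    /**
     * Builds a criteria that scans the whole table: the primary-key range runs
     * from INF_MIN to INF_MAX, at most one version per cell, restricted to the
     * requested columns.
     */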
    private static RangeRowQueryCriteria fetchCriteria(TableMeta meta, String strColumns) {
        RangeRowQueryCriteria res = new RangeRowQueryCriteria(meta.getTableName());
        res.setMaxVersions(1);
        List<PrimaryKeyColumn> lower = new ArrayList<PrimaryKeyColumn>();
        List<PrimaryKeyColumn> upper = new ArrayList<PrimaryKeyColumn>();
        for (PrimaryKeySchema schema : meta.getPrimaryKeyList()) {
            lower.add(new PrimaryKeyColumn(schema.getName(), PrimaryKeyValue.INF_MIN));
            upper.add(new PrimaryKeyColumn(schema.getName(), PrimaryKeyValue.INF_MAX));
        }
        res.setInclusiveStartPrimaryKey(new PrimaryKey(lower));
        res.setExclusiveEndPrimaryKey(new PrimaryKey(upper));
        res.addColumnsToGet(strColumns.split(","));
        return res;
    }

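    /** Fetches the table's schema (including its primary-key definition) via DescribeTable. */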
    private static TableMeta fetchTableMeta(SyncClientInterface ots, String table) {
        DescribeTableResponse resp = ots.describeTable(new DescribeTableRequest(table));
        return resp.getTableMeta();
    }

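    /**
     * Opens the mapreduce-level TableStoreRecordReader for the given split and
     * adapts it to the classic mapred RecordReader interface that Hive drives
     * during query execution.
     */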
    @Override
    public RecordReader<PrimaryKeyWritable, RowWritable> getRecordReader(InputSplit split, JobConf job,
            Reporter reporter) throws IOException {
        Preconditions.checkNotNull(split, "split must be nonnull");
        Preconditions.checkNotNull(job, "job must be nonnull");
        Preconditions.checkArgument(split instanceof TableStoreInputSplit,
                "split must be an instance of " + TableStoreInputSplit.class.getName());
        TableStoreInputSplit tsSplit = (TableStoreInputSplit) split;
        Configuration conf;
        if (isHiveConfiguration(job)) {
            // map task, such as 'select *'
            conf = translateConfig(job);
        } else {
            // reduce task, such as 'select count(*)'
            conf = job;
        }
        final com.aliyun.openservices.tablestore.hadoop.TableStoreRecordReader rdr = new com.aliyun.openservices.tablestore.hadoop.TableStoreRecordReader();
        rdr.initialize(tsSplit.getDelegated(), conf);
        return new RecordReader<PrimaryKeyWritable, RowWritable>() {
            @Override
            public boolean next(PrimaryKeyWritable key, RowWritable value) throws IOException {
                boolean next = rdr.nextKeyValue();
                if (next) {
                    key.setPrimaryKey(rdr.getCurrentKey().getPrimaryKey());
                    value.setRow(rdr.getCurrentValue().getRow());
                }
                return next;
            }

            @Override
            public PrimaryKeyWritable createKey() {
                return new PrimaryKeyWritable();
            }

            @Override
            public RowWritable createValue() {
                return new RowWritable();
            }

            @Override
            public long getPos() throws IOException {
                return 0;
            }

            @Override
            public void close() throws IOException {
                rdr.close();
            }

            @Override
            public float getProgress() throws IOException {
                return rdr.getProgress();
            }
        };
    }

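    /**
     * Returns true if the configuration still carries the raw Tablestore endpoint
     * property set by Hive, i.e. it has not yet been translated into the
     * connector's own keys by translateConfig.
     */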
    private boolean isHiveConfiguration(Configuration conf) {
        String endpoint = conf.get(TableStoreConsts.ENDPOINT);
        return endpoint != null;
    }
}
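
For orientation, below is a minimal, hypothetical sketch of driving this InputFormat directly from a JobConf instead of through Hive. The class name TableStoreScanSketch, the endpoint, the credentials, the table and column names, and the dummy input path are all placeholders, and the sketch assumes the TableStoreConsts keys are accessible from user code; in a real deployment Hive supplies these properties from the table definition and uses the table's location as the input path.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

import com.aliyun.openservices.tablestore.hadoop.PrimaryKeyWritable;
import com.aliyun.openservices.tablestore.hadoop.RowWritable;
import com.aliyun.openservices.tablestore.hive.TableStoreConsts;
import com.aliyun.openservices.tablestore.hive.TableStoreInputFormat;

public class TableStoreScanSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        // Placeholder connection settings; Hive normally injects these from the table definition.
        job.set(TableStoreConsts.ENDPOINT, "https://my-instance.cn-hangzhou.ots.aliyuncs.com");
        job.set(TableStoreConsts.ACCESS_KEY_ID, "<access-key-id>");
        job.set(TableStoreConsts.ACCESS_KEY_SECRET, "<access-key-secret>");
        job.set(TableStoreConsts.TABLE_NAME, "my_table");
        job.set(TableStoreConsts.COLUMNS_MAPPING, "pk1,col1,col2");
        // getSplits() reads the job's input path when wrapping splits, so set a dummy one.
        FileInputFormat.setInputPaths(job, new Path("/tmp/tablestore-dummy"));

        TableStoreInputFormat format = new TableStoreInputFormat();
        for (InputSplit split : format.getSplits(job, 1)) {
            RecordReader<PrimaryKeyWritable, RowWritable> reader =
                    format.getRecordReader(split, job, Reporter.NULL);
            try {
                PrimaryKeyWritable key = reader.createKey();
                RowWritable value = reader.createValue();
                while (reader.next(key, value)) {
                    System.out.println(key.getPrimaryKey() + " -> " + value.getRow());
                }
            } finally {
                reader.close();
            }
        }
    }
}

In normal use Hive constructs this InputFormat itself through the connector's Hive storage handler, so the wiring above is only for illustration.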