com.aliyun.odps.flume.sink.OdpsWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.flume.sink.OdpsWriter.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.aliyun.odps.flume.sink;

import com.aliyun.odps.*;
import com.aliyun.odps.data.ArrayRecord;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.flume.sink.dataobject.OdpsRowDO;
import com.aliyun.odps.flume.sink.dataobject.OdpsStreamRecordPackDO;
import com.aliyun.odps.tunnel.TunnelException;
import com.aliyun.odps.tunnel.io.StreamRecordPack;
import com.aliyun.odps.tunnel.io.StreamWriter;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.math.BigDecimal;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;

/**
 * A writer for the ODPS table. It converts a list of {@link com.aliyun.odps.flume.sink.dataobject.OdpsRowDO}
 * into one record pack per partition spec and writes each pack through one of the configured stream writers.
 * A failed pack write is retried indefinitely; the pause between attempts grows from 0.2 to 10 seconds in
 * 0.2 second steps and then starts over from 0.2 seconds.
 * <p/>
 * Instances hold a {@link java.text.SimpleDateFormat}, which the JDK documents as not thread-safe, and this
 * class performs no synchronization of its own.
 */
public class OdpsWriter {
    private static final Logger logger = LoggerFactory.getLogger(OdpsWriter.class);

    /** Length of one back-off step, in milliseconds. */
    private static final long RETRY_INTERVAL_MS = 200L;
    /** Number of back-off steps; the longest pause is {@code MAX_RETRY_STEPS * RETRY_INTERVAL_MS}. */
    private static final int MAX_RETRY_STEPS = 50;

    private final StreamWriter[] streamWriters;
    private final Map<String, OdpsType> colNameTypeMap;
    private final DateFormat dateFormat;
    private final Column[] odpsColumns;
    private final Random random;
    private final TableSchema tableSchema;

    /**
     * Creates a writer bound to the schema of the given table.
     * @param odpsTable     Table whose schema is used to build records and record packs.
     * @param streamWriters Writers to send packs through; one is picked at random for every pack.
     * @param dateFormat    {@link java.text.SimpleDateFormat} pattern used to parse DATETIME values.
     * @param inputColNames Names of the input columns; empty entries are ignored.
     * @throws RuntimeException if a non-empty input column name does not exist in the table schema.
     */
    public OdpsWriter(Table odpsTable, StreamWriter[] streamWriters, String dateFormat, String[] inputColNames) {
        this.streamWriters = streamWriters;
        this.tableSchema = odpsTable.getSchema();
        this.odpsColumns = tableSchema.getColumns().toArray(new Column[0]);
        this.dateFormat = new SimpleDateFormat(dateFormat);
        this.colNameTypeMap = buildColNameTypeMap(inputColNames, tableSchema);
        this.random = new Random();
    }

    /**
     * Write list of {@link com.aliyun.odps.flume.sink.dataobject.OdpsRowDO} to ODPS table. Each record pack is
     * retried until it is written successfully or the calling thread is interrupted while waiting to retry.
     * @param rowList List of {@link com.aliyun.odps.flume.sink.dataobject.OdpsRowDO} built from event batch.
     *                A null or empty list is a no-op.
     * @throws InterruptedException if the thread is interrupted while sleeping between retries.
     */
    public void write(List<OdpsRowDO> rowList) throws InterruptedException {
        if (rowList == null || rowList.isEmpty()) {
            return;
        }
        for (OdpsStreamRecordPackDO streamRecordPackDO : buildRecordPackList(rowList)) {
            writePackWithRetry(streamRecordPackDO);
        }
    }

    /**
     * Writes one pack, retrying after any exception with a cyclic linear back-off.
     */
    private void writePackWithRetry(OdpsStreamRecordPackDO packDO) throws InterruptedException {
        int step = 0;
        while (true) {
            try {
                writePack(packDO);
                return;
            } catch (Exception e) {
                // Steps run 1..MAX_RETRY_STEPS and then wrap back to 1.
                step = step % MAX_RETRY_STEPS + 1;
                // Compute the delay once so the logged value is exactly the time slept.
                long delayMs = step * RETRY_INTERVAL_MS;
                logger.error("OdpsWriter write() error, will retry after " + delayMs + "ms...", e);
                try {
                    Thread.sleep(delayMs);
                } catch (InterruptedException e1) {
                    logger.error("OdpsWriter write() failed, user stopped retry.");
                    InterruptedException stopped =
                            new InterruptedException("OdpsWriter write() failed, user stopped retry.");
                    stopped.initCause(e1);
                    throw stopped;
                }
            }
        }
    }

    /**
     * Sends one pack through a randomly chosen stream writer, passing the partition spec only when the pack
     * has a non-empty one.
     */
    private void writePack(OdpsStreamRecordPackDO packDO) throws IOException, TunnelException {
        StreamWriter writer = streamWriters[random.nextInt(streamWriters.length)];
        if (StringUtils.isEmpty(packDO.getPartitionSpec())) {
            writer.write(packDO.getRecordPack());
        } else {
            writer.write(new PartitionSpec(packDO.getPartitionSpec()), packDO.getRecordPack());
        }
    }

    /**
     * Groups the rows by partition spec and builds one record pack per distinct spec.
     * A row whose pack creation or append fails with an {@link java.io.IOException} is logged and discarded.
     * @return the packs that were built; never null, empty when there are no rows.
     */
    private List<OdpsStreamRecordPackDO> buildRecordPackList(List<OdpsRowDO> rowDOList) {
        if (rowDOList == null || rowDOList.isEmpty()) {
            return Collections.emptyList();
        }
        Map<String, OdpsStreamRecordPackDO> partitionPackMap = Maps.newHashMap();
        for (OdpsRowDO rowDO : rowDOList) {
            String partitionSpec = rowDO.getPartitionSpec();
            OdpsStreamRecordPackDO packDO = partitionPackMap.get(partitionSpec);
            try {
                if (packDO == null) {
                    packDO = new OdpsStreamRecordPackDO();
                    packDO.setPartitionSpec(partitionSpec);
                    packDO.setRecordPack(new StreamRecordPack(tableSchema));
                    partitionPackMap.put(partitionSpec, packDO);
                }
                packDO.getRecordPack().append(buildRecord(rowDO.getRowMap()));
            } catch (IOException e) {
                logger.error("OdpsWriter buildRecordList() error, discard record.", e);
            }
        }
        return Lists.newArrayList(partitionPackMap.values());
    }

    /**
     * Builds a record over the table columns and fills it from the column-name to string-value map.
     */
    private Record buildRecord(Map<String, String> rowMap) {
        Record record = new ArrayRecord(odpsColumns);
        for (Map.Entry<String, String> mapEntry : rowMap.entrySet()) {
            String colName = mapEntry.getKey();
            setField(record, colName, mapEntry.getValue(), colNameTypeMap.get(colName));
        }
        return record;
    }

    /**
     * Converts the string value to the column type and stores it in the record. Nothing is set when the
     * field name or the value is empty, when a DATETIME value cannot be parsed (the error is logged), or
     * when a BOOLEAN value is neither "true" nor "false" (case-insensitive).
     * @throws RuntimeException if the type is null (the field is not a configured input column) or is not
     *                          one of STRING, BIGINT, DATETIME, DOUBLE, BOOLEAN, DECIMAL.
     */
    private void setField(Record record, String field, String fieldValue, OdpsType odpsType) {
        if (StringUtils.isEmpty(field) || StringUtils.isEmpty(fieldValue)) {
            return;
        }
        if (odpsType == null) {
            // Switching on a null enum would fail with a bare NullPointerException; name the field instead.
            throw new RuntimeException("Unknown column type: null, field=" + field);
        }
        switch (odpsType) {
        case STRING:
            record.setString(field, fieldValue);
            break;
        case BIGINT:
            record.setBigint(field, Long.parseLong(fieldValue));
            break;
        case DATETIME:
            try {
                record.setDatetime(field, dateFormat.parse(fieldValue));
            } catch (ParseException e) {
                logger.error("OdpsWriter parse date error. Date value = " + fieldValue, e);
            }
            break;
        case DOUBLE:
            record.setDouble(field, Double.parseDouble(fieldValue));
            break;
        case BOOLEAN:
            if (StringUtils.equalsIgnoreCase(fieldValue, "true")) {
                record.setBoolean(field, true);
            } else if (StringUtils.equalsIgnoreCase(fieldValue, "false")) {
                record.setBoolean(field, false);
            }
            break;
        case DECIMAL:
            record.setDecimal(field, new BigDecimal(fieldValue));
            // This break was missing: DECIMAL used to fall through to default and always throw.
            break;
        default:
            throw new RuntimeException("Unknown column type: " + odpsType);
        }
    }

    /**
     * Maps every non-empty input column name to the type of the table column with the same name.
     * @throws RuntimeException if an input column name is not a column of the table schema.
     */
    private Map<String, OdpsType> buildColNameTypeMap(String[] inputColNames, TableSchema tableSchema) {
        Map<String, OdpsType> odpsNameTypeMap = Maps.newHashMap();
        for (Column column : tableSchema.getColumns()) {
            odpsNameTypeMap.put(column.getName(), column.getType());
        }
        Map<String, OdpsType> colNameTypeMap = Maps.newHashMap();
        for (String colName : inputColNames) {
            if (StringUtils.isEmpty(colName)) {
                continue;
            }
            if (!odpsNameTypeMap.containsKey(colName)) {
                throw new RuntimeException(this.getClass().getName()
                        + " buildColNameTypeMap() error, field not exists in odps table, field=" + colName);
            }
            colNameTypeMap.put(colName, odpsNameTypeMap.get(colName));
        }
        return colNameTypeMap;
    }
}