com.alibaba.otter.node.etl.extract.extractor.FileExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.alibaba.otter.node.etl.extract.extractor.FileExtractor.java

Source

/*
 * Copyright (C) 2010-2101 Alibaba Group Holding Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.otter.node.etl.extract.extractor;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.otter.node.etl.extract.exceptions.ExtractException;
import com.alibaba.otter.shared.common.model.config.ConfigHelper;
import com.alibaba.otter.shared.common.model.config.data.DataMediaPair;
import com.alibaba.otter.shared.common.model.config.pipeline.Pipeline;
import com.alibaba.otter.shared.common.utils.extension.ExtensionFactory;
import com.alibaba.otter.shared.common.utils.thread.ExecutorTemplate;
import com.alibaba.otter.shared.common.utils.thread.ExecutorTemplateGetter;
import com.alibaba.otter.shared.etl.extend.fileresolver.FileInfo;
import com.alibaba.otter.shared.etl.extend.fileresolver.FileResolver;
import com.alibaba.otter.shared.etl.extend.fileresolver.support.RemoteDirectoryFetcher;
import com.alibaba.otter.shared.etl.extend.fileresolver.support.RemoteDirectoryFetcherAware;
import com.alibaba.otter.shared.etl.model.DbBatch;
import com.alibaba.otter.shared.etl.model.EventColumn;
import com.alibaba.otter.shared.etl.model.EventData;
import com.alibaba.otter.shared.etl.model.EventType;
import com.alibaba.otter.shared.etl.model.FileBatch;
import com.alibaba.otter.shared.etl.model.FileData;
import com.alibaba.otter.shared.etl.model.Identity;
import com.alibaba.otter.shared.etl.model.RowBatch;

/**
 * Extractor that derives the files associated with the rows of a {@link RowBatch} and attaches
 * them to the enclosing {@link DbBatch} as a {@link FileBatch}.
 * <p>
 * For every non-DDL row the configured {@link FileResolver} of each matching {@link DataMediaPair}
 * is asked which files belong to the row. When the pipeline enables file detection, the
 * last-modified time and size of every collected file are additionally refreshed from the local
 * file system.
 * 
 * @author jianghang 2012-4-18 04:52:00
 * @version 4.0.2
 */
public class FileExtractor extends AbstractExtractor<DbBatch> {

    // Registered under this class (it used to be registered under ExecutorTemplate by mistake),
    // so that messages emitted here show up under the FileExtractor logging category.
    private static final Logger logger = LoggerFactory.getLogger(FileExtractor.class);
    private ExtensionFactory extensionFactory;

    private RemoteDirectoryFetcher arandaRemoteDirectoryFetcher;
    /** Number of detection attempts per file; values below 1 are treated as 1. */
    private int retry = 3;
    private ExecutorTemplateGetter executorTemplateGetter;

    /**
     * Resolves the files referenced by the row batch of {@code dbBatch} and stores them, together
     * with a copy of the row batch identity (channel, pipeline and process id), as the file batch
     * of {@code dbBatch}.
     * 
     * @param dbBatch batch whose row batch is inspected and whose file batch is replaced
     * @throws ExtractException if a row has no data media pair or a configured resolver cannot be
     *         obtained
     */
    public void extract(DbBatch dbBatch) throws ExtractException {
        RowBatch rowBatch = dbBatch.getRowBatch();
        List<FileData> fileDatas = doFileExtract(rowBatch);

        FileBatch fileBatch = new FileBatch();
        fileBatch.setFiles(fileDatas);

        // The file batch gets its own Identity instance carrying the same ids as the row batch.
        Identity identity = new Identity();
        identity.setChannelId(rowBatch.getIdentity().getChannelId());
        identity.setPipelineId(rowBatch.getIdentity().getPipelineId());
        identity.setProcessId(rowBatch.getIdentity().getProcessId());
        fileBatch.setIdentity(identity);

        dbBatch.setFileBatch(fileBatch);
    }

    /**
     * Collects the de-duplicated {@link FileData} entries for all non-DDL rows of the batch.
     * 
     * @param rowBatch rows to inspect
     * @return the collected files, in first-seen order; never {@code null}
     * @throws ExtractException if no data media pair list exists for a row's table id, or the
     *         extension factory returns no resolver for a configured pair
     */
    private List<FileData> doFileExtract(RowBatch rowBatch) {
        List<FileData> fileDatas = new ArrayList<FileData>();
        Pipeline pipeline = getPipeline(rowBatch.getIdentity().getPipelineId());
        List<EventData> eventDatas = rowBatch.getDatas();
        for (EventData eventData : eventDatas) {
            if (eventData.getEventType().isDdl()) {
                // DDL events are skipped entirely.
                continue;
            }

            List<DataMediaPair> dataMediaPairs = ConfigHelper.findDataMediaPairByMediaId(pipeline,
                    eventData.getTableId());
            if (dataMediaPairs == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId()
                        + " dataMediaPair is null,please check");
            }

            for (DataMediaPair dataMediaPair : dataMediaPairs) {
                if (!hasResolverConfigured(dataMediaPair)) {
                    continue;
                }

                FileResolver fileResolver = extensionFactory.getExtension(FileResolver.class,
                        dataMediaPair.getResolverData());
                if (fileResolver == null) {
                    throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId()
                            + " the fileResolver className  = " + dataMediaPair.getResolverData().getClazzPath()
                            + " is null ,please check the class");
                }

                if (fileResolver instanceof RemoteDirectoryFetcherAware) {
                    ((RemoteDirectoryFetcherAware) fileResolver)
                            .setRemoteDirectoryFetcher(arandaRemoteDirectoryFetcher);
                }

                // De-duplicate via FileData.equals(); the list keeps first-seen order.
                for (FileData data : getSingleRowFileInfos(dataMediaPair.getId(), fileResolver, eventData)) {
                    if (!fileDatas.contains(data)) {
                        fileDatas.add(data);
                    }
                }
            }
        }

        // Optionally refresh file metadata from the file system.
        if (pipeline.getParameters().getFileDetect()) {
            doFileDetectCollector(pipeline, fileDatas);
        }
        return fileDatas;
    }

    /**
     * Tells whether the pair carries resolver data complete enough to obtain a resolver: the
     * resolver data and its extension type must be present, and the class path (for class-type
     * extensions) or source text (for source-type extensions) must not be blank.
     * 
     * @param dataMediaPair pair to check
     * @return {@code true} if a resolver should be looked up for this pair
     */
    private static boolean hasResolverConfigured(DataMediaPair dataMediaPair) {
        if (dataMediaPair.getResolverData() == null
                || dataMediaPair.getResolverData().getExtensionDataType() == null) {
            return false;
        }
        if (dataMediaPair.getResolverData().getExtensionDataType().isClazz()
                && StringUtils.isBlank(dataMediaPair.getResolverData().getClazzPath())) {
            return false;
        }
        if (dataMediaPair.getResolverData().getExtensionDataType().isSource()
                && StringUtils.isBlank(dataMediaPair.getResolverData().getSourceText())) {
            return false;
        }
        return true;
    }

    /**
     * Asks the resolver for the files of a single row and converts them to {@link FileData}.
     * <p>
     * The resolver receives a map from upper-cased column name to column value, built from the
     * key columns first and the updated columns second (so an updated column overrides a key
     * column of the same name).
     * 
     * @param pairId id of the data media pair the resolver belongs to
     * @param fileResolver resolver to query
     * @param eventData row event
     * @return the files of the row; empty (never {@code null}) for a DELETE the resolver does not
     *         want to handle, or when the resolver reports no files
     */
    private List<FileData> getSingleRowFileInfos(long pairId, FileResolver fileResolver, EventData eventData) {
        List<FileData> fileDatas = new ArrayList<FileData>();
        if (eventData.getEventType() == EventType.DELETE && !fileResolver.isDeleteRequired()) {
            return fileDatas;
        }

        Map<String, String> rowMap = new HashMap<String, String>();
        List<EventColumn> keyColumns = eventData.getKeys();
        List<EventColumn> eventColumns = eventData.getUpdatedColumns();
        // A fixed locale keeps the keys stable regardless of the JVM default locale
        // (e.g. under a Turkish locale "id" would otherwise upper-case to "\u0130D").
        for (EventColumn eventColumn : keyColumns) {
            rowMap.put(eventColumn.getColumnName().toUpperCase(Locale.ENGLISH), eventColumn.getColumnValue());
        }
        for (EventColumn eventColumn : eventColumns) {
            rowMap.put(eventColumn.getColumnName().toUpperCase(Locale.ENGLISH), eventColumn.getColumnValue());
        }

        FileInfo[] fileInfos = fileResolver.getFileInfo(rowMap);
        if (fileInfos == null) {
            return fileDatas;
        }
        for (FileInfo fileInfo : fileInfos) {
            FileData fileData = new FileData();
            fileData.setPairId(pairId);
            fileData.setTableId(eventData.getTableId());
            fileData.setEventType(eventData.getEventType());
            fileData.setLastModifiedTime(fileInfo.getLastModifiedTime());
            fileData.setNameSpace(fileInfo.getNamespace());
            fileData.setPath(fileInfo.getPath());
            fileData.setSize(fileInfo.getSize());
            fileDatas.add(fileData);
        }
        return fileDatas;
    }

    /**
     * Refreshes last-modified time and size of every file by submitting one detection task per
     * file to an {@link ExecutorTemplate} and waiting for all of them to finish.
     * 
     * @param pipeline pipeline supplying the pool size and the id used in log messages
     * @param fileDatas files to refresh; the elements are modified in place
     */
    private void doFileDetectCollector(Pipeline pipeline, List<FileData> fileDatas) {
        ExecutorTemplate executorTemplate = executorTemplateGetter.get();
        try {
            executorTemplate.start();
            // Size the pool according to the pipeline configuration.
            executorTemplate.adjustPoolSize(pipeline.getParameters().getFileLoadPoolSize());
            for (final FileData fileData : fileDatas) {
                executorTemplate.submit(new Runnable() {

                    public void run() {
                        detectFile(fileData);
                    }
                });
            }

            long start = System.currentTimeMillis();
            logger.info("start pipelinep[{}] waitFor FileData Size : {} ", pipeline.getId(), fileDatas.size());
            // Block until every submitted task has completed.
            executorTemplate.waitForResult();
            logger.info("end pipelinep[{}] waitFor FileData cost : {} ms ", pipeline.getId(),
                    (System.currentTimeMillis() - start));
        } finally {
            if (executorTemplate != null) {
                executorTemplateGetter.release(executorTemplate);
            }
        }
    }

    /**
     * Refreshes the metadata of a single file. On failure the last-modified time and size are set
     * to {@link Long#MIN_VALUE} and a warning is logged; no exception escapes.
     * <p>
     * Files with a non-empty namespace (remote files) are not supported and fail immediately,
     * since retrying cannot change that outcome. Local files are attempted at least once and at
     * most {@code retry} times.
     * 
     * @param fileData file to refresh; modified in place
     */
    private void detectFile(FileData fileData) {
        Throwable exception = null;
        if (StringUtils.isNotEmpty(fileData.getNameSpace())) {
            // remote file
            exception = new RuntimeException(fileData + " is not support!");
        } else {
            int attempts = Math.max(1, retry);
            for (int attempt = 0; attempt < attempts; attempt++) {
                try {
                    // Note: lastModified()/length() return 0L instead of throwing when the file
                    // does not exist, so a missing file ends up with time 0 and size 0.
                    File file = new File(fileData.getPath());
                    fileData.setLastModifiedTime(file.lastModified());
                    fileData.setSize(file.length());
                    return;
                } catch (Exception e) {
                    exception = e;
                }
            }
        }

        // Every attempt failed: flag the entry with sentinel values and report the last error.
        fileData.setLastModifiedTime(Long.MIN_VALUE);
        fileData.setSize(Long.MIN_VALUE);
        logger.warn(String.format("FileDetectCollector is error! collect failed[%s]",
                fileData.getNameSpace() + "/" + fileData.getPath()), exception);
    }

    // ==================== setter / getter =====================

    public void setExtensionFactory(ExtensionFactory extensionFactory) {
        this.extensionFactory = extensionFactory;
    }

    public void setRetry(int retry) {
        this.retry = retry;
    }

    public void setArandaRemoteDirectoryFetcher(RemoteDirectoryFetcher arandaRemoteDirectoryFetcher) {
        this.arandaRemoteDirectoryFetcher = arandaRemoteDirectoryFetcher;
    }

    public void setExecutorTemplateGetter(ExecutorTemplateGetter executorTemplateGetter) {
        this.executorTemplateGetter = executorTemplateGetter;
    }

}