com.changhong.bigdata.flume.source.dirregex.DirRegexSource.java Source code

Java tutorial

Introduction

Here is the source code for com.changhong.bigdata.flume.source.dirregex.DirRegexSource.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.changhong.bigdata.flume.source.dirregex;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
//import java.sql.Date;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.flume.ChannelException;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.instrumentation.SourceCounter;
import org.apache.flume.source.AbstractSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.gson.Gson;
import com.google.gson.JsonObject;

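/**
 * @ClassName: DirRegexSource
 * @Description: Flume source that periodically scans a directory tree for files whose names match
 * a regular expression, reads the content added since the last recorded offset and emits every
 * match of the content regular expression as an event. While reading, a pass is cut short when
 * totalMemory equals maxMemory, or when totalMemory exceeds maxMemory * 0.4 and free memory has
 * just increased.
 * @author YuYue
 * @date 2016-01-13 11:40:22
 * */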

public class DirRegexSource extends AbstractSource implements EventDrivenSource, Configurable {
    private static final Logger logger = LoggerFactory.getLogger(DirRegexSource.class);

    // Root directory that is scanned, and the file in which per-file read offsets are persisted.
    private File monitorDir, checkFile;
    // Pattern a file name must fully match to be read, and pattern whose group 1 yields event bodies.
    private Pattern monitorFilePattern, contentPattern;
    // Delay between two directory scans, in seconds.
    private long delayTime;
    // Charset used to decode the bytes read from monitored files.
    private String charsetName;
    // Maximum number of events handed to the channel processor in one call.
    private int batchSize;
    // Persisted offsets: file path -> number of bytes already consumed.
    private Properties properties;
    // Files currently being read: file path -> offset reached so far by the running task.
    private final Properties tmpProperties = new Properties();
    // Runs the periodic directory scan.
    private ScheduledExecutorService scheduledExecutorService;
    // Runs one reader task per file that has something new to read.
    private ExecutorService executorService;
    private SourceCounter sourceCounter;
    // Value of the 'ip' configuration parameter, copied into every event.
    private String ipstr;

    // Defaults for the optional configuration parameters; constants, hence static final.
    private static final String DEFAULT_MONITORFILEREGEX = "[\\W\\w]+";
    private static final String DEFAULT_CHARSETNAME = "UTF-8";
    private static final long DEFAULT_DELAYTIME = 10L;
    private static final int DEFAULT_BATCHSIZE = 1024;

    //   private String partFileName;
    //   private String currentTime;
    /**
     * Reads this source's settings from the Flume context and prepares its runtime state.
     *
     * <p>Required parameters: {@code monitorDir}, {@code checkFile}, {@code contentRegex} and
     * {@code ip}. Optional parameters and their defaults: {@code monitorFileRegex}
     * ({@code [\W\w]+}), {@code delayTime} (10), {@code charsetName} (UTF-8) and {@code batchSize}
     * (1024).
     *
     * <p>The checkpoint file is created when it does not exist; otherwise the offsets stored in it
     * are loaded. The two executors and the source counter are created here as well.
     *
     * @author YuYue
     * @param context Flume configuration of this source
     * @throws IllegalArgumentException if a parameter is missing or invalid, or if the checkpoint
     *         file cannot be created or read (the original exception is attached as the cause)
     */
    public void configure(Context context) {
        logger.info("----------------------DirRegexSource configure...");
        try {
            // monitorDir and monitorFileRegex
            String strMonitorDir = context.getString("monitorDir");
            Preconditions.checkArgument(StringUtils.isNotBlank(strMonitorDir), "Missing Param:'monitorDir'");
            String monitorFileRegex = context.getString("monitorFileRegex", DEFAULT_MONITORFILEREGEX);
            Preconditions.checkArgument(StringUtils.isNotBlank(monitorFileRegex),
                    "Missing Param:'monitorFileRegex'");
            monitorFilePattern = Pattern.compile(monitorFileRegex);

            // checkFile
            String strCheckFile = context.getString("checkFile");
            Preconditions.checkArgument(StringUtils.isNotBlank(strCheckFile), "Missing Param:'checkFile'");

            // contentRegex
            String contentRegex = context.getString("contentRegex");
            Preconditions.checkArgument(StringUtils.isNotBlank(contentRegex), "Missing Param:'contentRegex'");
            contentPattern = Pattern.compile(contentRegex);

            // ip (the message previously named 'contentRegex' by mistake)
            ipstr = context.getString("ip");
            Preconditions.checkArgument(StringUtils.isNotBlank(ipstr), "Missing Param:'ip'");

            // delayTime, charsetName and batchSize
            delayTime = context.getLong("delayTime", DEFAULT_DELAYTIME);
            Preconditions.checkArgument(delayTime > 0, "'delayTime' must be greater than 0");
            charsetName = context.getString("charsetName", DEFAULT_CHARSETNAME);
            Preconditions.checkArgument(StringUtils.isNotBlank(charsetName), "Missing Param:'charsetName'");
            batchSize = context.getInteger("batchSize", DEFAULT_BATCHSIZE);
            Preconditions.checkArgument(batchSize > 0, "'batchSize' must be greater than 0");

            monitorDir = new File(strMonitorDir);
            checkFile = new File(strCheckFile);

            properties = new Properties();
            if (!checkFile.exists()) {
                checkFile.createNewFile();
            } else {
                FileInputStream checkpointIn = new FileInputStream(checkFile);
                try {
                    properties.load(checkpointIn);
                } finally {
                    // Close even when load() fails so the descriptor is not leaked.
                    checkpointIn.close();
                }
            }

            executorService = Executors.newCachedThreadPool();
            scheduledExecutorService = Executors.newSingleThreadScheduledExecutor();
            sourceCounter = new SourceCounter("DirRegexSource");
        } catch (Exception e) {
            // Any failure is reported to the caller as an invalid configuration.
            throw new IllegalArgumentException(e);
        }
        logger.info("----------------------DirRegexSource configured!");
    }

    /**
     * Starts the source: starts the counter, schedules the directory scan to run immediately and
     * then again {@code delayTime} seconds after each scan finishes, and finally delegates to the
     * superclass.
     *
     * @author YuYue
     */
    public void start() {
        logger.info("----------------------DirRegexSource starting...");
        sourceCounter.start();
        // Fixed delay (not fixed rate): the next scan is timed from the end of the previous one.
        scheduledExecutorService.scheduleWithFixedDelay(
                new DirRunnable(monitorDir), 0, delayTime, TimeUnit.SECONDS);
        super.start();
        logger.info("----------------------DirRegexSource started!");
    }

    /**
     * Stops the source: asks both executors to shut down, waits up to 10 seconds for each of them
     * to finish, then forces whatever is still running to stop, stops the counter and delegates to
     * the superclass.
     *
     * @author YuYue
     */
    public void stop() {
        logger.info("----------------------DirRegexSource stopping...");
        scheduledExecutorService.shutdown();
        executorService.shutdown();
        try {
            scheduledExecutorService.awaitTermination(10L, TimeUnit.SECONDS);
            executorService.awaitTermination(10L, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            logger.warn("----------------------interrupted while waiting for DirRegexSource tasks to finish", e);
            // Keep the interrupt visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
        // Cancel anything that outlived the grace period. The worker pool previously got a second
        // shutdown() here, which is a no-op and never interrupted stuck tasks.
        scheduledExecutorService.shutdownNow();
        executorService.shutdownNow();
        sourceCounter.stop();
        super.stop();
        logger.info("----------------------DirRegexSource stopped!");
    }

    /**
      * @Title: DirRunnable
      * @Description: ,??"myDefine.log-2016-01-17"
      * @Description:???"yyyy-MM-dd"??
      * @author YuYue
      * @param runnable   
      * @throws
    */
    private class Json {
        String filename;
        String ip;
        String body;
    }

    private class DirRunnable implements Runnable {
        private File monitorDir;

        DirRunnable(File monitorDir) {
            this.monitorDir = monitorDir;
        }

        public void run() {
            logger.debug("----------------------dir monitor start...");
            monitorFile(monitorDir);
            logger.debug("----------------------dir monitor stoped");
        }

        private void monitorFile(File dir) {
            for (File tmpFile : dir.listFiles()) {
                //            partFileName = tmpFile.getName().substring(tmpFile.getName().length()-10,
                //                  tmpFile.getName().length());
                //            Date dt = new Date();
                //            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
                //            currentTime = format.format(dt);
                //            if(currentTime.equals(partFileName)){
                //                logger.debug("------------------------------dir name after matches");
                if (tmpFile.isFile() && !tmpProperties.containsKey(tmpFile.getPath())) {
                    //                logger.info("------------------------------dir matches");
                    Matcher matcher = monitorFilePattern.matcher(tmpFile.getName());
                    if (matcher.matches()) {
                        //                  logger.info("-------------------dir matched");
                        Runnable fileRunnable = null;
                        // new file
                        if (!properties.containsKey(tmpFile.getPath())) {
                            fileRunnable = new FileRunnable(tmpFile, 0);
                            tmpProperties.put(tmpFile.getPath(), "0");
                        } else {
                            int readedLength = Integer.valueOf(properties.get(tmpFile.getPath()).toString());
                            // changed file
                            if (readedLength < tmpFile.length()) {
                                fileRunnable = new FileRunnable(tmpFile, readedLength);
                                tmpProperties.put(tmpFile.getPath(), readedLength + "");
                                // roll file
                            } else if (readedLength > tmpFile.length()) {
                                fileRunnable = new FileRunnable(tmpFile, 0);
                                tmpProperties.put(tmpFile.getPath(), "0");
                                // unchanged file
                            } else {
                                continue;
                            }
                        }
                        executorService.submit(fileRunnable);
                        continue;
                    }
                    //            } else if (tmpFile.isDirectory()) {
                    //               monitorFile(tmpFile);
                    //            }
                } else if (tmpFile.isDirectory()) {
                    monitorFile(tmpFile);
                }
            }
        }
    }

    /**
      * @Title: FileRunnable
      * @Description: ??contentRegex??.find()?
      * @author YuYue
      * @param runnable   
      * @throws
    */
    private class FileRunnable implements Runnable {
        private File monitorFile;
        private int readedLength;

        FileRunnable(File monitorFile, int readedLength) {
            this.monitorFile = monitorFile;
            this.readedLength = readedLength;
        }

        public void run() {
            logger.debug("----------------------file monitor start...");
            logger.info("----------------------read {}", monitorFile.getPath());

            FileInputStream fis = null;
            try {
                // read file(read in batches)
                StringBuilder strBuilder = new StringBuilder();
                fis = new FileInputStream(monitorFile);
                int readed = 0;
                int mark = 0;
                fis.skip(readedLength);
                byte[] arrByte = new byte[1024 * 1024];
                long freeMemory = Runtime.getRuntime().freeMemory();
                while (readed + readedLength < monitorFile.length()) {
                    int read = 0;
                    while ((read = fis.read(arrByte)) != -1) {
                        if (arrByte.length > read) {
                            strBuilder.append(new String(arrByte, 0, read, charsetName));
                        } else {
                            strBuilder.append(new String(arrByte, charsetName));
                        }
                        readed += read;
                        if (Runtime.getRuntime().totalMemory() == Runtime.getRuntime().maxMemory()
                                || (Runtime.getRuntime().totalMemory() > Runtime.getRuntime().maxMemory() * 0.4
                                        && Runtime.getRuntime().freeMemory() > freeMemory)) {
                            freeMemory = Runtime.getRuntime().freeMemory();
                            break;
                        }
                        freeMemory = Runtime.getRuntime().freeMemory();
                    }
                    logger.debug("----------------------get {} byte data", readed - readedLength);

                    // create events(remove the last event)
                    List<Integer> numList = new ArrayList<Integer>();
                    Matcher contentMatcher = contentPattern.matcher(strBuilder.toString());
                    List<Event> eventList = new ArrayList<Event>();
                    //               List<StrToJson> strToJsons =new ArrayList<StrToJson>();

                    while (contentMatcher.find()) {
                        String contentString = contentMatcher.group(1);
                        String filenameString = monitorFile.getName();

                        //transform to json
                        Json json = new Json();
                        json.filename = filenameString;
                        json.body = contentString;
                        json.ip = ipstr;
                        Gson gson = new Gson();
                        String eventString = gson.toJson(json);

                        //add event to evenlist
                        //                  contentString = "["+filenameString+"]" + " " +contentString;
                        Event event = EventBuilder.withBody(eventString.getBytes());
                        event.getHeaders().put("filePath", monitorFile.getPath());
                        eventList.add(event);
                        numList.add(contentMatcher.end(1));
                        mark = contentMatcher.start(1);
                    }
                    if (readed + readedLength < monitorFile.length() && eventList.size() > 0) {
                        eventList.remove(eventList.size() - 1);
                        numList.remove(numList.size() - 1);
                    }
                    logger.debug("----------------------create {} events", eventList.size());

                    // process events(process in batches)
                    if (eventList.size() != 0) {
                        sourceCounter.addToEventReceivedCount(eventList.size());
                        int batchCount = eventList.size() / batchSize + 1;
                        try {
                            for (int i = 0; i < batchCount; i++) {
                                if (i != batchCount - 1) {
                                    tmpProperties.put(monitorFile.getPath(),
                                            (readedLength + numList.get((i + 1) * batchSize - 1)) + "");
                                    sourceCounter.addToEventAcceptedCount(
                                            eventList.subList(i * batchSize, (i + 1) * batchSize).size());
                                    getChannelProcessor().processEventBatch(
                                            eventList.subList(i * batchSize, (i + 1) * batchSize));
                                } else {
                                    tmpProperties.put(monitorFile.getPath(), (readed + readedLength) + "");
                                    sourceCounter.addToEventAcceptedCount(
                                            eventList.subList(i * batchSize, eventList.size()).size());
                                    getChannelProcessor()
                                            .processEventBatch(eventList.subList(i * batchSize, eventList.size()));
                                }
                            }
                        } catch (ChannelException ex) {
                            // TODO Auto-generated catch block
                            ex.printStackTrace();
                        }
                        logger.debug("----------------------process {} batchs", batchCount);
                    } else {
                        tmpProperties.put(monitorFile.getPath(), (readed + readedLength) + "");
                    }
                    if (mark == 0) {
                        strBuilder.setLength(0);
                    } else {
                        strBuilder.delete(0, mark);
                    }
                    logger.debug("----------------------file monitor stoped");
                }
                fis.close();
            } catch (Throwable e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } finally {
                properties.put(monitorFile.getPath(), tmpProperties.get(monitorFile.getPath()));
                try {
                    FileOutputStream checkfile = new FileOutputStream(checkFile);
                    properties.store(checkfile, null);
                    checkfile.close();
                } catch (FileNotFoundException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
                tmpProperties.remove(monitorFile.getPath());
            }
        }
    }
}