com.laudandjolynn.mytv.crawler.CrawlerGroup.java Source code

Java tutorial

Introduction

Here is the source code for com.laudandjolynn.mytv.crawler.CrawlerGroup.java

Source

/*******************************************************************************
 * Copyright 2015 htd0324@gmail.com
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.laudandjolynn.mytv.crawler;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;

import org.apache.commons.lang3.concurrent.BasicThreadFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.laudandjolynn.mytv.event.AllTvStationCrawlEndEvent;
import com.laudandjolynn.mytv.event.CrawlEvent;
import com.laudandjolynn.mytv.event.CrawlEventListener;
import com.laudandjolynn.mytv.event.CrawlEventListenerAdapter;
import com.laudandjolynn.mytv.event.ProgramTableCrawlEndEvent;
import com.laudandjolynn.mytv.exception.MyTvException;
import com.laudandjolynn.mytv.model.ProgramTable;
import com.laudandjolynn.mytv.model.TvStation;
import com.laudandjolynn.mytv.utils.Constant;

/**
 * A composite crawler that delegates the actual crawling to a group of member
 * crawlers.
 * <p>
 * Crawling all TV stations fans out to every member crawler on a thread pool
 * and merges the results; crawling a program table is delegated to the first
 * member crawler that reports the station as existing. {@code itemFound}
 * events raised by member crawlers are forwarded to the listeners of this
 * group.
 * <p>
 * NOTE(review): the member list is a plain {@link ArrayList} with no
 * synchronization in this class; modifying the group while a crawl is running
 * is not guarded here.
 * 
 * @author: Laud
 * @email: htd0324@gmail.com
 * @date: 2015-04-16 1:12:03
 * @copyright: www.laudandjolynn.com
 */
public class CrawlerGroup extends AbstractCrawler {
    private static final String CRAWLER_GROUP_NAME = "crawlergroup";
    private static final Logger logger = LoggerFactory.getLogger(CrawlerGroup.class);

    /** Member crawlers, in the order they were added. */
    private final List<Crawler> crawlers = new ArrayList<Crawler>();
    /** Listener registered on every member crawler; relays their events. */
    private final CrawlEventListener listener;

    /**
     * Creates an empty group and prepares the relay listener that forwards
     * {@code itemFound} events to the listeners of this group.
     */
    public CrawlerGroup() {
        this.listener = new CrawlEventListenerAdapter() {
            @Override
            public void itemFound(CrawlEvent event) {
                for (CrawlEventListener groupListener : listeners) {
                    groupListener.itemFound(event);
                }
            }
        };
    }

    /**
     * Returns the member crawlers of this group.
     * 
     * @return the internal (live, mutable) list of member crawlers
     */
    public List<Crawler> getCrawlers() {
        return crawlers;
    }

    /**
     * Adds a crawler to this group and registers the group's relay listener
     * on it.
     * 
     * @param crawler
     *            the crawler to add; must not be null
     * @throws NullPointerException
     *             if {@code crawler} is null (the group is left unchanged)
     */
    public void addCrawler(Crawler crawler) {
        // Fail before touching the list so that a null argument cannot leave
        // a null entry behind that would break later iteration.
        if (crawler == null) {
            throw new NullPointerException("crawler must not be null.");
        }
        this.crawlers.add(crawler);
        crawler.registerCrawlEventListener(listener);
    }

    /**
     * Removes a crawler from this group.
     * <p>
     * NOTE(review): the relay listener registered in
     * {@link #addCrawler(Crawler)} is not unregistered here, so a removed
     * crawler presumably keeps forwarding its events to this group - confirm
     * whether {@code Crawler} offers a way to unregister a listener.
     * 
     * @param crawler
     *            the crawler to remove
     */
    public void removeCrawler(Crawler crawler) {
        this.crawlers.remove(crawler);
    }

    /**
     * @return the fixed name of the crawler group
     */
    @Override
    public String getCrawlerName() {
        return CRAWLER_GROUP_NAME;
    }

    /**
     * A group has no single url, so this operation is not supported.
     * 
     * @throws MyTvException
     *             always
     */
    @Override
    public String getUrl() {
        throw new MyTvException("url isn't avaliable of crawler group.");
    }

    /**
     * Crawls the TV stations of every member crawler concurrently and merges
     * the results into one list.
     * <p>
     * A member crawler whose task fails is logged and skipped. If the calling
     * thread is interrupted while waiting, the remaining tasks are cancelled,
     * the interrupt flag is restored and the results gathered so far are
     * used. In every case an {@link AllTvStationCrawlEndEvent} carrying the
     * merged list is sent to the listeners of this group before returning.
     * 
     * @return the merged station list; empty (never null) when the group has
     *         no crawlers or nothing was found
     */
    @Override
    public List<TvStation> crawlAllTvStation() {
        List<TvStation> resultList = new ArrayList<TvStation>();
        // A fixed thread pool cannot be created with zero threads, so an
        // empty group skips the concurrent part entirely.
        if (!crawlers.isEmpty()) {
            collectAllTvStation(resultList);
        }

        for (CrawlEventListener groupListener : listeners) {
            groupListener.crawlEnd(new AllTvStationCrawlEndEvent(this, resultList));
        }
        return resultList;
    }

    /**
     * Runs {@code crawlAllTvStation()} of every member crawler on a thread
     * pool and appends each non-null result to {@code resultList} in
     * completion order.
     * 
     * @param resultList
     *            the list that receives the crawled stations
     */
    private void collectAllTvStation(List<TvStation> resultList) {
        int size = crawlers.size();
        int maxThreadNum = Constant.CPU_PROCESSOR_NUM;
        // Never more threads than tasks or than the configured maximum, and
        // never fewer than one (the pool rejects a non-positive size).
        int threadNum = Math.max(1, Math.min(size, maxThreadNum));
        ThreadFactory threadFactory = new BasicThreadFactory.Builder().namingPattern("Mytv_CrawlerGroup_%d")
                .build();
        ExecutorService executorService = Executors.newFixedThreadPool(threadNum, threadFactory);
        CompletionService<List<TvStation>> completionService = new ExecutorCompletionService<List<TvStation>>(
                executorService);

        int submitted = 0;
        try {
            for (final Crawler crawler : crawlers) {
                completionService.submit(new Callable<List<TvStation>>() {
                    @Override
                    public List<TvStation> call() throws Exception {
                        return crawler.crawlAllTvStation();
                    }
                });
                submitted++;
            }
        } finally {
            // Always release the pool threads once the submitted tasks are
            // done, even if a submission failed half way.
            executorService.shutdown();
        }

        // Wait only for tasks that were really submitted.
        for (int finished = 0; finished < submitted; finished++) {
            try {
                List<TvStation> stationList = completionService.take().get();
                if (stationList != null) {
                    resultList.addAll(stationList);
                }
            } catch (InterruptedException e) {
                logger.error("crawl task of all tv station interrupted.", e);
                // Stop the outstanding tasks and keep the interrupt visible
                // to the caller instead of silently waiting on.
                executorService.shutdownNow();
                Thread.currentThread().interrupt();
                break;
            } catch (ExecutionException e) {
                logger.error("crawl task of all tv station executed fail.", e);
            }
        }
    }

    /**
     * Crawls the program table of a station for a date, using the first
     * member crawler that reports the station as existing. Afterwards a
     * {@link ProgramTableCrawlEndEvent} is sent to the listeners of this
     * group.
     * 
     * @param date
     *            the date to crawl, passed through to the member crawler
     * @param station
     *            the station to crawl
     * @return the crawled program tables (empty when no member crawler knows
     *         the station or nothing was found), or null when {@code date}
     *         or {@code station} is null
     */
    @Override
    public List<ProgramTable> crawlProgramTable(String date, TvStation station) {
        if (station == null || date == null) {
            logger.info("station and date must be not null.");
            return null;
        }
        List<ProgramTable> resultList = new ArrayList<ProgramTable>();
        for (Crawler crawler : crawlers) {
            if (crawler.exists(station)) {
                List<ProgramTable> ptList = crawler.crawlProgramTable(date, station);
                if (ptList != null) {
                    resultList.addAll(ptList);
                }
                // Only the first matching crawler is consulted.
                break;
            }
        }
        for (CrawlEventListener groupListener : listeners) {
            groupListener.crawlEnd(new ProgramTableCrawlEndEvent(this, resultList, station.getName(), date));
        }
        return resultList;
    }

    /**
     * Tells whether any member crawler reports the station as existing.
     * 
     * @param station
     *            the station to look up
     * @return true if at least one member crawler's {@code exists(station)}
     *         returns true, false otherwise (including for an empty group)
     */
    @Override
    public boolean exists(TvStation station) {
        for (Crawler crawler : crawlers) {
            if (crawler.exists(station)) {
                return true;
            }
        }
        return false;
    }
}