Java tutorial
/*
 * @(#)TableParsingBO.java $version 2014. 8. 5.
 *
 * Copyright 2007 NHN Corp. All rights Reserved.
 * NAVER Corp. PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */
package com.naver.timetable.bo;

import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Isolation;
import org.springframework.transaction.annotation.Transactional;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.naver.timetable.dao.CategoryDAO;
import com.naver.timetable.dao.ConfigDAO;
import com.naver.timetable.dao.LectureDAO;
import com.naver.timetable.model.CampusMajorEnum;
import com.naver.timetable.model.Category;
import com.naver.timetable.model.Lecture;
import com.naver.timetable.model.LectureAttending;
import com.naver.timetable.model.LectureSearchParam;
import com.naver.timetable.model.LectureTime;

/**
 * Business object that downloads the timetable HTML pages addressed by
 * {@link #DEFAULT_URL} / {@link #CATEGORY_URL}, extracts categories, lectures and
 * lecture times from them with regular expressions, and stores the result through the DAOs.
 *
 * @author younghan
 */
@Service
public class TableParsingBO {
    private static final Logger LOG = LoggerFactory.getLogger(TableParsingBO.class);

    private static final String DEFAULT_URL = "http://webs.hufs.ac.kr:8989/jsp/HUFS/stu1/stu1_c0_a0_d2.jsp?org_sect=A&ledg_year=%s&ledg_sessn=%s&campus_sect=%s&gubun=%s&%s=%s";
    private static final String CATEGORY_URL = "http://webs.hufs.ac.kr:8989/jsp/HUFS/stu1/stu1_c0_a0_d1.jsp?org_sect=A&ledg_year=%s&ledg_sessn=%s&campus_sect=%s";

    private static final String TR_TAG_REGEX = "(<tr[^>]*?>)([\\s\\S]*?)(?=<\\/tr>)";
    private static final String TD_TAG_REGEX = "(<td([^>]*?)>)([\\s\\S]*?)(?=<\\/td>)";
    private static final String CHECK_ANCHOR_REGEX = "([\\S\\s]*?)href([\\S\\s]*)";
    private static final String SPLIT_ANCHOR_REGEX = "\"([\\S]*?)\">([\\S\\s]*)<\\/a>";
    private static final String SELECT_TAG_REGEX = "<select name=\"%s\"([\\s\\S]*?)<\\/select>";
    private static final String OPTION_TAG_REGEX = "value[\\s]?=\"(\\S*?)\">([\\S\\s]*?)<\\/option>";

    /** Patterns that never change are compiled once instead of once per page / per table row. */
    private static final Pattern TR_PATTERN = Pattern.compile(TR_TAG_REGEX);
    private static final Pattern TD_PATTERN = Pattern.compile(TD_TAG_REGEX);
    private static final Pattern HTML_COMMENT_PATTERN = Pattern.compile("<!--(.*?)-->");

    /** Season value of the sentinel entry that tells a {@link LectureMakeRunnable} to stop. */
    private static final String END_OF_QUEUE_SEASON = "ENDQUEUE";

    private static final int THREAD_COUNT = 10;

    /**
     * Maps the weekday token found in a lecture's room/time text to its English abbreviation.
     *
     * NOTE(review): in the reviewed copy of this file all six keys were the empty string, which
     * makes {@code ImmutableMap.Builder.build()} throw on duplicate keys during class
     * initialisation. The keys were presumably the one-character Korean weekday names lost to an
     * encoding problem; they are restored here as Unicode escapes so the file no longer depends
     * on its encoding. Confirm against the original source.
     */
    public static final Map<String, String> WEEKDAY = new ImmutableMap.Builder<String, String>()
        .put("\uC6D4", "MON") // Monday
        .put("\uD654", "TUE") // Tuesday
        .put("\uC218", "WED") // Wednesday
        .put("\uBAA9", "THU") // Thursday
        .put("\uAE08", "FRI") // Friday
        .put("\uD1A0", "SAT") // Saturday
        .build();

    @Autowired
    CategoryDAO categoryDAO;

    @Autowired
    LectureDAO lectureDAO;

    @Autowired
    ConfigDAO configDAO;

    @Autowired
    HttpClientBO httpClientBO;

    /**
     * Loads categories and the timetable for the given term, unless the term is already registered.
     *
     * NOTE(review): lectures are saved from worker threads started in {@link #saveTimeTable};
     * those DAO calls presumably run outside this method's transaction - confirm.
     *
     * @param year   term year, inserted verbatim into the request URLs
     * @param season term/session code, inserted verbatim into the request URLs
     */
    @Transactional(isolation = Isolation.READ_COMMITTED)
    public void doParsing(String year, String season) {
        // Only terms that configDAO does not know yet are processed.
        if (configDAO.isExistSeason(year, season) == 0) {
            configDAO.insertSeason(year, season);
            saveCategory(year, season);
            saveTimeTable(year, season);
        }
    }

    /**
     * Builds the work queue: one entry per category code of every campus/major combination.
     *
     * @param year   term year
     * @param season term/session code
     * @return queue of pages to download, in DAO order
     */
    protected Queue<UrlEntity> makeUrlQueue(String year, String season) {
        Queue<UrlEntity> urlQueue = Lists.newLinkedList();
        for (CampusMajorEnum campusMajor : CampusMajorEnum.values()) {
            List<String> categoryCodes = categoryDAO.getCatgCode(campusMajor.getCampus(), campusMajor.getMajorCode());
            for (String categoryCode : categoryCodes) {
                urlQueue.add(new UrlEntity(year, season, categoryCode, campusMajor));
            }
        }
        return urlQueue;
    }

    /**
     * Downloads and stores all lectures of the term using {@link #THREAD_COUNT} worker threads,
     * then derives and stores the lecture time slots.
     *
     * @param year   term year
     * @param season term/session code
     */
    public void saveTimeTable(String year, String season) {
        Queue<UrlEntity> urlQueue = makeUrlQueue(year, season);

        LectureMakeThread[] workers = new LectureMakeThread[THREAD_COUNT];
        for (int i = 0; i < THREAD_COUNT; i++) {
            workers[i] = new LectureMakeThread(urlQueue);
            workers[i].start();
        }

        boolean interrupted = false;
        try {
            // Wait for the workers; as before, an interrupt only ends the wait for the current one.
            for (LectureMakeThread worker : workers) {
                try {
                    worker.join();
                } catch (InterruptedException e) {
                    LOG.debug("main interrupted", e);
                    interrupted = true;
                }
            }

            List<LectureTime> timeList = makeTimeList(lectureDAO.getLectureList(new LectureSearchParam(year, season)));
            if (!timeList.isEmpty()) {
                lectureDAO.saveClassTimeList(timeList);
            }
        } finally {
            // Restore the interrupt status only at the end so the DAO calls above are not affected.
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Expands the room/time text of each lecture into one {@link LectureTime} per period.
     * The text is split on spaces: a token found in {@link #WEEKDAY} switches the current day,
     * every numeric token yields an entry whose weekDay is the current day followed by the token.
     * Tokens of four or more characters that are not numeric are ignored.
     *
     * @param lectures lectures to expand; lectures without room text are skipped
     * @return the generated time entries, possibly empty
     */
    public List<LectureTime> makeTimeList(List<Lecture> lectures) {
        List<LectureTime> lectureTimeList = Lists.newArrayList();
        for (Lecture lecture : lectures) {
            String[] tokens = StringUtils.split(lecture.getRoom(), " ");
            if (tokens == null) {
                // StringUtils.split returns null for a null room; nothing to expand.
                continue;
            }

            String weekDay = "";
            for (String token : tokens) {
                if (StringUtils.isNumeric(token)) {
                    if (weekDay == null) {
                        // The preceding day token was unknown; do not emit "null<period>" entries.
                        continue;
                    }
                    LectureTime lectureTime = new LectureTime();
                    lectureTime.setLectureID(lecture.getLectureID());
                    lectureTime.setWeekDay(weekDay + token);
                    lectureTimeList.add(lectureTime);
                } else if (token.length() < 4) {
                    // Null when the short token is not a known weekday.
                    weekDay = WEEKDAY.get(token);
                }
            }
        }
        return lectureTimeList;
    }

    /**
     * Downloads the category list of every campus/major combination and inserts the categories
     * whose id is not stored yet.
     *
     * @param year   term year
     * @param season term/session code
     */
    public void saveCategory(String year, String season) {
        List<Category> existCategory = categoryDAO.getAllCategory();
        for (CampusMajorEnum campusMajor : CampusMajorEnum.values()) {
            String url = String.format(CATEGORY_URL, year, season, campusMajor.getCampus());
            List<Category> categoryToInsert = deleteExistCategory(existCategory, makeCategoryList(url, campusMajor));
            if (!categoryToInsert.isEmpty()) {
                categoryDAO.insertCategories(categoryToInsert);
            }
        }
    }

    /**
     * Filters out the categories whose categoryId already exists.
     *
     * @param existCategory categories that are already stored
     * @param newCategory   candidate categories
     * @return the candidates whose categoryId does not occur in {@code existCategory}
     */
    public List<Category> deleteExistCategory(List<Category> existCategory, List<Category> newCategory) {
        Map<String, Category> knownById = Maps.newHashMap();
        for (Category category : existCategory) {
            knownById.put(category.getCategoryId(), category);
        }

        List<Category> result = Lists.newArrayList();
        for (Category category : newCategory) {
            if (!knownById.containsKey(category.getCategoryId())) {
                result.add(category);
            }
        }
        return result;
    }

    /**
     * Downloads {@code url} and turns every option of the select element named
     * {@code campusMajor.getMajorUrl()} into a {@link Category}.
     *
     * @param url         page to download
     * @param campusMajor campus/major combination the page belongs to
     * @return the categories listed in the select element
     * @throws IllegalStateException if no body was returned or the select element is missing;
     *                               failing loudly keeps a broken download from being recorded
     *                               as an empty category list
     */
    public List<Category> makeCategoryList(String url, CampusMajorEnum campusMajor) {
        String htmlBody = httpClientBO.getHttpBody(url);
        if (htmlBody == null) {
            throw new IllegalStateException("No HTTP body returned for " + url);
        }

        Matcher selectMatcher = makeMatcher(String.format(SELECT_TAG_REGEX, campusMajor.getMajorUrl()), htmlBody);
        if (!selectMatcher.find()) {
            throw new IllegalStateException(
                "No <select name=\"" + campusMajor.getMajorUrl() + "\"> element found in " + url);
        }

        Matcher categoryMatcher = makeMatcher(OPTION_TAG_REGEX, selectMatcher.group(1));
        List<Category> categories = Lists.newArrayList();
        while (categoryMatcher.find()) {
            categories.add(new Category(categoryMatcher.group(1), categoryMatcher.group(2),
                campusMajor.getMajorCode(), campusMajor.getCampus()));
        }
        return categories;
    }

    /**
     * Parses a timetable page into lectures: HTML comments are removed, the first table row is
     * skipped and every following row is converted with {@link #convertTdToLecture}.
     *
     * @param htmlBody   page HTML
     * @param categoryId category id stored on every lecture
     * @param year       term year stored on every lecture
     * @param season     term/session code stored on every lecture
     * @return the parsed lectures, empty when the page has at most one row
     */
    public List<Lecture> parsingToLecture(String htmlBody, String categoryId, String year, String season) {
        String body = HTML_COMMENT_PATTERN.matcher(htmlBody).replaceAll("");
        Matcher trMatcher = TR_PATTERN.matcher(body);
        List<Lecture> lectureList = Lists.newArrayList();

        // The first row is skipped (presumably the table header - confirm).
        if (!trMatcher.find()) {
            return lectureList;
        }
        while (trMatcher.find()) {
            Matcher tdMatcher = TD_PATTERN.matcher(trMatcher.group(2));
            lectureList.add(convertTdToLecture(tdMatcher, categoryId, year, season));
        }
        return lectureList;
    }

    /**
     * Builds a {@link Lecture} from the cells of one table row. Cells are addressed by their
     * zero-based position; cells at other positions are ignored.
     *
     * @param tdMatcher  matcher over the row's td elements, positioned before the first cell
     * @param categoryId category id to store
     * @param year       term year to store
     * @param season     term/session code to store
     * @return the populated lecture
     * @throws NumberFormatException if cell 6 (point) is not an integer
     */
    public Lecture convertTdToLecture(Matcher tdMatcher, String categoryId, String year, String season) {
        Lecture lecture = new Lecture();
        lecture.setCatgId(categoryId);

        int cellIndex = 0;
        while (tdMatcher.find()) {
            String text = tdMatcher.group(3).trim();
            switch (cellIndex) {
                case 2:
                    lecture.setGrade(text);
                    break;
                case 3:
                    lecture.setLectureNum(text);
                    break;
                case 5:
                    List<String> splitName = splitLectureName(text);
                    lecture.setUrl(splitName.get(0));
                    lecture.setLectureName(splitName.get(1));
                    break;
                case 6:
                    lecture.setPoint(Integer.parseInt(text));
                    break;
                case 8:
                    lecture.setProf(text);
                    break;
                case 9:
                    lecture.setRoom(text);
                    break;
                case 13:
                    // NOTE(review): this literal looks mis-encoded (probably a non-ASCII marker);
                    // kept unchanged - restore from the original source.
                    lecture.setCyber(Boolean.toString(text.equals("?")));
                    break;
                case 14:
                    // NOTE(review): same encoding concern as for cell 13.
                    lecture.setForNative(Boolean.toString(text.equals("??")));
                    break;
                default:
                    break;
            }
            cellIndex++;
        }

        lecture.setLectureYear(year);
        lecture.setLectureSeason(season);
        return lecture;
    }

    /**
     * Splits the lecture-name cell into link target and display name.
     *
     * @param lectureNameText content of the lecture-name td
     * @return two-element list {@code [url, name]}; the url is empty when the cell contains no
     *         anchor that matches the expected {@code "url">name</a>} shape
     */
    public List<String> splitLectureName(String lectureNameText) {
        if (lectureNameText.matches(CHECK_ANCHOR_REGEX)) {
            Matcher matcher = makeMatcher(SPLIT_ANCHOR_REGEX, lectureNameText);
            if (matcher.find()) {
                return Lists.newArrayList(matcher.group(1), matcher.group(2));
            }
            // "href" is present but the anchor is not in the expected shape: treat as plain text.
        }
        return Lists.newArrayList("", lectureNameText);
    }

    /**
     * Compiles {@code regex} and returns a matcher over {@code text}.
     *
     * @param regex regular expression
     * @param text  input to match against
     * @return a new matcher
     */
    protected Matcher makeMatcher(String regex, String text) {
        return Pattern.compile(regex).matcher(text);
    }

    /**
     * Parses a timetable page into attendance entries. The first row is skipped; for every other
     * row cell 3 supplies the lecture number and cell 10 the attendance text, of which the part
     * before the first "/" is parsed as the attendee count. Rows with fewer than 11 cells are
     * skipped.
     *
     * @param htmlBody page HTML
     * @return the parsed attendance entries
     * @throws NumberFormatException if the attendee count is not an integer
     */
    public List<LectureAttending> convertTdToAttending(String htmlBody) {
        String body = HTML_COMMENT_PATTERN.matcher(htmlBody).replaceAll("");
        Matcher trMatcher = TR_PATTERN.matcher(body);
        List<LectureAttending> attendingList = Lists.newArrayList();

        // The first row is skipped (presumably the table header - confirm).
        if (!trMatcher.find()) {
            return attendingList;
        }
        while (trMatcher.find()) {
            Matcher tdMatcher = TD_PATTERN.matcher(trMatcher.group(2));

            // Four matches move the matcher onto cell 3 (lecture number).
            if (!advance(tdMatcher, 4)) {
                continue;
            }
            LectureAttending attending = new LectureAttending();
            attending.setLectureNum(tdMatcher.group(3).trim());

            // Seven more matches move the matcher onto cell 10 (attendance text).
            if (!advance(tdMatcher, 7)) {
                continue;
            }
            String countText = tdMatcher.group(3).trim().split("/")[0].trim();
            attending.setAttending(Integer.parseInt(countText));
            attendingList.add(attending);
        }
        return attendingList;
    }

    /** Calls {@code find()} up to {@code times} times; returns false as soon as one call fails. */
    private static boolean advance(Matcher matcher, int times) {
        for (int i = 0; i < times; i++) {
            if (!matcher.find()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Currently a no-op.
     *
     * The disabled implementation iterated over every campus/major combination and category code,
     * downloaded the page addressed by {@link #DEFAULT_URL} for the configured year and season and
     * passed the result of {@link #convertTdToAttending} to {@code lectureDAO.saveAttending}.
     */
    public void saveAttending() {
        // Intentionally empty; see the method comment.
    }

    /** Downloads the timetable page described by {@code urlEntity} and parses it into lectures. */
    private List<Lecture> fetchLectures(UrlEntity urlEntity) {
        CampusMajorEnum campusMajor = urlEntity.getCampusMajor();
        String url = String.format(DEFAULT_URL, urlEntity.getYear(), urlEntity.getSeason(),
            campusMajor.getCampus(), campusMajor.getMajorCode(), campusMajor.getMajorUrl(),
            urlEntity.getCategoryId());
        String htmlBody = httpClientBO.getHttpBody(url);
        return parsingToLecture(htmlBody, urlEntity.getCategoryId(), urlEntity.getYear(), urlEntity.getSeason());
    }

    /** Describes one timetable page to download: term, category and campus/major combination. */
    class UrlEntity {
        String year;
        String season;
        String categoryId;
        CampusMajorEnum campusMajor;

        public UrlEntity() {
        }

        /**
         * @param year        term year
         * @param season      term/session code
         * @param categoryId  category code
         * @param campusMajor campus/major combination
         */
        public UrlEntity(String year, String season, String categoryId, CampusMajorEnum campusMajor) {
            this.year = year;
            this.season = season;
            this.categoryId = categoryId;
            this.campusMajor = campusMajor;
        }

        public String getYear() {
            return year;
        }

        public void setYear(String year) {
            this.year = year;
        }

        public String getSeason() {
            return season;
        }

        public void setSeason(String season) {
            this.season = season;
        }

        public String getCategoryId() {
            return categoryId;
        }

        public void setCategoryId(String categoryId) {
            this.categoryId = categoryId;
        }

        public CampusMajorEnum getCampusMajor() {
            return campusMajor;
        }

        public void setCampusMajor(CampusMajorEnum campusMajor) {
            this.campusMajor = campusMajor;
        }
    }

    /**
     * Worker thread that drains the shared queue, collects the lectures of every page it
     * processed and saves them in one call when the queue is empty.
     */
    class LectureMakeThread extends Thread {
        Queue<UrlEntity> urlQueue;
        List<Lecture> lectureList;

        public LectureMakeThread(Queue<UrlEntity> urlQueue) {
            this.urlQueue = urlQueue;
            lectureList = Lists.newArrayList();
        }

        public List<Lecture> getLectureList() {
            return lectureList;
        }

        @Override
        public void run() {
            while (true) {
                UrlEntity urlEntity;
                // The queue is shared by all workers and is not thread-safe by itself.
                synchronized (urlQueue) {
                    urlEntity = urlQueue.poll();
                }
                if (urlEntity == null) {
                    break;
                }

                try {
                    lectureList.addAll(fetchLectures(urlEntity));
                } catch (RuntimeException e) {
                    // One broken page must not discard everything this worker has collected.
                    LOG.error("failed to load lectures for category " + urlEntity.getCategoryId(), e);
                }
            }

            // Skip the DAO call when nothing was collected, like the other save paths of this class.
            if (!lectureList.isEmpty()) {
                lectureDAO.saveClassInfoList(this.lectureList);
            }
        }
    }

    /**
     * Runnable variant of the worker: blocks while the queue is empty and stops when the head of
     * the queue is the sentinel entry whose season is {@code "ENDQUEUE"}. The sentinel is left in
     * the queue so that other workers see it as well. Collected lectures are kept in
     * {@code lectureList}; this class does not save them.
     */
    class LectureMakeRunnable implements Runnable {
        Queue<UrlEntity> urlQueue;
        List<Lecture> lectureList;

        public LectureMakeRunnable(Queue<UrlEntity> urlQueue) {
            this.urlQueue = urlQueue;
            lectureList = Lists.newArrayList();
        }

        public LectureMakeRunnable() {
            // urlQueue has to be assigned before run() is called.
            lectureList = Lists.newArrayList();
        }

        /**
         * @see java.lang.Runnable#run()
         */
        @Override
        public void run() {
            if (urlQueue == null) {
                throw new IllegalStateException("urlQueue must be set before the worker is started");
            }

            while (true) {
                UrlEntity urlEntity;
                synchronized (urlQueue) {
                    // Loop instead of "if": wait() may return while the queue is still empty.
                    while (urlQueue.isEmpty()) {
                        try {
                            urlQueue.wait();
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt();
                            return;
                        }
                    }

                    urlEntity = urlQueue.peek();
                    if (END_OF_QUEUE_SEASON.equals(urlEntity.getSeason())) {
                        return;
                    }
                    urlQueue.poll();
                }

                try {
                    lectureList.addAll(fetchLectures(urlEntity));
                } catch (RuntimeException e) {
                    LOG.error("failed to load lectures for category " + urlEntity.getCategoryId(), e);
                }
            }
        }
    }
}