Java tutorial
/*
 * Copyright 2014 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.github.carlomicieli.footballdb.starter.parsers;

import io.github.carlomicieli.footballdb.starter.documents.DocumentDownloader;
import io.github.carlomicieli.footballdb.starter.documents.PathBuilder;
import io.github.carlomicieli.footballdb.starter.domain.PlayerProfile;
import io.github.carlomicieli.footballdb.starter.pages.PlayerProfilePage;
import org.jsoup.nodes.Document;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.Collections.unmodifiableMap;

/**
 * Builds a {@link PlayerProfile} from a player profile document.
 *
 * <p>The text fragments exposed by {@link PlayerProfilePage} (height/weight/age,
 * birth, college, experience, high school) are matched against the regular
 * expressions declared in this class. Non-breaking spaces are replaced with
 * plain spaces before any matching takes place.
 *
 * @author Carlo Micieli
 */
@Component
public class PlayerProfileParser extends Parser<PlayerProfile> {

    /** Character code 160; replaced by a regular space in {@link #normalize(String)}. */
    private static final char NON_BREAKING_SPACE = '\u00A0';

    private static final Pattern EXP_PATTERN = Pattern.compile("Experience:\\s(\\d{1,2}\\.*)");

    // The city is captured reluctantly up to the comma separator, so names that
    // are not exactly one or two plain words ("Salt Lake City", "St. Louis",
    // "Winston-Salem") still match instead of discarding the whole birth record.
    private static final Pattern BIRTH_PATTERN = Pattern
            .compile("Born:\\s(\\d{1,2}/\\d{1,2}/\\d{4})\\s(.*?)\\s*,\\s*(\\w*)");

    private static final Pattern INFO_PATTERN = Pattern
            .compile("Height:\\s(\\d-\\d{1,2})\\s{3}Weight:\\s(\\d{3})\\s{3}Age:\\s(\\d{1,2})");

    // Captures the remainder of the line so that names longer than two words or
    // containing punctuation ("North Carolina State", "Texas A&M") are not cut.
    private static final Pattern COLLEGE_PATTERN = Pattern.compile("College:\\s(.*)");

    @Autowired
    public PlayerProfileParser(DocumentDownloader docs) {
        super(docs);
    }

    @Override
    protected PathBuilder pathBuilder() {
        return PathBuilder.nflDotCom();
    }

    /**
     * Wraps the document in a {@link PlayerProfilePage} and feeds each of its
     * text fragments through the matching {@code extract*} method.
     *
     * @param doc the downloaded profile document
     * @return the profile assembled from the extracted values
     */
    @Override
    protected PlayerProfile parseDocument(Document doc) {
        final PlayerProfilePage page = new PlayerProfilePage(doc);
        return PlayerProfile.builder()
                .bio(extractInfo(page.heightWeightAndAge()))
                .information(extractBirth(page.bornInfo()))
                .college(extractCollege(page.college()))
                .experience(extractExp(page.experience()))
                .highSchool(extractHighSchool(page.highSchool()))
                .build();
    }

    /**
     * Extracts the leading one or two digits (plus any directly following dots)
     * found after {@code "Experience: "}.
     *
     * @param str the experience text, possibly absent
     * @return the captured value, or {@code null} when the text is absent or
     *         does not match
     */
    protected static String extractExp(Optional<String> str) {
        return firstGroup(EXP_PATTERN, str);
    }

    /**
     * Extracts the college name found after {@code "College: "}.
     *
     * @param str the college text, possibly absent
     * @return the rest of the line after the label with surrounding whitespace
     *         removed, or {@code null} when the text is absent or does not match
     */
    protected static String extractCollege(Optional<String> str) {
        String college = firstGroup(COLLEGE_PATTERN, str);
        return college == null ? null : college.trim();
    }

    /**
     * Extracts birth date, city and state from text shaped like
     * {@code "Born: 8/3/1977 San Mateo , CA"}.
     *
     * @param str the birth text, possibly absent
     * @return an unmodifiable map with the keys {@code birth_date}, {@code city}
     *         and {@code state}; empty when the text is absent or does not match
     */
    protected static Map<String, String> extractBirth(Optional<String> str) {
        return str.map(val -> {
            Matcher matcher = BIRTH_PATTERN.matcher(normalize(val));
            Map<String, String> values = new HashMap<>();
            if (matcher.find()) {
                values.put("birth_date", matcher.group(1));
                values.put("city", matcher.group(2).trim());
                values.put("state", matcher.group(3));
            }
            return unmodifiableMap(values);
        }).orElse(Collections.emptyMap());
    }

    /**
     * Extracts height, weight and age from the combined text; the three fields
     * must be separated by exactly three whitespace characters (after
     * normalization).
     *
     * @param str the height/weight/age text, possibly absent
     * @return an unmodifiable map with the keys {@code height}, {@code weight}
     *         and {@code age}; empty when the text is absent or does not match
     */
    protected static Map<String, String> extractInfo(Optional<String> str) {
        return str.map(val -> {
            Matcher matcher = INFO_PATTERN.matcher(normalize(val));
            Map<String, String> values = new HashMap<>();
            if (matcher.find()) {
                values.put("height", matcher.group(1));
                values.put("weight", matcher.group(2));
                values.put("age", matcher.group(3));
            }
            return unmodifiableMap(values);
        }).orElse(Collections.emptyMap());
    }

    /**
     * Splits text shaped like {@code "High School: Name [City, ST]"} into its
     * parts. The label and the closing bracket are dropped, the opening bracket
     * is treated as a comma, and the comma-separated tokens are trimmed.
     *
     * @param str the high school text, possibly absent
     * @return an unmodifiable map holding {@code high_school} and {@code state}
     *         when two tokens are found, or {@code high_school}, {@code city}
     *         and {@code state} when there are more; empty when the text is
     *         absent or yields fewer than two tokens
     */
    protected static Map<String, String> extractHighSchool(Optional<String> str) {
        return str.map(val -> {
            String cleaned = normalize(val)
                    .replace("High School: ", "")
                    .replace("]", "")
                    .replace("[", ",");
            List<String> tokens = Stream.of(cleaned.split(","))
                    .map(String::trim)
                    .collect(Collectors.toList());

            if (tokens.size() < 2) {
                return Collections.<String, String>emptyMap();
            }

            Map<String, String> values = new HashMap<>();
            values.put("high_school", tokens.get(0));
            if (tokens.size() == 2) {
                values.put("state", tokens.get(1));
            } else {
                values.put("city", tokens.get(1));
                values.put("state", tokens.get(2));
            }
            return unmodifiableMap(values);
        }).orElse(Collections.emptyMap());
    }

    /**
     * Replaces every non-breaking space (character code 160) with a regular
     * space so that the patterns' whitespace classes can match it.
     *
     * @param s the raw text
     * @return the text with non-breaking spaces replaced
     */
    protected static String normalize(String s) {
        return s.replace(NON_BREAKING_SPACE, ' ');
    }

    /**
     * Returns capture group 1 of the first match of {@code pattern} in the
     * normalized text, or {@code null} when the text is absent or has no match.
     */
    private static String firstGroup(Pattern pattern, Optional<String> str) {
        return str.map(val -> pattern.matcher(normalize(val)))
                .filter(Matcher::find)
                .map(matcher -> matcher.group(1))
                .orElse(null);
    }
}