io.github.carlomicieli.footballdb.starter.parsers.PlayerProfileParser.java Source code

Java tutorial

Introduction

Here is the source code for io.github.carlomicieli.footballdb.starter.parsers.PlayerProfileParser.java

Source

/*
 * Copyright 2014 the original author or authors.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package io.github.carlomicieli.footballdb.starter.parsers;

import io.github.carlomicieli.footballdb.starter.documents.DocumentDownloader;
import io.github.carlomicieli.footballdb.starter.documents.PathBuilder;
import io.github.carlomicieli.footballdb.starter.domain.PlayerProfile;
import io.github.carlomicieli.footballdb.starter.pages.PlayerProfilePage;
import org.jsoup.nodes.Document;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.Collections.unmodifiableMap;

/**
 * Parser that turns a downloaded player profile document into a
 * {@link PlayerProfile}.
 *
 * <p>The document is wrapped in a {@link PlayerProfilePage}, which exposes the
 * raw text fragments (height/weight/age, birth, college, experience and high
 * school). Each fragment is then reduced to its values by one of the static
 * {@code extract*} helpers below. Every helper first replaces non-breaking
 * spaces with regular spaces (see {@link #normalize(String)}) so that the
 * patterns only have to deal with plain whitespace.
 *
 * @author Carlo Micieli
 */
@Component
public class PlayerProfileParser extends Parser<PlayerProfile> {

    /** Non-breaking space (U+00A0, decimal 160), replaced by a plain space before matching. */
    private static final char NON_BREAKING_SPACE = '\u00A0';

    /** Label stripped from the high school fragment before it is tokenized. */
    private static final String HIGH_SCHOOL_LABEL = "High School: ";

    // Patterns are compiled once; the capture groups are consumed positionally
    // by the extract* methods below.
    private static final Pattern EXP_PATTERN = Pattern.compile("Experience:\\s(\\d{1,2}\\.*)");
    private static final Pattern BIRTH_PATTERN = Pattern
            .compile("Born:\\s(\\d{1,2}/\\d{1,2}/\\d{4})\\s(\\w*\\s*\\w*)\\s,\\s(\\w*)");
    private static final Pattern INFO_PATTERN = Pattern
            .compile("Height:\\s(\\d-\\d{1,2})\\s{3}Weight:\\s(\\d{3})\\s{3}Age:\\s(\\d{1,2})");
    private static final Pattern COLLEGE_PATTERN = Pattern.compile("College:\\s(\\w*\\s*\\w*)");

    /**
     * Creates the parser.
     *
     * @param docs the document downloader handed to the {@link Parser} superclass
     */
    @Autowired
    public PlayerProfileParser(DocumentDownloader docs) {
        super(docs);
    }

    /**
     * @return the path builder obtained from {@link PathBuilder#nflDotCom()}
     */
    @Override
    protected PathBuilder pathBuilder() {
        return PathBuilder.nflDotCom();
    }

    /**
     * Builds a {@link PlayerProfile} from the fragments exposed by a
     * {@link PlayerProfilePage} wrapped around the given document.
     *
     * @param doc the document to parse
     * @return the profile assembled from the extracted values
     */
    @Override
    protected PlayerProfile parseDocument(Document doc) {
        final PlayerProfilePage page = new PlayerProfilePage(doc);

        return PlayerProfile.builder()
                .bio(extractInfo(page.heightWeightAndAge()))
                .information(extractBirth(page.bornInfo()))
                .college(extractCollege(page.college()))
                .experience(extractExp(page.experience()))
                .highSchool(extractHighSchool(page.highSchool()))
                .build();
    }

    /**
     * Extracts the experience value: the one or two digits (plus any directly
     * following dots) that come after {@code "Experience: "}.
     *
     * @param str the raw experience fragment, possibly empty
     * @return the captured value, or {@code null} when the fragment is absent
     *         or does not match
     */
    protected static String extractExp(Optional<String> str) {
        return firstGroup(EXP_PATTERN, str);
    }

    /**
     * Extracts the college name: at most two word-character runs following
     * {@code "College: "}.
     *
     * @param str the raw college fragment, possibly empty
     * @return the captured value, or {@code null} when the fragment is absent
     *         or does not match
     */
    protected static String extractCollege(Optional<String> str) {
        return firstGroup(COLLEGE_PATTERN, str);
    }

    /**
     * Extracts birth data from a fragment shaped like
     * {@code "Born: M/D/YYYY City , State"}.
     *
     * @param str the raw birth fragment, possibly empty
     * @return an unmodifiable map with the keys {@code birth_date},
     *         {@code city} and {@code state}; empty when the fragment is
     *         absent or does not match
     */
    protected static Map<String, String> extractBirth(Optional<String> str) {
        return namedGroups(BIRTH_PATTERN, str, "birth_date", "city", "state");
    }

    /**
     * Extracts height, weight and age from a fragment shaped like
     * {@code "Height: F-I   Weight: NNN   Age: NN"} (three whitespace
     * characters between the fields).
     *
     * @param str the raw fragment, possibly empty
     * @return an unmodifiable map with the keys {@code height},
     *         {@code weight} and {@code age}; empty when the fragment is
     *         absent or does not match
     */
    protected static Map<String, String> extractInfo(Optional<String> str) {
        return namedGroups(INFO_PATTERN, str, "height", "weight", "age");
    }

    /**
     * Extracts high school data. After the {@code "High School: "} label is
     * removed, the opening bracket is turned into a comma and the closing
     * bracket dropped, so a fragment such as {@code "Name [City, ST]"} becomes
     * a comma separated list of trimmed tokens.
     *
     * @param str the raw high school fragment, possibly empty
     * @return an unmodifiable map holding {@code high_school} and
     *         {@code state}, plus {@code city} when at least three tokens are
     *         present; empty when the fragment is absent or yields fewer than
     *         two tokens
     */
    protected static Map<String, String> extractHighSchool(Optional<String> str) {
        return str.map(val -> {
            String flattened = normalize(val)
                    .replace(HIGH_SCHOOL_LABEL, "")
                    .replace("]", "")
                    .replace("[", ",");
            List<String> tokens = Stream.of(flattened.split(","))
                    .map(String::trim)
                    .collect(Collectors.toList());

            // A lone school name carries no location, so nothing is reported.
            if (tokens.size() < 2) {
                return Collections.<String, String>emptyMap();
            }

            Map<String, String> values = new HashMap<>();
            values.put("high_school", tokens.get(0));
            if (tokens.size() == 2) {
                // Only one location token: it is stored as the state.
                values.put("state", tokens.get(1));
            } else {
                values.put("city", tokens.get(1));
                values.put("state", tokens.get(2));
            }
            return unmodifiableMap(values);
        }).orElse(Collections.emptyMap());
    }

    /**
     * Replaces every non-breaking space (U+00A0) with a regular space.
     *
     * @param s the text to clean up
     * @return the text with non-breaking spaces replaced
     */
    protected static String normalize(String s) {
        return s.replace(NON_BREAKING_SPACE, ' ');
    }

    /**
     * Returns the first capture group of the first match of {@code pattern}
     * in the normalized fragment.
     *
     * @param pattern the pattern to search for; must declare at least one group
     * @param str     the raw fragment, possibly empty
     * @return the captured text, or {@code null} when the fragment is absent
     *         or the pattern is not found
     */
    private static String firstGroup(Pattern pattern, Optional<String> str) {
        return str.map(val -> pattern.matcher(normalize(val)))
                .filter(Matcher::find)
                .map(matcher -> matcher.group(1))
                .orElse(null);
    }

    /**
     * Maps the capture groups of the first match of {@code pattern} to the
     * given keys, in order: group 1 to {@code keys[0]}, group 2 to
     * {@code keys[1]}, and so on.
     *
     * @param pattern the pattern to search for; must declare at least
     *                {@code keys.length} groups
     * @param str     the raw fragment, possibly empty
     * @param keys    the map keys, one per capture group
     * @return an unmodifiable map of key to captured text; empty when the
     *         fragment is absent or the pattern is not found
     */
    private static Map<String, String> namedGroups(Pattern pattern, Optional<String> str, String... keys) {
        return str.map(val -> {
            Matcher matcher = pattern.matcher(normalize(val));
            Map<String, String> values = new HashMap<>();
            if (matcher.find()) {
                for (int i = 0; i < keys.length; i++) {
                    values.put(keys[i], matcher.group(i + 1));
                }
            }
            return unmodifiableMap(values);
        }).orElse(Collections.emptyMap());
    }
}