Java tutorial
/* * Copyright 2014 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package io.github.carlomicieli.footballdb.starter.documents; import io.github.carlomicieli.footballdb.starter.App; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.springframework.stereotype.Component; import java.io.IOException; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.util.Optional; /** * @author Carlo Micieli */ @Component("documentDownloader") public class WebDocumentDownloader implements DocumentDownloader { private static final String NFL_ROOT_URL = "http://www.nfl.com"; private static final String CHROME_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/34.0.1847.116 Safari/537.36"; @Override public Optional<Document> from(String uri) { return Optional.ofNullable(downloadFromURL(uri)); } private static Document downloadFromURL(String url) { validateUrl(url); try { Document doc = Jsoup.connect(url).userAgent(CHROME_USER_AGENT).get(); App.log().info("Downloading '{}'...", doc.title()); return doc; } catch (IOException e) { App.log().error("Error for '{}': {}", url, e); return null; } } private static void validateUrl(String url) { try { URL u = new URL(url); u.toURI(); } catch (MalformedURLException | URISyntaxException e) { throw new IllegalArgumentException("Invalid url value: " + url); } } public static String nfl(String path) { return NFL_ROOT_URL + path; } }