Java tutorial
/** * Copyright (C) 2015 Bruno Candido Volpato da Cunha (brunocvcunha@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.brunocvcunha.taskerbox.impl.crawler; import java.io.IOException; import java.net.URISyntaxException; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.brunocvcunha.taskerbox.core.http.TaskerboxHttpBox; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import lombok.extern.log4j.Log4j; @Log4j public class CodepadAction extends CrawlerAction { private static final String URL_DOWNLOAD = "http://codepad.org/{id}/raw.c"; private static long FETCH_INTERVAL = 1000L; @Override public void action(final Document entry) { log.debug("Validating " + entry.title()); for (Element el : entry.select(".section")) { final String id = el.select("a").attr("href").replace("http://codepad.org/", ""); String code = el.select("pre").text().replaceAll("\r?\n", " "); if (code.length() > 32) { code = code.substring(0, 32); } final String title = id + " - " + code; if (canAct(id)) { addAct(id); spreadAction(id, title); serializeAlreadyAct(); sleep(FETCH_INTERVAL); } } } public void spreadAction(final String id, String postTitle) { try { log.debug("Getting " + URL_DOWNLOAD.replace("{id}", id)); HttpResponse response = TaskerboxHttpBox.getInstance() .getResponseForURL(URL_DOWNLOAD.replace("{id}", id)); String content = TaskerboxHttpBox.getInstance().readResponseFromEntity(response.getEntity()); if (isConsiderable(id, content)) { if (isValid(id, content)) { logInfo(log, "[+] Bound: [" + postTitle + "]"); doValid("codepad_" + id, content); } else { log.debug("[-] Not Bound: [" + postTitle + "]"); doInvalid("codepad_" + id, content); } } } catch (ClientProtocolException e) { e.printStackTrace(); } catch (IllegalStateException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (URISyntaxException e) { e.printStackTrace(); } } }