Java tutorial
/* Copyright 2012, Shane Boulden Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package org.karndo.piracy; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.BufferedReader; import java.text.ParseException; import java.util.LinkedList; import java.util.Date; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.*; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.commons.io.IOUtils.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; import org.openide.windows.InputOutput; /** * A class for scraping data from the IMB live piracy map. Data from the site is * obtained from a HTML files; the class parses this data into PiracyEvent * objects for display/manipulation on the map. * @author Shane Boulden * @version June 2012 */ public class Scraper { /** * * @param url * @return */ private String get_piracy_data(String url) throws IOException { InputStream instream = null; BufferedReader reader = null; //Some Apache HTTP objects for accessing the website HttpClient httpclient = new DefaultHttpClient(); HttpGet httpget = new HttpGet(url); HttpResponse response = httpclient.execute(httpget); HttpEntity entity = response.getEntity(); if (entity != null) { instream = entity.getContent(); } //read characters from the instream reader = new BufferedReader(new InputStreamReader(instream)); String line = ""; String data = ""; int count = 0; while ((line = reader.readLine()) != null) { //find the right line if (line.contains( "http://www.icc-ccs.org/plugins/fabrik_visuali" + "zation/googlemap/googlemap-min.js")) { //TODO discover why it doesn't read another line without //the reader skipping reader.skip(1); data = (reader.readLine()); } } instream.close(); return data; } /** * * @param url * @param io * @return */ public LinkedList<PiracyEvent> parse_piracy_data(String url, InputOutput io) throws IOException { //use the private method in this class to get the data String data = get_piracy_data(url); System.out.println(data); //strip everything before "icons" String temp = StringUtils.strip(data, "head.ready(function() {fabrikMap45 = new FbGoogleMapViz('table_map', {\"icons\":["); temp = "\"" + temp; //after stripping the first section, the data can be split using the //'curly brackets' String[] events = StringUtils.split(temp, "{"); LinkedList<PiracyEvent> events_list = new LinkedList<PiracyEvent>(); //some parameters for holding data from the event strings PiracyEvent event1 = null; double longitude = 0.0; double latitude = 0.0; String attack_id = ""; String vessel_type = ""; String status = ""; Date date = null; for (String str : events) { try { //Strip out the latitude and longitude String lat1 = StringUtils.strip(StringUtils.substringBetween(str, "\"0\":", ","), "\""); String long1 = StringUtils.strip(StringUtils.substringBetween(str, "\"1\":", ","), "\""); //parse the values into doubles latitude = Double.parseDouble(lat1); longitude = Double.parseDouble(long1); //strip out the attack id. attack_id = StringUtils.strip(StringUtils.substringBetween(str, "\"2\":", "<br \\/>"), "\"Attack ID:"); //strip out the date String date_str = StringUtils.strip(StringUtils.substringBetween(str, "Date:", "<br"), "Date:"); // TODO change this to a GMT time-format date = DateUtils.parseDate(StringUtils.trim(date_str), "yyyy-MM-dd"); //strip out the Vessel type vessel_type = StringUtils.strip(StringUtils.substringBetween(str, "Vessel:", "<br \\/>"), "Vessel:"); //strip out the status status = StringUtils.strip(StringUtils.substringBetween(str, "Status:", "<br \\/>"), "Status:"); //create a piracy event event1 = new PiracyEvent(latitude, longitude, attack_id, date, StringUtils.trim(status), StringUtils.trim(vessel_type)); events_list.add(event1); //print to the supplied InputOutput from the main window io.getOut().println(event1); } catch (ParseException ex) { System.err.println("A parse exception occurred parsing the date"); System.err.println(ex.getMessage()); } finally { } } return events_list; } }