Java tutorial
package kr.pe.javarss.mybus.task; import java.io.IOException; import java.io.InputStream; import java.net.UnknownServiceException; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import kr.pe.javarss.mybus.model.MyBus; import kr.pe.javarss.mybus.util.ParseException; import net.htmlparser.jericho.Element; import net.htmlparser.jericho.Source; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.params.BasicHttpParams; import org.apache.http.params.HttpConnectionParams; import org.apache.http.params.HttpParams; /** * ? ? . * */ public class GBusPageParser { private static final Pattern numberPattern = Pattern.compile("\\d+"); private static final int CONNECTION_TIMEOUT = 5000;// public static InputStream getPageInputStream(String url) throws IOException { // : 30 ? HttpParams params = new BasicHttpParams(); HttpConnectionParams.setConnectionTimeout(params, CONNECTION_TIMEOUT); HttpConnectionParams.setSoTimeout(params, CONNECTION_TIMEOUT); HttpClient hc = new DefaultHttpClient(params); HttpResponse res = hc.execute(new HttpGet(url)); if (res.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { throw new UnknownServiceException(); } return res.getEntity().getContent(); } public static void parse(MyBus bus) throws IOException, ParseException { InputStream input = getPageInputStream(bus.pageUrl); Source s = new Source(input); try { s.fullSequentialParse(); } catch (Exception e) { throw new ParseException(e); } Element ul = s.getFirstElementByClass("mybus").getFirstElement("ul"); List<Element> li = ul.getChildElements(); if (li == null) { return; } int size = li.size(); if (size > 0) { bus.position1 = parsePosition(li.get(0)); } if (size > 1) { bus.position2 = parsePosition(li.get(1)); } } private static MyBus.Position parsePosition(Element e) { MyBus.Position p = new MyBus.Position(); List<Element> spans = e.getAllElements("span"); for (Element span : spans) { String css = span.getAttributeValue("class"); String content = span.getContent().toString(); if (css == null) { // <span>(711396)</span> p.licenseNumber = content; } else if (css.equals("num1") || css.equals("num2")) { // <span class="num1">1 / 2 ?</span> // <span class="num2">3 / 7 ?</span> // <span class="num1">? </span> Matcher m = numberPattern.matcher(content); if (m.find()) { p.remainingCount = Integer.parseInt(m.group()); } if (m.find()) { p.timeToArrive = Integer.parseInt(m.group()); } //if (!m.matches()) { // p.otherInfo = content; //} } else { // <span class="narrival">? .</span> //p.otherInfo = content; } } return p; } }