kr.pe.javarss.mybus.task.GBusPageParser.java Source code

Java tutorial

Introduction

Here is the source code for kr.pe.javarss.mybus.task.GBusPageParser.java

Source

package kr.pe.javarss.mybus.task;

import java.io.IOException;
import java.io.InputStream;
import java.net.UnknownServiceException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import kr.pe.javarss.mybus.model.MyBus;
import kr.pe.javarss.mybus.util.ParseException;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;

import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;

/**
 * Downloads the HTML page referenced by a {@link MyBus} and extracts up to
 * two bus positions from it.
 *
 * <p>All members are static; the class holds no state.</p>
 */
public class GBusPageParser {

    /** Matches one run of decimal digits inside a span's text. */
    private static final Pattern numberPattern = Pattern.compile("\\d+");

    /** Timeout in milliseconds, used for both connecting and socket reads. */
    private static final int CONNECTION_TIMEOUT = 5000;

    /**
     * Issues an HTTP GET for {@code url} and returns the response body stream.
     *
     * <p>The caller owns the returned stream and must close it when done.</p>
     *
     * @param url address of the page to fetch
     * @return the content stream of the response entity
     * @throws UnknownServiceException if the server answers with a status
     *         other than 200 OK
     * @throws IOException if the request fails or the response has no body
     */
    public static InputStream getPageInputStream(String url) throws IOException {

        // Apply the same timeout to connection establishment and to reads.
        HttpParams params = new BasicHttpParams();
        HttpConnectionParams.setConnectionTimeout(params, CONNECTION_TIMEOUT);
        HttpConnectionParams.setSoTimeout(params, CONNECTION_TIMEOUT);

        HttpClient hc = new DefaultHttpClient(params);
        HttpGet request = new HttpGet(url);
        HttpResponse res = hc.execute(request);

        int status = res.getStatusLine().getStatusCode();
        if (status != HttpStatus.SC_OK) {
            // Nobody will read this response, so give up the connection now.
            request.abort();
            throw new UnknownServiceException(
                    "Unexpected HTTP status " + status + " for " + url);
        }

        if (res.getEntity() == null) {
            throw new IOException("Empty response body for " + url);
        }

        return res.getEntity().getContent();
    }

    /**
     * Fetches {@code bus.pageUrl}, parses it and fills {@code bus.position1}
     * and {@code bus.position2} from the first two children of the
     * {@code <ul>} found inside the element with class {@code mybus}.
     *
     * <p>A position is left untouched when the list has no corresponding
     * child.</p>
     *
     * @param bus the bus whose page is read and whose positions are updated
     * @throws IOException if the page cannot be downloaded or read
     * @throws ParseException if the HTML cannot be parsed or does not contain
     *         the expected {@code mybus} list
     */
    public static void parse(MyBus bus) throws IOException, ParseException {
        InputStream input = getPageInputStream(bus.pageUrl);
        try {
            Source s = new Source(input);

            try {
                s.fullSequentialParse();
            } catch (Exception e) {
                throw new ParseException(e);
            }

            // Either lookup returns null when the page layout is not the
            // expected one; report that as a parse failure, not an NPE.
            Element container = s.getFirstElementByClass("mybus");
            Element ul = (container == null) ? null : container.getFirstElement("ul");
            if (ul == null) {
                throw new ParseException(new IllegalStateException(
                        "No <ul> inside an element with class \"mybus\" in "
                                + bus.pageUrl));
            }

            List<Element> li = ul.getChildElements();
            if (li == null) {
                return;
            }

            int size = li.size();
            if (size > 0) {
                bus.position1 = parsePosition(li.get(0));
            }
            if (size > 1) {
                bus.position2 = parsePosition(li.get(1));
            }
        } finally {
            // Always release the stream, and with it the HTTP connection.
            closeQuietly(input);
        }
    }

    /**
     * Builds a position from the {@code <span>} elements found under
     * {@code e}.
     *
     * <ul>
     *   <li>A span without a {@code class} attribute, e.g.
     *       {@code <span>(711396)</span>}, supplies {@code licenseNumber}
     *       (the raw span content is stored).</li>
     *   <li>A span with class {@code num1} or {@code num2} is scanned for
     *       numbers: the first one becomes {@code remainingCount}, the second
     *       one {@code timeToArrive}. A span without numbers changes
     *       nothing.</li>
     *   <li>Spans with any other class, e.g. {@code narrival}, are
     *       ignored.</li>
     * </ul>
     *
     * @param e the list item describing one bus
     * @return a new position populated from the spans that were recognised
     */
    private static MyBus.Position parsePosition(Element e) {
        MyBus.Position p = new MyBus.Position();

        List<Element> spans = e.getAllElements("span");
        for (Element span : spans) {
            String css = span.getAttributeValue("class");
            String content = span.getContent().toString();

            if (css == null) {
                p.licenseNumber = content;
            } else if ("num1".equals(css) || "num2".equals(css)) {
                Matcher m = numberPattern.matcher(content);
                if (m.find()) {
                    p.remainingCount = Integer.parseInt(m.group());
                }
                if (m.find()) {
                    p.timeToArrive = Integer.parseInt(m.group());
                }
            }
        }

        return p;
    }

    /** Closes {@code in}, ignoring any failure raised by the close itself. */
    private static void closeQuietly(InputStream in) {
        try {
            in.close();
        } catch (IOException ignored) {
            // A close failure must not hide the outcome of the parse.
        }
    }
}