Back to project page BBC-News-Reader.
The source code is released under:
Copyright (c) 2011, 2012, Digital Lizard (Oscar Key, Thomas Boby) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the...
If you think the Android project BBC-News-Reader listed on this page is inappropriate, for example because it contains malicious code or tools or violates copyright, please email info at java2s dot com.
/******************************************************************************* * BBC News Reader// w w w .j a v a 2 s . c om * Released under the BSD License. See README or LICENSE. * Copyright (c) 2011, Digital Lizard (Oscar Key, Thomas Boby) * All rights reserved. ******************************************************************************/ package com.digitallizard.bbcnewsreader.resource.web; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.net.URLConnection; import org.apache.http.client.ClientProtocolException; import org.apache.http.util.ByteArrayBuffer; public class HtmlParser { private static final String USER_AGENT = "Mozilla/5.0 (Linux; U; Android 2.2.1; en-us; MB525 Build/3.4.2-107_JDN-9) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"; /** * @param args * @throws IOException * @throws ClientProtocolException */ public static byte[] getPage(String stringUrl) throws Exception { URL url = new URL(stringUrl); URLConnection connection = url.openConnection(); System.setProperty("http.agent", ""); connection.setRequestProperty("User-Agent", USER_AGENT); InputStream stream = connection.getInputStream(); BufferedInputStream inputbuffer = new BufferedInputStream(stream); ByteArrayBuffer arraybuffer = new ByteArrayBuffer(50); int current = 0; while ((current = inputbuffer.read()) != -1) { arraybuffer.append((byte) current); } return arraybuffer.toByteArray(); } public static String parsePage(byte[] bytes) { // FIXME needs a tidy up if (bytes != null) { // convert the bytes into a string String html = new String(bytes); // trying parsing the page for news final String[] parsedNews = html.split("<div class=\"story-body\">", 2); if (parsedNews.length > 1) { // assume there are start and stop tags return parsedNews[1].split("<div class=\"share-this\">", 2)[0]; } else { // try parsing for sport final String[] parsedSport = html.split("<article class=\"mod story\">"); 
if(parsedSport.length > 1) { return parsedSport[1].split("</article>", 2)[0]; } else { // just return the entire page as a last resort return html; } } } else { return ""; } } }