Yahoo News Crawler
//package com.a4studio.android.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import android.util.Log;
import android.util.Xml;
/**
 * Queries the Yahoo news search web service for a keyword and collects the
 * returned {@code Result} elements as {@link NewsItem} objects.
 *
 * <p>Typical use: construct with a keyword and a result count, call
 * {@link #search()}, then read {@link #getNewsResults()}. Failures during the
 * request or while parsing are logged and not rethrown; items parsed before
 * the failure remain in the result list.
 */
public class YahooNewsCrawler {
    private final static String TAG = YahooNewsCrawler.class.getSimpleName();
    /** Value sent as the {@code language} query parameter. */
    private String language = "en";
    /** Raw (un-encoded) search keyword as supplied by the caller. */
    private String keyword;
    /** Currently unused. */
    private String site;
    /** Value sent as the {@code results} query parameter. */
    private int results;
    /** The request URI built by the most recent call to {@link #search()}. */
    private String uri = "";
    private final static String NEWS_SERVICE = "http://search.yahooapis.com/NewsSearchService/V1/newsSearch?";
    private final static String APP_ID = "iEjUXGrV34HmLcV9m1gm1OBUWUve.fUcYv553gw.MUHn5b8BA_8W8Fe4AxuOLMKKLLU-";
    /** Items collected so far; every call to {@link #search()} appends to it. */
    private List<NewsItem> newsResults = new ArrayList<NewsItem>();

    /**
     * @param keyword the search keyword; it is URL-encoded when the request
     *                is built
     * @param results the value passed as the {@code results} query parameter
     */
    public YahooNewsCrawler(String keyword, int results) {
        this.keyword = keyword;
        this.results = results;
    }

    /**
     * Builds the request URI, fetches the XML response and appends one
     * {@link NewsItem} per {@code Result} element to the result list.
     *
     * <p>Encoding, URL, I/O and XML parsing errors are logged under
     * {@link #TAG} and swallowed. The response stream is always closed.
     */
    public void search() {
        XmlPullParser xmlPull = Xml.newPullParser();
        InputStream in = null;
        try {
            // Encode into a local so the keyword field keeps the raw value;
            // otherwise a second search() would encode it twice.
            String encodedKeyword = URLEncoder.encode(keyword, "UTF-8");
            uri = NEWS_SERVICE + "appid=" + APP_ID + "&"
                    + "query=" + encodedKeyword + "&"
                    + "results=" + results + "&"
                    + "language=" + language;
            Log.d(TAG, uri);
            URLConnection conn = new URL(uri).openConnection();
            in = conn.getInputStream();
            xmlPull.setInput(in, "UTF-8");
            parseResults(xmlPull);
        } catch (UnsupportedEncodingException e) {
            Log.e(TAG, "search", e);
        } catch (MalformedURLException e) {
            Log.e(TAG, "search", e);
        } catch (IOException e) {
            Log.e(TAG, "search", e);
        } catch (XmlPullParserException e) {
            Log.e(TAG, "search", e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    Log.e(TAG, "search: closing response stream", e);
                }
            }
        }
    }

    /**
     * Walks the document and appends a {@link NewsItem} to the result list
     * each time a {@code Result} element is closed.
     */
    private void parseResults(XmlPullParser xmlPull)
            throws XmlPullParserException, IOException {
        NewsItem item = null;
        // Depth of the Result element currently being read, or -1 when outside one.
        int itemDepth = -1;
        int eventCode = xmlPull.getEventType();
        while (eventCode != XmlPullParser.END_DOCUMENT) {
            switch (eventCode) {
            case XmlPullParser.START_TAG: {
                String name = xmlPull.getName();
                if (name.equalsIgnoreCase(NewsItem.RESULT)) {
                    item = new NewsItem();
                    itemDepth = xmlPull.getDepth();
                } else if (item != null && xmlPull.getDepth() == itemDepth + 1) {
                    // Only direct children of Result are treated as fields.
                    // NOTE(review): presumably a Result can contain nested
                    // elements (e.g. a thumbnail with its own Url) - confirm
                    // against the service's response schema.
                    readField(xmlPull, item, name);
                }
                break;
            }
            case XmlPullParser.END_TAG:
                if (item != null && xmlPull.getName().equalsIgnoreCase(NewsItem.RESULT)) {
                    newsResults.add(item);
                    item = null;
                    itemDepth = -1;
                }
                break;
            default:
                break;
            }
            eventCode = xmlPull.next();
        }
    }

    /**
     * Stores the text of the current element in {@code item} when the element
     * name is a known field. Unknown elements are left untouched, because
     * {@code nextText()} throws on an element that has child elements.
     */
    private void readField(XmlPullParser xmlPull, NewsItem item, String name)
            throws XmlPullParserException, IOException {
        if (name.equalsIgnoreCase(NewsItem.TITLE)) {
            item.setTitle(xmlPull.nextText());
        } else if (name.equalsIgnoreCase(NewsItem.SUMMARY)) {
            item.setSummary(xmlPull.nextText());
        } else if (name.equalsIgnoreCase(NewsItem.URL)) {
            item.setUrl(xmlPull.nextText());
        } else if (name.equalsIgnoreCase(NewsItem.CLICKURL)) {
            item.setClickUrl(xmlPull.nextText());
        } else if (name.equalsIgnoreCase(NewsItem.NEWSSOURCE)) {
            item.setNewsSource(xmlPull.nextText());
        } else if (name.equalsIgnoreCase(NewsItem.NEWSSOURCEURL)) {
            item.setNewsSrouceUrl(xmlPull.nextText());
        } else if (name.equalsIgnoreCase(NewsItem.LANGUAGE)) {
            item.setLanguage(xmlPull.nextText());
        } else if (name.equalsIgnoreCase(NewsItem.PUBLISHDATE)) {
            item.setPublishDate(parseLongOrZero(xmlPull.nextText(), name));
        } else if (name.equalsIgnoreCase(NewsItem.MODIFICATIONDATE)) {
            item.setModifDate(parseLongOrZero(xmlPull.nextText(), name));
        }
    }

    /**
     * Parses {@code text} as a {@code long}; a non-numeric value is logged
     * and yields {@code 0} instead of aborting the search with an unchecked
     * exception.
     */
    private static long parseLongOrZero(String text, String field) {
        try {
            return Long.parseLong(text.trim());
        } catch (NumberFormatException e) {
            Log.e(TAG, "search: non-numeric " + field + " '" + text + "'", e);
            return 0L;
        }
    }

    /**
     * @return the live list of items collected by {@link #search()} (not a copy)
     */
    public List<NewsItem> getNewsResults() {
        return newsResults;
    }

    /**
     * Runs one sample search and prints every returned item to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        YahooNewsCrawler crawler = new YahooNewsCrawler("nokia siemens network", 1);
        crawler.search();
        List<NewsItem> items = crawler.getNewsResults();
        for (NewsItem item : items) {
            System.out.println("->" + item);
        }
    }
}
/**
 * Serializable value holder for one news search result.
 *
 * <p>The public string constants are the XML element names that the parser
 * matches against; the private fields hold the corresponding values.
 */
class NewsItem implements Serializable {

    private static final long serialVersionUID = 1L;

    // XML element names.
    public static final String RESULT = "Result";
    public static final String TITLE = "Title";
    public static final String SUMMARY = "Summary";
    public static final String URL = "Url";
    public static final String CLICKURL = "ClickUrl";
    public static final String NEWSSOURCE = "NewsSource";
    public static final String NEWSSOURCEURL = "NewsSourceUrl";
    public static final String LANGUAGE = "Language";
    public static final String PUBLISHDATE = "PublishDate";
    public static final String MODIFICATIONDATE = "ModificationDate";

    // Field names are part of the serialized form, so the historical
    // spelling of newsSrouceUrl is kept as is.
    private String title;
    private String summary;
    private String url;
    private String clickUrl;
    private String newsSource;
    private String newsSrouceUrl;
    private String language;
    private long publishDate;
    private long modifDate;
    private String content;

    /** @return the title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to set */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the summary */
    public String getSummary() {
        return this.summary;
    }

    /** @param summary the summary to set */
    public void setSummary(String summary) {
        this.summary = summary;
    }

    /** @return the url */
    public String getUrl() {
        return this.url;
    }

    /** @param url the url to set */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the click url */
    public String getClickUrl() {
        return this.clickUrl;
    }

    /** @param clickUrl the click url to set */
    public void setClickUrl(String clickUrl) {
        this.clickUrl = clickUrl;
    }

    /** @return the news source */
    public String getNewsSource() {
        return this.newsSource;
    }

    /** @param newsSource the news source to set */
    public void setNewsSource(String newsSource) {
        this.newsSource = newsSource;
    }

    /** @return the news source url */
    public String getNewsSrouceUrl() {
        return this.newsSrouceUrl;
    }

    /** @param newsSrouceUrl the news source url to set */
    public void setNewsSrouceUrl(String newsSrouceUrl) {
        this.newsSrouceUrl = newsSrouceUrl;
    }

    /** @return the language */
    public String getLanguage() {
        return this.language;
    }

    /** @param language the language to set */
    public void setLanguage(String language) {
        this.language = language;
    }

    /** @return the publish date */
    public long getPublishDate() {
        return this.publishDate;
    }

    /** @param publishDate the publish date to set */
    public void setPublishDate(long publishDate) {
        this.publishDate = publishDate;
    }

    /** @return the modification date */
    public long getModifDate() {
        return this.modifDate;
    }

    /** @param modifDate the modification date to set */
    public void setModifDate(long modifDate) {
        this.modifDate = modifDate;
    }

    /** @return the content */
    public String getContent() {
        return this.content;
    }

    /** @param content the content to set */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * Renders every field except {@code content}, one per line and each
     * followed by a newline, in declaration order.
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        out.append(title).append("\n");
        out.append(summary).append("\n");
        out.append(url).append("\n");
        out.append(clickUrl).append("\n");
        out.append(newsSource).append("\n");
        out.append(newsSrouceUrl).append("\n");
        out.append(language).append("\n");
        out.append(publishDate).append("\n");
        out.append(modifDate).append("\n");
        return out.toString();
    }

    /**
     * Empty entry point; does nothing.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
    }
}
Related examples in the same category