googleranking.processing.GoogleData.java Source code

Java tutorial

Introduction

Here is the source code for googleranking.processing.GoogleData.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package googleranking.processing;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.http.NameValuePair;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import org.apache.http.client.utils.URIBuilder;
import org.apache.http.message.BasicNameValuePair;

/**
 *
 *
 * @author tranthanhan
 */
public class GoogleData {
    private static final String MIDDLE_SERVER = "http://xymuqmc0.iqservs.jp/cgi-bin/getHTML.cgi";
    private static final String GOOGLE_SEARCH_WEB = "http://www.google.co.jp/search";

    public String keyWord;

    public GoogleData(String keyWord) {
        this.keyWord = keyWord;
    }

    public int getRankingForDomain(String Domain) {
        List<String> links = getLinksInPage();
        return links.indexOf(Domain) + 1;
    }

    public Document getGoogleHtml() {
        Document doc;
        try {
            URIBuilder search_builder = new URIBuilder(GOOGLE_SEARCH_WEB);
            List<NameValuePair> search_query = new ArrayList<NameValuePair>(2);
            search_query.add(new BasicNameValuePair("pws", "0"));
            search_query.add(new BasicNameValuePair("filter", "1"));
            search_query.add(new BasicNameValuePair("hl", "jp"));
            search_query.add(new BasicNameValuePair("num", Integer.toString(50)));
            search_query.add(new BasicNameValuePair("q", keyWord));
            search_query.add(new BasicNameValuePair("start", "0"));

            String search_url = search_builder.addParameters(search_query).build().toString();

            URIBuilder middle_builder = new URIBuilder(MIDDLE_SERVER);
            List<NameValuePair> middle_query = new ArrayList<NameValuePair>(2);
            middle_query.add(new BasicNameValuePair("url", search_url));
            middle_query.add(new BasicNameValuePair("ip", "49.212.93.235"));

            String url = middle_builder.addParameters(middle_query).build().toString();

            return Jsoup.connect(url).userAgent("Mozilla").get();
        } catch (IOException ex) {
            Logger.getLogger(GoogleData.class.getName()).log(Level.SEVERE, null, ex);
        } catch (URISyntaxException ex) {
            Logger.getLogger(GoogleData.class.getName()).log(Level.SEVERE, null, ex);
        }
        return null;
    }

    public List<String> getLinksInPage() {
        Document doc = getGoogleHtml();
        List<String> ret = new ArrayList<String>();
        try {
            Elements links = doc.select(".g>.r>a");
            for (Element link : links) {
                String url = link.absUrl("href");
                url = URLDecoder.decode(url.substring(url.indexOf("=") + 1, url.indexOf("&")), "UTF-8");
                if (url.startsWith("http") || url.startsWith("https")) {
                    ret.add(getDomain(url)); // Ads/news/etc
                }
            }
        } catch (Exception e) {
            Logger.getLogger(GoogleData.class.getName()).log(Level.SEVERE, null, e);
        }
        return ret;
    }

    private String getDomain(String url) throws URISyntaxException {
        URI uri = new URI(url);
        String domain = uri.getHost();
        return domain;
    }
}