Java URL Read getUrlSource(String url)

Here you can find the source of getUrlSource(String url)

Description

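Fetches the content at the given HTTP URL, decoded as UTF-8 and sent with a randomly chosen User-Agent header, and returns it as a list of lines.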

License

Open Source License

Declaration

public static ArrayList<String> getUrlSource(String url) throws IOException 

Method Source Code


//package com.java2s;
//License from project: Open Source License 

import java.io.BufferedReader;

import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

import java.util.ArrayList;
import java.util.Random;

/**
 * Utility for downloading the text served at an HTTP(S) URL, one line at a time.
 */
public class Main {
    /** Pool of User-Agent header values; one entry is chosen at random for each request. */
    private static final String[] USER_AGENTS = {
            "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.0)",
            "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.1)",
            "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.2)",
            "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0)",
            "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.1)",
            "Opera/7.51 (Windows NT 5.0; U) [en]",
            "Opera/7.51 (Windows NT 5.1; U) [en]",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.22) Gecko/20110902 Firefox/3.6.22",
            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Tablet PC 2.0)",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.10 Safari/532.0",
            "Opera/9.80 (Windows NT 6.1; U; ru) Presto/2.9.168 Version/11.51",
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.36 (KHTML, like Gecko) Chrome/12.0.742.53 Safari/534.36 QQBrowser/6.3.8908.201",
            "Opera/7.51 (Windows NT 5.2; U) [en]",
            "Opera/7.51 (Windows NT 6.0; U) [en]",
            "Opera/7.51 (Windows NT 6.1; U) [en]",
            "Mozilla/4.0 (compatible; MSIE 6.0; X11; Linux x86_64; ru) Opera 10.10",
            "Opera/9.80 (X11; Linux x86_64; U; ru) Presto/2.2.15 Version/10.10",
            "Mozilla/5.0 (Windows NT 5.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
            "Mozilla/5.0 (X11; U; Linux x86_64; ru; rv:1.9.0.4) Gecko/2008111611 Gentoo Iceweasel/3.0.4",
            "Mozilla/1.1 (compatible; MSPIE 2.0; Windows CE)",
            "Mozilla/1.10 [en] (Compatible; RISC OS 3.70; Oregano 1.10)",
            "Mozilla/1.22 (compatible; MSIE 2.0d; Windows NT)",
            "Googlebot",
            "MSNBot",
            "Yandex",
            "StackRambler",
            "Mozilla/1.22 (compatible; MSIE 5.01; PalmOS 3.0) EudoraWeb 2" };

    /** Shared generator for picking a User-Agent, created once instead of on every call. */
    private static final Random RANDOM = new Random();

    /**
     * Downloads the resource at {@code url} and returns its content split into lines.
     *
     * <p>The request carries a randomly selected {@code User-Agent} header and the
     * response body is decoded as UTF-8. Line terminators are not included in the
     * returned strings.
     *
     * @param url address to fetch; the connection it opens is cast to
     *            {@link HttpURLConnection}, so a non-HTTP(S) URL fails with a
     *            {@link ClassCastException}
     * @return the lines of the response body, in order; empty if the body is empty
     * @throws IOException if the URL is malformed, or if connecting to the server or
     *                     reading the response fails
     */
    public static ArrayList<String> getUrlSource(String url) throws IOException {
        ArrayList<String> lines = new ArrayList<String>();
        URL site = new URL(url);
        HttpURLConnection connection = (HttpURLConnection) site.openConnection();
        connection.addRequestProperty("User-Agent", getRandomUserAgent());
        try {
            // try-with-resources closes the reader (and the underlying stream) even
            // when readLine() throws part-way through the response.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    lines.add(inputLine);
                }
            }
        } finally {
            // Also covers the case where getInputStream() itself fails and no reader
            // was ever created to close.
            connection.disconnect();
        }
        return lines;
    }

    /**
     * Picks one of the built-in User-Agent strings uniformly at random.
     *
     * @return a non-null, non-empty User-Agent header value
     */
    public static final String getRandomUserAgent() {
        return USER_AGENTS[RANDOM.nextInt(USER_AGENTS.length)];
    }
}

Related

  1. getUrlContent(String url)
  2. getURLContent_old(final String uri, final StringBuffer content)
  3. getUrlContentWithRetries(String url, long timeoutMs, long retryDelayMs)
  4. getUrlFollowingRedirects(String possibleRedirectionUrl)
  5. getUrlInfos(String urlAsString, int timeout)
  6. getUrlStatus(String url)
  7. getUrlTxt(String url)
  8. readAsString(final URL url)
  9. readListFromURL(URL p_url)