downloadwolkflow.getWorkFlowList.java Source code

Java tutorial

Introduction

Here is the source code for downloadwolkflow.getWorkFlowList.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package downloadwolkflow;

import MyTest.DownloadFileTest;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Command-line crawler for the workflow listing on myexperiment.org.
 *
 * <p>It reads the number of listing pages from the pagination bar of the
 * workflow index, visits every listing page, opens the detail page of each
 * listed workflow and, when that page exposes a link containing
 * {@code "download"}, stores the linked file in the {@code data} directory
 * (relative to the working directory).
 *
 * @author AiWangtao
 */
public class getWorkFlowList {

    private static final Logger LOGGER = Logger.getLogger(getWorkFlowList.class.getName());

    /** Scheme and host that the relative links found on listing pages are appended to. */
    private static final String BASE_URL = "http://www.myexperiment.org";

    /** Directory (relative to the working directory) that receives downloaded files. */
    private static final String OUTPUT_DIR = "data";

    /**
     * Crawls every listing page returned by {@link #getPageList()} and downloads
     * the workflows found on them.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        String[] pageList = getPageList();
        if (pageList == null) {
            // getPageList() has already logged the cause.
            LOGGER.log(Level.SEVERE, "Could not determine the workflow listing pages; nothing to download");
            return;
        }
        System.out.println(pageList.length);
        // try-with-resources closes the client even if an unexpected runtime exception escapes.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            // Index 0 of the page list is intentionally unused; pages are 1-based.
            for (int i = 1; i < pageList.length; i++) {
                System.out.println(pageList[i]);
                System.out.println("---------------------------------------------------------------------------");
                try {
                    crawlListingPage(pageList[i], httpclient);
                } catch (IOException ex) {
                    // A failing listing page must not stop the remaining pages.
                    LOGGER.log(Level.SEVERE, null, ex);
                } catch (InterruptedException ex) {
                    LOGGER.log(Level.SEVERE, null, ex);
                    // Restore the flag and stop: an interrupt is a request to end the crawl.
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Builds the URLs of all workflow listing pages.
     *
     * <p>The page count is read from the second-to-last link of the
     * {@code div.pagination} element on the workflow index page.
     *
     * @return an array of length {@code pageCount + 1} whose element {@code i}
     *         (for {@code 1 <= i <= pageCount}) is the URL of listing page
     *         {@code i}; element 0 is {@code null}. Returns {@code null} when
     *         the index page cannot be fetched or its pagination cannot be
     *         interpreted (the cause is logged).
     */
    public static String[] getPageList() {
        // The client is only needed for this single request, so close it when done.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            String mainpage = fetchPage(BASE_URL + "/workflows", httpclient);
            Document mainDoc = Jsoup.parse(mainpage);
            Element pageinfo = mainDoc.select("div.pagination ").first();
            if (pageinfo == null) {
                LOGGER.log(Level.SEVERE, "No pagination element found on the workflow index page");
                return null;
            }
            Elements pagesElemenets = pageinfo.select("[href]");
            if (pagesElemenets.size() < 2) {
                LOGGER.log(Level.SEVERE, "Pagination element has fewer than two links; cannot read the page count");
                return null;
            }
            // The second-to-last link is expected to carry the highest page number.
            int pageSize = Integer.parseInt(pagesElemenets.get(pagesElemenets.size() - 2).text().trim());
            if (pageSize < 0) {
                LOGGER.log(Level.SEVERE, "Pagination reports a negative page count: " + pageSize);
                return null;
            }
            String[] pageList = new String[pageSize + 1];
            for (int i = 1; i <= pageSize; i++) {
                pageList[i] = BASE_URL + "/workflows?page=" + i;
            }
            return pageList;
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } catch (NumberFormatException ex) {
            LOGGER.log(Level.SEVERE, "Pagination link text is not a page number", ex);
        }
        return null;
    }

    /**
     * Visits one listing page and downloads every workflow linked from it,
     * pausing one second after each workflow.
     *
     * @param pageUrl    URL of the listing page
     * @param httpclient client used for all requests
     * @throws IOException          if the listing page cannot be fetched
     * @throws InterruptedException if the thread is interrupted while pausing
     */
    private static void crawlListingPage(String pageUrl, CloseableHttpClient httpclient)
            throws IOException, InterruptedException {
        Document mainDoc = Jsoup.parse(fetchPage(pageUrl, httpclient));
        Elements resultList = mainDoc.select("div.resource_list_item");
        for (Element workflowResult : resultList) {
            // Walk the expected structure step by step so that a missing element
            // skips this entry instead of raising a NullPointerException.
            Element panel = workflowResult.select("div.main_panel").first();
            Element title = panel == null ? null : panel.select("p.title.inline").first();
            Element detailInfo = title == null ? null : title.select("a").first();
            if (detailInfo == null) {
                LOGGER.log(Level.WARNING, "Skipping a listing entry without a title link on " + pageUrl);
                continue;
            }
            String detailUrl = BASE_URL + detailInfo.attributes().get("href") + ".html";
            System.out.println(detailUrl);
            downloadWorkFlow(detailUrl, httpclient);
            Thread.sleep(1000);
        }
    }

    /**
     * Opens a workflow detail page, locates its first candidate download link
     * and downloads the target when the link contains {@code "download"}.
     *
     * @param detailUrl  URL of the workflow detail page
     * @param httpclient client used for all requests
     */
    private static void downloadWorkFlow(String detailUrl, CloseableHttpClient httpclient) {
        try {
            Document mainDoc = Jsoup.parse(fetchPage(detailUrl, httpclient));
            Element downloadEle = mainDoc.select("div#myexp_content ul li a").first();
            if (downloadEle == null) {
                downloadEle = mainDoc.select("div#myexp_content ul li:nth-child(1) span a").first();
            }
            if (downloadEle == null) {
                // Neither selector matched: treat it like a page without a download link.
                System.out.println(detailUrl + " do not contain valuable resource");
                return;
            }

            String downloadUrl = downloadEle.attributes().get("href");
            Thread.sleep(500);
            if (downloadUrl.contains("download")) {
                downloadFiles(downloadUrl, httpclient);
            } else {
                System.out.println(detailUrl + " do not contain valuable resource");
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } catch (InterruptedException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            // Restore the flag so the caller's next sleep notices the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Fetches a URL as text and writes it, followed by a line separator, to a
     * file in the output directory named after the last path segment of the URL.
     *
     * @param downloadUrl URL to fetch
     * @param httpclient  client used for the request
     * @throws IOException if the request fails, no usable file name can be
     *                     derived from the URL, or the file cannot be written
     */
    private static void writeToFile(String downloadUrl, HttpClient httpclient) throws IOException {
        String xml = fetchPage(downloadUrl, httpclient);
        String filename = fileNameFromUrl(downloadUrl);
        if (filename == null) {
            throw new IOException("Cannot derive a file name from " + downloadUrl);
        }
        System.out.println(filename);
        try (PrintWriter out = new PrintWriter(outputFile(filename))) {
            out.println(xml);
        }
    }

    /**
     * Downloads a URL byte-for-byte into a file in the output directory named
     * after the last path segment of the URL. Failures are logged, not thrown.
     * Responses with a non-2xx status are skipped so that error pages are not
     * stored as workflow files.
     *
     * @param downloadUrl URL of the file to download
     * @param httpclient  client used for the request
     */
    private static void downloadFiles(String downloadUrl, CloseableHttpClient httpclient) {
        HttpEntity entity = null;
        try {
            HttpResponse response = httpclient.execute(newGet(downloadUrl));
            entity = response.getEntity();
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                LOGGER.log(Level.WARNING, "Skipping " + downloadUrl + ": server answered HTTP " + status);
                return;
            }
            if (entity == null) {
                return;
            }
            String filename = fileNameFromUrl(downloadUrl);
            if (filename == null) {
                LOGGER.log(Level.WARNING, "Skipping " + downloadUrl + ": no usable file name in URL");
                return;
            }
            System.out.println(filename);
            // Both streams are closed even when the copy fails part-way.
            try (InputStream in = entity.getContent();
                    BufferedOutputStream out = new BufferedOutputStream(
                            new FileOutputStream(outputFile(filename)))) {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } finally {
            // Drains whatever is left so the connection is released on every path.
            EntityUtils.consumeQuietly(entity);
        }
    }

    /**
     * Performs a GET request and returns the response body as a string.
     *
     * @param url        URL to fetch
     * @param httpclient client used for the request
     * @return the response body
     * @throws IOException if the URL is malformed, the request fails or the
     *                     response carries no body
     */
    private static String fetchPage(String url, HttpClient httpclient) throws IOException {
        HttpResponse response = httpclient.execute(newGet(url));
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            throw new IOException("No response body for " + url);
        }
        return EntityUtils.toString(entity);
    }

    /**
     * Creates a GET request, reporting a malformed URL as a checked
     * {@link IOException} so that one bad link does not abort the whole crawl.
     */
    private static HttpGet newGet(String url) throws IOException {
        try {
            return new HttpGet(url);
        } catch (IllegalArgumentException ex) {
            throw new IOException("Malformed URL: " + url, ex);
        }
    }

    /**
     * Extracts the last path segment of a URL, without its query string, for
     * use as a local file name.
     *
     * @return the file name, or {@code null} when the URL yields an empty name
     *         or one that could point outside the output directory
     */
    private static String fileNameFromUrl(String url) {
        String[] segments = url.split("/");
        if (segments.length == 0) {
            // The URL consisted of slashes only.
            return null;
        }
        String name = segments[segments.length - 1];
        int queryStart = name.indexOf('?');
        if (queryStart >= 0) {
            name = name.substring(0, queryStart);
        }
        // The name comes from a remote page: refuse anything that is not a plain file name.
        if (name.isEmpty() || name.equals(".") || name.equals("..") || name.indexOf('\\') >= 0) {
            return null;
        }
        return name;
    }

    /**
     * Resolves a file name inside the output directory, creating the directory
     * when it does not exist yet.
     *
     * @throws IOException if the output directory cannot be created
     */
    private static File outputFile(String filename) throws IOException {
        File dir = new File(OUTPUT_DIR);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create output directory " + dir.getAbsolutePath());
        }
        return new File(dir, filename);
    }
}