de.ncoder.studipsync.studip.jsoup.JsoupStudipAdapter.java Source code

Java tutorial

Introduction

Here is the source code for de.ncoder.studipsync.studip.jsoup.JsoupStudipAdapter.java

Source

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2014 Niko Fink
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package de.ncoder.studipsync.studip.jsoup;

import de.ncoder.studipsync.data.Download;
import de.ncoder.studipsync.data.LoginData;
import de.ncoder.studipsync.data.Seminar;
import de.ncoder.studipsync.studip.StudipAdapter;
import de.ncoder.studipsync.studip.StudipException;
import de.ncoder.studipsync.ui.UIAdapter;
import org.json.simple.JSONValue;
import org.json.simple.parser.JSONParser;
import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;

public class JsoupStudipAdapter implements StudipAdapter {
    private static final Logger log = LoggerFactory.getLogger(JsoupStudipAdapter.class);

    private final UIAdapter ui;
    private final Path cookiesPath;
    private final int timeoutMs;

    private Seminar currentSeminar;

    public JsoupStudipAdapter(UIAdapter ui, Path cookiesPath, int timeoutMs) {
        this.ui = ui;
        this.cookiesPath = cookiesPath;
        this.timeoutMs = timeoutMs;
    }

    // --------------------------------LIFECYCLE-------------------------------

    @Override
    public void init() throws StudipException {
        con = new HttpConnection();
        navigate(PAGE_COVER);
    }

    @Override
    public void close() throws IOException {
        ui.close();
    }

    // --------------------------------BROWSER---------------------------------

    private HttpConnection con;
    private Document document;
    private final List<NavigationListener> listeners = new ArrayList<>();

    private void setDocument(Document document) throws StudipException {
        this.document = document;
        try {
            URL url = new URL(document.baseUri());
            log.trace("NAV: " + url);
            for (NavigationListener listener : listeners) {
                listener.navigated(url);
            }
        } catch (MalformedURLException e) {
            StudipException ex = new StudipException("Illegal URL " + document.baseUri(), e);
            ex.put("studip.url", document.baseUri());
            ex.put("studip.document", document);
            throw ex;
        }
    }

    protected void navigate(String url) throws StudipException {
        try {
            con.url(url);
            con.timeout(timeoutMs);
            try {
                setDocument(con.get());
            } catch (IOException e) {
                throw new StudipException("Can't navigate to " + url, e);
            }
        } catch (StudipException ex) {
            ex.put("navigate.url", url);
            ex.put("navigate.connection", con);
            ex.put("navigate.document", document);
            throw ex;
        }
    }

    @Override
    public InputStream startDownload(Download download, boolean diffOnly) throws IOException, StudipException {
        try {
            URL url;
            if (diffOnly) {
                url = download.getDiffUrl();
            } else {
                url = download.getFullUrl();
            }
            URLConnection urlCon = url.openConnection();
            if (!(urlCon instanceof HttpURLConnection)) {
                StudipException ex = new StudipException(
                        new IllegalArgumentException("Can only download via http"));
                ex.put("download.url", url);
                ex.put("download.urlConnection", urlCon);
                throw ex;
            }
            HttpURLConnection con = (HttpURLConnection) urlCon;
            con.setRequestProperty("Cookie",
                    HttpConnection.Response.getRequestCookieString(this.con.request().cookies()));
            return con.getInputStream();
        } catch (StudipException ex) {
            ex.put("download.diffOnly", diffOnly);
            ex.put("download.download", download);
            throw ex;
        }
    }

    public void displayWebsite() {
        try {
            Path tmp = Files.createTempFile("studip-dump", ".html");
            Files.copy(new ByteArrayInputStream(document.outerHtml().getBytes()), tmp,
                    StandardCopyOption.REPLACE_EXISTING);
            log.info("Displaying " + con.url());
            ui.displayWebpage(tmp.toUri());
        } catch (IOException e) {
            log.warn("Can't write page dump", e);
        }
    }

    // --------------------------------LOG IN----------------------------------

    @Override
    public boolean doLogin() throws CancellationException, StudipException {
        try {
            navigate(PAGE_LOGIN);
            if (hasCookies()) {
                restoreCookies();
                navigate(PAGE_BASE);
            } else {
                log.info("Requesting login data.");
                LoginData login = ui.requestLoginData();
                if (login != null) {
                    doLogin(login);
                    login.clean();
                } else {
                    throw new CancellationException("Login cancelled by user");
                }
            }

            if (isLoggedIn()) {
                saveCookies();
                return true;
            } else {
                deleteCookies();
                return false;
            }
        } catch (IOException e) {
            StudipException ex = new StudipException(e);
            ex.put("studip.cookiesPath", cookiesPath);
            ex.put("studip.url", document == null ? "none" : document.baseUri());
            throw ex;
        }
    }

    protected void doLogin(LoginData login) throws StudipException {
        try {
            con.url(PAGE_DO_LOGIN);
            con.method(Connection.Method.POST);
            con.timeout(timeoutMs);

            con.header("Origin", "https://studip.uni-passau.de");
            con.header("Host", "studip.uni-passau.de");
            con.header("Content-Type", "application/x-www-form-urlencoded");
            con.header("Referer", "https://studip.uni-passau.de/studip/login.php");

            con.data("username", login.getUsername());
            con.data("challenge", document.getElementById("challenge").val());
            con.data("login_ticket", document.getElementById("login_ticket").val());
            con.data("response", document.getElementById("response").val());
            con.data("resolution", "1366x768");
            con.data("submitbtn", "");
            con.data("password", new String(login.getPassword()));
            login.clean();
            try {
                //FIXME check for password leaks
                setDocument(con.post());
            } catch (IOException e) {
                throw new StudipException("Can't login", e);
            } finally {
                con.data("password", "XXX");
                login.clean();
            }
        } catch (StudipException ex) {
            ex.put("navigate.url", con == null || con.request() == null ? "none" : con.request().url());
            ex.put("navigate.connection", con);
            ex.put("navigate.document", document);
            throw ex;
        }
    }

    @Override
    public boolean isLoggedIn() {
        Elements selected = document.select("#toolbar .toolbar_menu li:last-of-type a");
        return selected.size() == 1 && "Logout".equals(selected.get(0).text().trim());
    }

    public void ensureLoggedIn() throws StudipException {
        while (!isLoggedIn()) {
            doLogin();
        }
    }

    // --------------------------------SELECTED SEMINAR------------------------

    @Override
    public void selectSeminar(Seminar seminar) throws StudipException {
        try {
            navigate(String.format(PAGE_SELECT_SEMINAR, seminar.getHash()));
            if (!isSeminarSelected(seminar)) {
                StudipException ex = new StudipException("Could not select Seminar " + seminar);
                ex.put("studip.url", document == null ? "none" : document.baseUri());
                ex.put("studip.document", document);
                throw ex;
            }
            if (currentSeminar != seminar) {
                log.debug("Selected " + seminar);
            }
            currentSeminar = seminar;
        } catch (StudipException ex) {
            ex.put("studip.newSeminar", seminar);
            ex.put("studip.currentSeminar", currentSeminar);
            throw ex;
        }
    }

    @Override
    public Seminar getSelectedSeminar() {
        return currentSeminar;
    }

    @Override
    public boolean isSeminarSelected(Seminar seminar) {
        Elements selected = document.select("#register");
        return selected.size() == 1 && seminar.getFullName().equals(selected.get(0).text().trim());
    }

    public void ensureCurrentSeminarSelected() throws StudipException {
        while (!isSeminarSelected(currentSeminar)) {
            selectSeminar(currentSeminar);
        }
    }

    // --------------------------------COOKIES---------------------------------

    public void saveCookies() throws IOException {
        if (cookiesPath != null) {
            log.info("Save Cookies");
            if (Files.exists(cookiesPath)) {
                Files.delete(cookiesPath);
            }
            try (Writer w = Files.newBufferedWriter(cookiesPath, Charset.defaultCharset())) {
                w.write(JSONValue.toJSONString(con.request().cookies()));
            }
        }
    }

    @SuppressWarnings("unchecked")
    public void restoreCookies() throws IOException {
        if (cookiesPath != null) {
            log.info("Restore Cookies");
            try (Reader r = Files.newBufferedReader(cookiesPath, Charset.defaultCharset())) {
                Object o = new JSONParser().parse(r);
                con.request().cookies().putAll((Map) o);
            } catch (org.json.simple.parser.ParseException | ClassCastException e) {
                throw new IOException("Illegal data in cookies file " + cookiesPath, e);
            }
        }
    }

    public void deleteCookies() throws IOException {
        if (cookiesPath != null && Files.exists(cookiesPath)) {
            Files.delete(cookiesPath);
        }
    }

    public boolean hasCookies() {
        try {
            return cookiesPath != null && Files.isRegularFile(cookiesPath) && Files.size(cookiesPath) > 0;
        } catch (IOException e) {
            log.warn("Couldn't read cookies from " + cookiesPath + ", prompting for password", e);
            return false;
        }
    }

    // --------------------------------PARSERS---------------------------------

    @Override
    public List<Seminar> parseSeminars() throws StudipException {
        ensureLoggedIn();

        navigate(PAGE_SEMINARS);

        Elements events = document.select("#content>table:first-of-type>tbody>tr");
        List<Seminar> seminars = new ArrayList<>();
        for (org.jsoup.nodes.Element event : events) {
            if (event.select(">td").size() > 4) {
                Elements info = event.select(">td:nth-of-type(4)>a:first-of-type");
                Elements font = info.select("font");
                if (info.size() >= 1 && font.size() >= 2) {
                    Seminar seminar = Seminar.getSeminar(info.get(0).absUrl("href"), font.get(0).text().trim(),
                            font.get(1).text().trim());
                    seminars.add(seminar);
                }
            }
        }
        log.debug("Parsed " + seminars.size() + " seminars.");
        log.trace(seminars.toString());
        return seminars;
    }

    @Override
    public List<Download> parseDownloads(String downloadsUrl, boolean structured) throws StudipException {
        try {
            ensureLoggedIn();
            ensureCurrentSeminarSelected();

            navigate(downloadsUrl);
            Map<Integer, Download> stack = new HashMap<>();
            List<Download> downloads = new ArrayList<>();

            Elements rows = document
                    .select("#content>table>tbody>tr:nth-of-type(2)>td:nth-of-type(2)>table>tbody>tr>td>table");
            for (org.jsoup.nodes.Element row : rows) {
                Elements content = row.select(">tbody>tr>td.printhead");
                Elements insets = row.select(">tbody>tr>td.blank img");
                if (content.size() >= 2) {
                    Elements info = content.get(1).select("a");
                    Elements link = content.get(2).select("a[title]");
                    List<TextNode> time = content.get(2).textNodes();
                    if (info.size() > 0 && link.size() > 0 && time.size() > 0) {
                        Download download = Download.getDownload(link.get(0).absUrl("href"),
                                info.get(0).text().trim(),
                                time.get(time.size() - 1).text().trim().replace("\u00a0", ""), "");
                        download.setSeminar(currentSeminar);
                        int level = insets.size() - 3;
                        if (level > 0) {
                            download.setParent(stack.get(level - 1));
                        } else {
                            download.setLevel(level);
                        }
                        stack.put(download.getLevel(), download);
                        downloads.add(download);
                        //TODO read size, description
                    }
                }
            }
            log.debug("Parsed " + downloads.size() + " downloads.");
            log.trace(downloads.toString());
            return downloads;
        } catch (StudipException ex) {
            ex.put("studip.seminar", currentSeminar);
            ex.put("parseDownloads.listUrl", downloadsUrl);
            ex.put("parseDownloads.structured", structured);
            throw ex;
        }
    }

    // --------------------------------LISTENERS-------------------------------

    public static interface NavigationListener {
        public abstract void navigated(URL url);
    }

    public boolean addNavigationListener(NavigationListener navigationListener) {
        return listeners.add(navigationListener);
    }

    public boolean removeNavigationListener(NavigationListener o) {
        return listeners.remove(o);
    }
}