mServer.crawler.sender.MediathekSrf.java Source code

Java tutorial

Introduction

Here is the source code for mServer.crawler.sender.MediathekSrf.java

Source

/*
 * MediathekView
 * Copyright (C) 2008 W. Xaver
 * W.Xaver[at]googlemail.com
 * http://zdfmediathk.sourceforge.net/
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package mServer.crawler.sender;

import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Scanner;

import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;

public class MediathekSrf extends MediathekReader {

    /** Sender name used for this crawler; taken from {@code Const.SRF}. */
    public static final String SENDERNAME = Const.SRF;
    // Presumably the page limit per topic ("Seiten" = pages); not referenced by any active code visible here.
    private static final int MAX_SEITEN_THEMA = 5;
    // Number of films handled per topic page when CrawlerTool.loadLongMax() is false.
    private static final int MAX_FILME_KURZ = 6;

    // URL prefixes that getM3u8(...) and checkUrl(...) replace with URL3_M3U8.
    private static final String URL1_M3U8 = "https://srfvodhd-vh.akamaihd.net";
    private static final String URL2_M3U8 = "http://srfvodhd-vh.akamaihd.net";
    // Replacement URL prefix substituted for URL1_M3U8 / URL2_M3U8.
    private static final String URL3_M3U8 = "http://hdvodsrforigin-f.akamaihd.net";

    // Scheme prefixes: film URLs starting with HTTPS get that first match replaced by HTTP.
    private static final String HTTPS = "https";
    private static final String HTTP = "http";
    // Page loaded by addToList() to collect the topic ids and titles.
    private static final String SRF_TOPIC_PAGE_URL = "http://www.srf.ch/play/v2/tv/topicList?layout=json";
    // Start marker and terminating character of the data block cut out by extractSrfTopicData(...).
    private static final String SRF_DATA_BLOCK_BEGIN_PATTERN = "data-teaser=\"";
    private static final char SRF_DATA_BLOCK_END = '"';

    /**
     * Creates the SRF crawler and hands its settings to the {@code MediathekReader} base class:
     * the sender name, 3 threads and a wait value of 100 (see the inline argument comments).
     *
     * @param ssearch   passed through unchanged to the super constructor
     * @param startPrio passed through unchanged to the super constructor
     */
    public MediathekSrf(FilmeSuchen ssearch, int startPrio) {
        super(ssearch, SENDERNAME, /* threads */ 3, /* urlWarten */ 100, startPrio);
    }

    /**
     * Loads the SRF topic list, stores every topic as an {@code {id, title}} pair in
     * {@code listeThemen} and then starts the worker threads that process those topics.
     * If a stop was requested or no topic was found, the crawler is reported as finished instead.
     */
    @Override
    public void addToList() {
        final String idMarker = "{\"id\":\"";

        listeThemen.clear();
        meldungStart();

        final GetUrl pageLoader = new GetUrl(getWartenSeiteLaden());
        final MSStringBuilder topicPage = pageLoader.getUri_Utf(SENDERNAME, SRF_TOPIC_PAGE_URL,
                new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER), "");

        for (int cursor = topicPage.indexOf(idMarker, 0); cursor != -1; cursor = topicPage.indexOf(idMarker, cursor)) {
            cursor += idMarker.length();
            final int idEnd = topicPage.indexOf("\"", cursor);
            if (idEnd == -1) {
                continue;
            }
            final String topicId = topicPage.substring(cursor, idEnd);
            // Short ids are not topics, e.g. {"id":"A","title":"A","contai....
            if (topicId.length() >= 10) {
                final String rawTitle = topicPage.extract("\"title\":\"", "\"", idEnd);
                final String topicTitle = StringEscapeUtils.unescapeJava(rawTitle).trim();
                listeThemen.addUrl(new String[] { topicId, topicTitle });
            }
        }

        if (Config.getStop() || listeThemen.isEmpty()) {
            meldungThreadUndFertig();
            return;
        }

        meldungAddMax(listeThemen.size());
        for (int workerNo = 0; workerNo < getMaxThreadLaufen(); ++workerNo) {
            final ThemaLaden worker = new ThemaLaden();
            worker.setName(SENDERNAME + workerNo);
            worker.start();
        }
    }

    private class ThemaLaden extends Thread {

        // Page loader used for every download done by this worker.
        private final GetUrl getUrl = new GetUrl(getWartenSeiteLaden());

        // Buffer for the topic overview data handled in addFilme(...).
        private MSStringBuilder overviewPageFilm = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
        // Buffer for the JSON description of a single film, filled in addFilms(...).
        private MSStringBuilder filmPage = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
        // Buffer for the m3u8 file loaded by getM3u8(...).
        private MSStringBuilder m3u8Page = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
        // Scratch list of candidate URLs, cleared and refilled by getUrl(MSStringBuilder, String, String).
        private final ArrayList<String> urlList = new ArrayList<>();
        // Scratch list of film ids, cleared and refilled by addFilmsFromPage(...).
        private final ArrayList<String> filmList = new ArrayList<>();

        /**
         * Worker loop: takes topics from {@code listeThemen} until none is left or a stop is
         * requested, and loads the films of each topic. Any exception is logged; afterwards the
         * thread is reported as finished.
         */
        @Override
        public void run() {
            try {
                meldungAddThread();
                while (!Config.getStop()) {
                    final String[] topic = listeThemen.getListeThemen();
                    if (topic == null) {
                        break;
                    }
                    // topic[0] holds the topic id, topic[1] the topic title
                    meldungProgress(topic[0]);
                    addFilme(topic[0], topic[1]);
                }
            } catch (Exception ex) {
                Log.errorLog(832002877, ex);
            }
            meldungThreadUndFertig();
        }

        /**
         * Loads the "latest" page of one topic, cuts the teaser data out of it and hands the
         * result to {@link #addFilmsFromPage}. Exceptions are logged and not propagated.
         *
         * @param urlThema the topic id that is inserted into the feed URL
         * @param thema    the topic title
         */
        private void addFilme(String urlThema, String thema) {
            final String urlFeed = "http://www.srf.ch/play/v2/tv/topic/" + urlThema + "/latest";
            try {
                overviewPageFilm = new MSStringBuilder();
                final String rawPage = getUrl.getUri_Utf(SENDERNAME, urlFeed, overviewPageFilm, "").substring(0);
                final String topicData = extractSrfTopicData(rawPage);
                overviewPageFilm.append(topicData.toCharArray());
                addFilmsFromPage(overviewPageFilm, thema, urlFeed);
                // Only this first page is evaluated. The former loop over further result pages
                // ("&pageNumber=" up to MAX_SEITEN_THEMA) was disabled with the remark
                // "Not possible actually."
            } catch (Exception ex) {
                Log.errorLog(195926364, ex);
            }
        }

        /**
         * Cuts the data block that follows {@code SRF_DATA_BLOCK_BEGIN_PATTERN} out of the page
         * and returns it with HTML entities unescaped.
         *
         * @param aPageContent the complete page content
         * @return the unescaped data block, or an empty string when the page is {@code null},
         *         contains no begin marker, or the block is not terminated by
         *         {@code SRF_DATA_BLOCK_END}
         */
        private String extractSrfTopicData(final String aPageContent) {
            if (aPageContent == null) {
                return "";
            }
            final int markerPos = aPageContent.indexOf(SRF_DATA_BLOCK_BEGIN_PATTERN);
            if (markerPos < 0) {
                // Without this check, -1 + marker length would point at an unrelated offset.
                return "";
            }
            final int dataBegin = markerPos + SRF_DATA_BLOCK_BEGIN_PATTERN.length();
            final int dataEnd = aPageContent.indexOf(SRF_DATA_BLOCK_END, dataBegin);
            if (dataEnd < 0) {
                // Unterminated block: substring(begin, -1) would throw.
                return "";
            }
            return StringEscapeUtils.unescapeHtml4(aPageContent.substring(dataBegin, dataEnd));
        }

        /**
         * Collects the film ids contained in the page and loads every film via its JSON URL.
         * Stops early when a stop is requested, or after {@code MAX_FILME_KURZ} films when
         * {@code CrawlerTool.loadLongMax()} is false.
         *
         * @param page         the page data to search for film ids
         * @param thema        the topic title
         * @param themePageUrl the URL of the topic page, handed on to {@link #addFilms}
         */
        private void addFilmsFromPage(MSStringBuilder page, String thema, String themePageUrl) {
            filmList.clear();
            page.extractList("{\"id\":\"", "?id=", "\"", filmList);

            for (int i = 0; i < filmList.size(); i++) {
                if (Config.getStop()) {
                    return;
                }
                if (!CrawlerTool.loadLongMax() && i >= MAX_FILME_KURZ) {
                    return;
                }
                final String jsonMovieUrl = "http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/"
                        + filmList.get(i) + ".jsonp";
                addFilms(jsonMovieUrl, themePageUrl, thema);
            }
        }

        /**
         * Loads the JSON description of one film, reads its metadata and stream URLs from it
         * and adds the resulting film to the film list. If no normal-quality URL can be
         * determined, an error is logged and nothing is added. Exceptions raised while
         * evaluating the page are logged and not propagated.
         *
         * @param jsonMovieUrl the json url of the film
         * @param themePageUrl the website url of the film (not evaluated by this method)
         * @param theme        the theme name of the film
         */
        private void addFilms(String jsonMovieUrl, String themePageUrl, String theme) {
            meldung(jsonMovieUrl);

            filmPage = getUrl.getUri_Utf(SENDERNAME, jsonMovieUrl, filmPage, "");
            try {
                final Date published = extractDateAndTime(filmPage);
                final String dateStr = published == null ? "" : new SimpleDateFormat("dd.MM.yyyy").format(published);
                final String time = published == null ? "" : new SimpleDateFormat("HH:mm:ss").format(published);

                final long duration = extractDuration(filmPage);
                final String rawDescription = filmPage.extract("\"description\": \"", "\"");
                final String description = StringEscapeUtils.unescapeJava(rawDescription).trim();
                final String title = filmPage.extract("AssetMetadatas", "\"title\": \"", "\"");

                String urlThema = filmPage.extract("\"homepageUrl\": \"", "\"");
                if (urlThema.isEmpty()) {
                    urlThema = "http://www.srf.ch/play/tv/sendungen";
                }
                final String subtitle = filmPage.extract("\"TTMLUrl\": \"", "\"");

                String urlHD = getUrl(filmPage, "\"@quality\": \"HD\"", "\"text\": \"");
                String urlNormal = getUrl(filmPage, "\"@quality\": \"SD\"", "\"text\": \"");
                String urlSmall = "";

                // If a playlist (m3u8) is offered, derive the per-quality URLs from the
                // index files that the playlist actually lists.
                final String playlistUrl = urlHD.endsWith("m3u8") ? urlHD : urlNormal;
                if (playlistUrl.endsWith("m3u8")) {
                    getM3u8(playlistUrl);
                    if (playlistUrl.contains("q50,q60")) {
                        urlHD = selectM3u8Variant(playlistUrl, urlHD, "index_5_av.m3u8");
                        urlNormal = selectM3u8Variant(playlistUrl, urlNormal, "index_4_av.m3u8", "index_3_av.m3u8");
                        urlSmall = selectM3u8Variant(playlistUrl, urlSmall, "index_2_av.m3u8", "index_1_av.m3u8");
                    } else {
                        urlNormal = selectM3u8Variant(playlistUrl, urlNormal, "index_0_av.m3u8");
                        urlSmall = selectM3u8Variant(playlistUrl, urlSmall, "index_3_av.m3u8", "index_2_av.m3u8");
                    }
                }

                // Promote the small URL when no normal one is available.
                if (urlNormal.isEmpty() && !urlSmall.isEmpty()) {
                    urlNormal = urlSmall;
                    urlSmall = "";
                }

                // https -> http
                urlNormal = downgradeHttpsToHttp(urlNormal);
                urlSmall = downgradeHttpsToHttp(urlSmall);
                urlHD = downgradeHttpsToHttp(urlHD);

                if (urlNormal.isEmpty()) {
                    Log.errorLog(962101451, "Keine URL: " + jsonMovieUrl);
                    return;
                }

                urlHD = checkUrl(urlHD);
                urlNormal = checkUrl(urlNormal);
                urlSmall = checkUrl(urlSmall);

                final DatenFilm film = new DatenFilm(SENDERNAME, theme, urlThema, title, urlNormal, ""/*rtmpURL*/,
                        dateStr, time, duration, description);

                if (!urlHD.isEmpty()) {
                    CrawlerTool.addUrlHd(film, urlHD, "");
                }
                if (!urlSmall.isEmpty()) {
                    CrawlerTool.addUrlKlein(film, urlSmall, "");
                }
                if (!subtitle.isEmpty()) {
                    CrawlerTool.addUrlSubtitle(film, subtitle);
                }
                addFilm(film);
            } catch (Exception ex) {
                Log.errorLog(556320087, ex);
            }
        }

        /**
         * Returns the URL built for the first of the given index files that occurs in the
         * currently loaded m3u8 page, or {@code fallback} when none of them occurs.
         */
        private String selectM3u8Variant(String playlistUrl, String fallback, String... indexNames) {
            for (String indexName : indexNames) {
                if (m3u8Page.indexOf(indexName) != -1) {
                    return getUrlFromM3u8(playlistUrl, indexName);
                }
            }
            return fallback;
        }

        /** Replaces the leading {@code https} of the URL by {@code http}; other URLs are returned unchanged. */
        private String downgradeHttpsToHttp(String url) {
            return url.startsWith(HTTPS) ? url.replaceFirst(HTTPS, HTTP) : url;
        }

        /**
         * Loads the m3u8 file into {@code m3u8Page}. When the result is empty and the URL starts
         * with {@code URL1_M3U8} or {@code URL2_M3U8}, the download is repeated with that prefix
         * replaced by {@code URL3_M3U8} (the original note on this swap reads "is a 403").
         *
         * @param url3u8 the URL of the m3u8 file
         */
        private void getM3u8(String url3u8) {
            m3u8Page = getUrl.getUri_Utf(SENDERNAME, url3u8, m3u8Page, "");

            for (String replacedPrefix : new String[] { URL1_M3U8, URL2_M3U8 }) {
                if (m3u8Page.length() != 0) {
                    return;
                }
                if (url3u8.startsWith(replacedPrefix)) {
                    final String fallbackUrl = url3u8.replaceFirst(replacedPrefix, URL3_M3U8);
                    m3u8Page = getUrl.getUri_Utf(SENDERNAME, fallbackUrl, m3u8Page, "");
                }
            }
        }

        /**
         * Replaces the first occurrence of {@code URL1_M3U8}, and afterwards of
         * {@code URL2_M3U8}, by {@code URL3_M3U8} (the original note on this swap reads
         * "is a 403").
         *
         * @param url the URL to rewrite
         * @return the rewritten URL
         */
        private String checkUrl(String url) {
            return url.replaceFirst(URL1_M3U8, URL3_M3U8)
                    .replaceFirst(URL2_M3U8, URL3_M3U8);
        }

        /**
         * Picks a stream URL from the film page. The first extracted URL ending in "mp4" wins;
         * if there is none, the last URL ending in "m3u8" is used.
         *
         * @param filmPage the film page to search
         * @param s1       first search pattern passed to {@code extractList}
         * @param s2       second search pattern passed to {@code extractList}
         * @return the chosen URL, or an empty string when no suitable URL was found
         */
        private String getUrl(MSStringBuilder filmPage, String s1, String s2) {
            urlList.clear();
            filmPage.extractList(s1, s2, "\"", urlList);

            String lastPlaylist = "";
            for (String candidate : urlList) {
                if (candidate.endsWith("mp4")) {
                    return candidate;
                }
                if (candidate.endsWith("m3u8")) {
                    lastPlaylist = candidate;
                }
            }
            return lastPlaylist;
        }

        /**
         * Builds the URL of one quality variant: everything of the playlist URL up to and
         * including "csmil/" followed by the given index file name.
         *
         * @param m3u8Url      the playlist URL
         * @param qualityIndex the index file name to append
         * @return the URL of the quality variant
         */
        private String getUrlFromM3u8(String m3u8Url, String qualityIndex) {
            final String marker = "csmil/";
            final int markerPos = m3u8Url.indexOf(marker);
            final String base = m3u8Url.substring(0, markerPos);
            return base + marker + qualityIndex;
        }

        /**
         * Reads the value following {@code "duration":} from the page and divides it by 1000
         * (the value is treated as milliseconds).
         *
         * @param page the film page
         * @return the duration divided by 1000, or 0 when the value is missing or not a number
         *         (the latter is logged)
         */
        private long extractDuration(MSStringBuilder page) {
            final String rawMillis = page.extract("\"duration\":", ",").trim();
            if (rawMillis.isEmpty()) {
                return 0;
            }
            try {
                return Long.parseLong(rawMillis) / 1000; //ms
            } catch (NumberFormatException ex) {
                Log.errorLog(646490237, ex);
                return 0;
            }
        }

        /**
         * Reads the {@code publishedDate} that follows {@code "AssetMetadatas"} in the page and
         * parses it, e.g. 2014-12-12T09:45:00+02:00.
         *
         * @param page the film page
         * @return the parsed date, or {@code null} when the text cannot be parsed (which is logged)
         */
        private Date extractDateAndTime(MSStringBuilder page) {
            final String rawDate = page.extract("\"AssetMetadatas\"", "\"publishedDate\": \"", "\"");
            try {
                return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX").parse(rawDate);
            } catch (ParseException ex) {
                Log.errorLog(784512304, ex, "Date_STR " + rawDate);
                return null;
            }
        }

    }
}