mServer.crawler.sender.MediathekZdfTivi.java Source code

Java tutorial

Introduction

Here is the source code for mServer.crawler.sender.MediathekZdfTivi.java

Source

/*
 * MediathekView
 * Copyright (C) 2008 W. Xaver
 * W.Xaver[at]googlemail.com
 * http://zdfmediathk.sourceforge.net/
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package mServer.crawler.sender;

import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.lang3.time.FastDateFormat;

import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import mServer.tool.MserverDaten;

/**
 * Crawler for the sender {@link Const#ZDF_TIVI}.
 *
 * <p>The crawler works in two phases:
 * <ol>
 * <li>{@link #addToList()} downloads three "flashXml" overview pages from
 * www.tivi.de and collects URLs into two work lists: {@code listeThemen}
 * (URLs of single film pages) and {@code listeThemen_3} (URLs of broadcast
 * pages which in turn list film pages).</li>
 * <li>A number of {@link ThemaLaden} worker threads drain both lists, download
 * every film page, extract the film meta data and register the film via
 * {@code addFilm}.</li>
 * </ol>
 */
public class MediathekZdfTivi extends MediathekReader {

    public final static String SENDERNAME = Const.ZDF_TIVI;

    /** Input pattern of the {@code <airTime>} element, e.g. {@code 2014-01-19T08:35:00.000+01:00}. */
    private static final String AIR_TIME_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSXXX";

    // FastDateFormat instances are shared between the worker threads; the
    // Commons Lang documentation describes FastDateFormat as thread-safe.
    private final FastDateFormat sdfOut_date = FastDateFormat.getInstance("dd.MM.yyyy");
    private final FastDateFormat sdfOut_time = FastDateFormat.getInstance("HH:mm:ss");

    /** Work list of broadcast pages found by {@link #add_3()}; each page lists several film pages. */
    private final LinkedListUrl listeThemen_3 = new LinkedListUrl();

    public MediathekZdfTivi(FilmeSuchen ssearch, int startPrio) {
        super(ssearch, SENDERNAME, 2 /* threads */, 50 /* urlWarten */, startPrio);
    }

    /**
     * Collects all URLs to process and starts the worker threads.
     *
     * <p>Both work lists are cleared first so that entries left over from an
     * earlier (e.g. stopped) run are not processed again. If the search was
     * stopped or nothing was found, the crawler reports completion right away;
     * otherwise {@code getMaxThreadLaufen()} worker threads are started.
     */
    @Override
    public synchronized void addToList() {
        // search for the topics
        listeThemen.clear();
        listeThemen_3.clear();
        meldungStart();
        add_1();
        add_2();
        add_3();
        if (Config.getStop() || (listeThemen.isEmpty() && listeThemen_3.isEmpty())) {
            meldungThreadUndFertig();
            return;
        }
        meldungAddMax(listeThemen.size() + listeThemen_3.size());
        for (int t = 0; t < getMaxThreadLaufen(); ++t) {
            Thread th = new ThemaLaden();
            th.setName(SENDERNAME + t);
            th.start();
        }
    }

    /**
     * Collects film page URLs from the "rueckblick" overview page into {@code listeThemen}.
     */
    private void add_1() {
        //<ns3:headline>Nachrichten</ns3:headline>
        //<ns3:image>/tiviVideos/contentblob/2063212/tivi9teaserbild/9050138</ns3:image>
        //<ns3:page>/tiviVideos/beitrag/pur%2B+Sendungen/895212/2063212?view=flashXml</ns3:page>
        //<ns3:text>Ich will die Wahrheit!</ns3:text>
        final List<String> urls = collectUrls("http://www.tivi.de/tiviVideos/rueckblick?view=flashXml",
                "<ns3:page>/tiviVideos", "http://www.tivi.de/tiviVideos",
                732323698, 309075109, 302010498);
        for (String url : urls) {
            listeThemen.addUrl(new String[] { url });
        }
    }

    /**
     * Collects film page URLs from the start page of the video section into {@code listeThemen}.
     */
    private void add_2() {
        //<ns3:headline>Nachrichten</ns3:headline>
        //<ns3:image>/tiviVideos/contentblob/2063212/tivi9teaserbild/9050138</ns3:image>
        //<ns3:page>/tiviVideos/beitrag/pur%2B+Sendungen/895212/2063212?view=flashXml</ns3:page>
        //<ns3:text>Ich will die Wahrheit!</ns3:text>
        final List<String> urls = collectUrls("http://www.tivi.de/tiviVideos/?view=flashXml",
                "<ns3:page>/tiviVideos/beitrag", "http://www.tivi.de/tiviVideos/beitrag",
                645121326, 915263985, 730169702);
        for (String url : urls) {
            listeThemen.addUrl(new String[] { url });
        }
    }

    /**
     * Collects broadcast page URLs from the navigation page into {@code listeThemen_3}.
     */
    private void add_3() {
        final List<String> urls = collectUrls("http://www.tivi.de/tiviVideos/navigation?view=flashXml",
                "type=\"broadcast\">", "http://www.tivi.de",
                195623078, 152378787, 906037912);
        for (String url : urls) {
            listeThemen_3.addUrl(new String[] { url });
        }
    }

    /**
     * Downloads one overview page and returns every URL that follows an
     * occurrence of {@code marker}.
     *
     * <p>For each occurrence, the text between the end of the marker and the
     * next {@code '<'} is taken, every {@code %2F} in it is replaced by
     * {@code '/'}, and {@code urlPrefix} is put in front of it.
     *
     * @param pageUrl      URL of the overview page to download
     * @param marker       text that directly precedes the path to extract
     * @param urlPrefix    prefix prepended to every extracted path
     * @param errEmptyPage log id used when the downloaded page is empty
     * @param errNoUrl     log id used when nothing could be extracted after a marker
     * @param errException log id used when scanning the page throws
     * @return the URLs found, in page order; empty if none were found
     */
    private List<String> collectUrls(String pageUrl, String marker, String urlPrefix,
            int errEmptyPage, int errNoUrl, int errException) {
        final List<String> found = new ArrayList<>();
        MSStringBuilder page = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
        final GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
        page = getUrlIo.getUri(SENDERNAME, pageUrl, StandardCharsets.UTF_8, 6 /* versuche */, page,
                "" /* Meldung */);
        if (page.length() == 0) {
            Log.errorLog(errEmptyPage, "Leere Seite");
        }
        try {
            int pos = 0;
            while ((pos = page.indexOf(marker, pos)) != -1) {
                // continue the next search behind this marker
                pos += marker.length();
                final int end = page.indexOf("<", pos);
                final String path = (end == -1) ? "" : page.substring(pos, end).replace("%2F", "/");
                if (path.isEmpty()) {
                    Log.errorLog(errNoUrl, "keine URL");
                } else {
                    found.add(urlPrefix + path);
                }
            }
        } catch (Exception ex) {
            Log.errorLog(errException, ex);
        }
        return found;
    }

    /**
     * Worker thread: drains {@code listeThemen} and then {@code listeThemen_3}
     * and registers a film for every film page it can evaluate.
     */
    private class ThemaLaden extends Thread {

        private final GetUrl getUrl = new GetUrl(getWartenSeiteLaden());
        // SimpleDateFormat is not thread-safe, therefore every worker thread
        // owns its private parser instead of sharing one across threads.
        private final SimpleDateFormat sdfIn = new SimpleDateFormat(AIR_TIME_PATTERN);
        private MSStringBuilder seite1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
        private MSStringBuilder seite2 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);

        /**
         * Processes both work lists until they are empty or the search is
         * stopped, then reports that this thread has finished.
         */
        @Override
        public void run() {
            try {
                meldungAddThread();
                String[] link;
                while (!Config.getStop() && (link = listeThemen.getListeThemen()) != null) {
                    addTivi_(link[0] /* url */);
                    meldungProgress(link[0]);
                }
                while (!Config.getStop() && (link = listeThemen_3.getListeThemen()) != null) {
                    add_(link[0] /* url */);
                    meldungProgress(link[0]);
                }
            } catch (Exception ex) {
                Log.errorLog(731214569, ex);
            }
            meldungThreadUndFertig();
        }

        /**
         * Downloads a broadcast page and evaluates every film page that is
         * referenced by a {@code <ns3:video-teaser>} entry on it.
         *
         * @param url_ URL of the broadcast page
         */
        private void add_(String url_) {
            final String MUSTER_START = "<ns3:video-teaser>";
            GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
            seite2 = getUrlIo.getUri(SENDERNAME, url_, StandardCharsets.UTF_8, 1 /* versuche */, seite2,
                    "" /* Meldung */);
            if (seite2.length() == 0) {
                Log.errorLog(302010698, "Leere Seite");
            }
            try {
                int pos = 0;
                while (!Config.getStop() && (pos = seite2.indexOf(MUSTER_START, pos)) != -1) {
                    pos += MUSTER_START.length();
                    final String path = seite2.extract("<ns3:page>", "<", pos);
                    if (path.isEmpty()) {
                        Log.errorLog(732698720, "keine URL");
                        continue;
                    }
                    addTivi_("http://www.tivi.de" + path.replace("%2F", "/"));
                }
            } catch (Exception ex) {
                Log.errorLog(701212145, ex);
            }
        }

        /**
         * Downloads one film page, extracts the film data and registers the film.
         *
         * <p>Nothing is registered when the page is empty or when it contains
         * no acceptable film URL. A missing or unparsable air time does not
         * prevent registration; the film is then added without date and time.
         *
         * @param url URL of the film page
         */
        private void addTivi_(String url) {
            try {
                // load the film page
                meldung(url);
                seite1 = getUrl.getUri_Utf(SENDERNAME, url, seite1, "" /* Meldung */);
                if (seite1.length() == 0) {
                    Log.errorLog(301649897, "Leere Seite Tivi-2: " + url);
                    return;
                }
                final String thema = seite1.extract("<title>", "<");
                final String titel = seite1.extract("<subtitle>", "<");
                final String text = seite1.extract("<text>", "<");
                final String website = seite1.extract("<link>", "<");
                final long dauerL = parseDauer(seite1, url);

                String zeit = "";
                String datum = seite1.extract("<airTime>", "<");
                //<airTime>2014-01-19T08:35:00.000+01:00</airTime>
                if (!datum.isEmpty()) {
                    try {
                        final Date filmDate = sdfIn.parse(datum);
                        datum = sdfOut_date.format(filmDate);
                        zeit = sdfOut_time.format(filmDate);
                    } catch (ParseException ex) {
                        // Do not pass the raw, unformatted value on as the date.
                        datum = "";
                        if (MserverDaten.debug) {
                            Log.errorLog(649600299, ex, "Datum: " + url);
                        }
                    }
                }

                String urlFilm = findFilmUrl();
                if (urlFilm.isEmpty()) {
                    Log.errorLog(159876234, "kein Film: " + url);
                    return;
                }
                if (urlFilm.startsWith("http://tvdl.zdf.de")) {
                    urlFilm = urlFilm.replace("http://tvdl.zdf.de", "http://nrodl.zdf.de");
                }

                DatenFilm film = new DatenFilm(SENDERNAME, thema, website, titel, urlFilm, "" /*urlRtmp*/,
                        datum, zeit, dauerL, text);
                // Original note: "now additionally raise the resolution manually".
                // NOTE(review): the exact effect of urlTauschen is not visible here.
                MediathekZdf.urlTauschen(film, url, mlibFilmeSuchen);
                addFilm(film);
            } catch (Exception ex) {
                Log.errorLog(454123698, ex);
            }
        }

        /**
         * Searches the current film page ({@code seite1}) for the first
         * "veryhigh" quality entry whose URL starts with {@code http}, ends
         * with {@code mp4} and does not contain {@code metafilegenerator}.
         *
         * @return the accepted film URL, or an empty string if no entry qualifies
         */
        private String findFilmUrl() {
            final String MUSTER_QUALITY = "<ns4:quality>veryhigh</ns4:quality>";
            int pos = 0;
            while ((pos = seite1.indexOf(MUSTER_QUALITY, pos)) != -1) {
                // step into the match so that the next search finds the following entry
                pos += 5;
                final String candidate = seite1.extract("<ns4:url>", "<", pos);
                if (candidate.startsWith("http") && candidate.endsWith("mp4")
                        && !candidate.contains("metafilegenerator")) {
                    return candidate;
                }
            }
            // Rejected candidates must not leak out as the film URL.
            return "";
        }

        /**
         * Determines the film length in seconds.
         *
         * <p>The {@code <duration>} element is tried first; if it is missing,
         * malformed or yields 0, the {@code <lengthSec>} element is used.
         * Problems are logged and never propagate to the caller.
         *
         * @param seite the film page
         * @param url   URL of the film page, only used for log messages
         * @return the length in seconds, or 0 if it could not be determined
         */
        private long parseDauer(MSStringBuilder seite, String url) {
            long dauer = 0;

            final String duration = seite.extract("<duration>", "<");
            if (!duration.isEmpty()) {
                try {
                    dauer = parseDuration(duration);
                } catch (NumberFormatException | IndexOutOfBoundsException ex) {
                    // IndexOutOfBounds: one of the expected 'H', 'M', '.' separators is missing
                    Log.errorLog(349761012, ex, "Dauer: " + url);
                }
            }
            if (dauer == 0) {
                try {
                    dauer = Integer.parseInt(seite.extract("<lengthSec>", "<"));
                } catch (NumberFormatException ex) {
                    Log.errorLog(349761012, ex, "Dauer: " + url);
                }
            }

            return dauer;
        }

        /**
         * Parses a length given in the form {@code P0Y0M0DT0H22M6.000S}.
         *
         * @param duration the textual duration
         * @return hours, minutes and whole seconds converted to seconds
         * @throws NumberFormatException     if a component is not an integer
         * @throws IndexOutOfBoundsException if {@code 'H'}, {@code 'M'} or
         *                                   {@code '.'} is missing or out of order
         */
        private long parseDuration(String duration) {
            final String time = duration.replace("P0Y0M0DT", "");
            final int posH = time.indexOf('H');
            final int posM = time.indexOf('M');
            final int posDot = time.indexOf('.');

            final int hours = Integer.parseInt(time.substring(0, posH));
            final int minutes = Integer.parseInt(time.substring(posH + 1, posM));
            final int seconds = Integer.parseInt(time.substring(posM + 1, posDot));

            return hours * 3600L + minutes * 60L + seconds;
        }
    }

}