youtube.transcription.Video.java Source code

Java tutorial

Introduction

Here is the source code for youtube.transcription.Video.java

Source

package youtube.transcription;

/*
This file is part of Google2SRT.
    
Google2SRT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
any later version.
    
Google2SRT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
    
You should have received a copy of the GNU General Public License
along with Google2SRT.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 *
 * @author kom
 * @author Zoltan Kakuszi
 * @version "0.7.3, 04/08/15"
 */

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.*;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import org.apache.commons.io.IOUtils;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import java.net.Proxy;

public class Video {
    /**
     * Raised by {@link #getSubtitlesWithTranslations()} when the host of the video address is
     * missing or matches none of video.google.com, youtube.com and youtu.be.
     */
    public static class HostNoGV extends Exception {
    }

    /**
     * Checked failure listed in the throws clauses of {@link #getSubtitles()} and
     * {@link #getSubtitlesWithTranslations()}.
     * NOTE(review): the name suggests a Google Video address without a "docid" parameter;
     * confirm where it is raised (presumably in NetSubtitle).
     */
    public static class NoDocId extends Exception {
    }

    /**
     * Checked failure listed in the throws clauses of {@link #getSubtitles()} and
     * {@link #getSubtitlesWithTranslations()}.
     * NOTE(review): the name suggests an address without a query string; confirm where it is
     * raised (presumably in NetSubtitle).
     */
    public static class NoQuery extends Exception {
    }

    /**
     * Checked failure listed in the throws clauses of {@link #getSubtitles()} and
     * {@link #getSubtitlesWithTranslations()}.
     * NOTE(review): the name suggests a malformed Google Video "docid"; confirm where it is
     * raised (presumably in NetSubtitle).
     */
    public static class InvalidDocId extends Exception {
    }

    /**
     * Raised while parsing a subtitle listing when the XML document is absent or its root
     * element carries no "docid" attribute.
     */
    public static class NoSubs extends Exception {
    }

    /**
     * Checked failure listed in the throws clauses of {@link #getSubtitles()} and
     * {@link #getSubtitlesWithTranslations()}.
     * NOTE(review): the name suggests a YouTube address without a "v" (video id) parameter;
     * confirm every place it is raised.
     */
    public static class NoYouTubeParamV extends Exception {
    }

    private String _id = null; // "docid" (Google Video) or "v" (YouTube); assigned while a subtitle listing is parsed
    // URL extracted from the YouTube page in signature mode; "" when not retrieved or after falling back to legacy mode
    private String _magicURL = "";
    // Title extracted from the downloaded YouTube page; "" when unavailable
    private String _title = "";
    // Query parameters of the URL that was used to build the subtitle-list request
    private HashMap<String, String> _params;
    // How the subtitle listing is requested (Google, YouTubeSignature or YouTubeLegacy)
    private NetSubtitle.Method _method;
    // Text of the YouTube page downloaded by retrieveMagicURL(); null until that call
    private String YouTubeWebSource;
    // Video address; YouTube addresses are rewritten (https, youtu.be expanded) when subtitles are requested
    private String _URL;
    // Cached listing: index 0 holds the tracks, index 1 the translation targets
    private List<List<NetSubtitle>> _subsWT;
    // Optional SOCKS proxy set through setProxy(); null means a direct connection
    private Proxy proxy;

    /**
     * Creates a video for the given address. The constructor only stores the address and an
     * empty subtitle cache; no network access happens here.
     *
     * @param URL address of the video page
     */
    public Video(String URL) {
        this._URL = URL;
        this._subsWT = new ArrayList<List<NetSubtitle>>();
    }

    /**
     * @return the URL extracted from the YouTube page in signature mode, or an empty string
     *         when it has not been retrieved or legacy mode was used instead
     */
    public String getMagicURL() {
        return this._magicURL;
    }

    /**
     * @return the video address; for YouTube videos this is the rewritten address once the
     *         subtitle listing has been requested
     */
    public String getURL() {
        return this._URL;
    }

    /**
     * @return the video title taken from the downloaded YouTube page, or an empty string when
     *         no title has been retrieved
     */
    public String getTitle() {
        return this._title;
    }

    /**
     * @return the video identifier ("docid" for Google Video, "v" for YouTube); {@code null}
     *         until a subtitle listing containing at least one entry has been parsed
     */
    public String getId() {
        return this._id;
    }

    /**
     * @return the query parameters last parsed for this video (the internal map, not a copy);
     *         {@code null} until the subtitle listing has been requested
     */
    public HashMap<String, String> getParams() {
        return this._params;
    }

    /**
     * @return the retrieval method currently selected for this video; {@code null} until one
     *         has been set
     */
    public NetSubtitle.Method getMethod() {
        return this._method;
    }

    /**
     * Selects the retrieval method used for this video.
     *
     * @param method retrieval method to remember
     */
    public void setMethod(NetSubtitle.Method method) {
        this._method = method;
    }

    /**
     * Configures a SOCKS proxy for this video; connections opened by
     * {@link #readURL(String)} afterwards go through it.
     *
     * @param hostAddress host name or address of the proxy
     * @param port        port of the proxy
     */
    public void setProxy(String hostAddress, int port) {
        InetSocketAddress endpoint = new InetSocketAddress(hostAddress, port);
        this.proxy = new Proxy(Proxy.Type.SOCKS, endpoint);
    }

    /**
     * Returns the subtitle tracks of this video, downloading the listing on first use.
     *
     * @return the tracks (first element of {@link #getSubtitlesWithTranslations()})
     * @throws NoSubs if the listing could not be parsed or contains no entries
     */
    public List<NetSubtitle> getSubtitles() throws MalformedURLException, HostNoGV, NoQuery, NoDocId, InvalidDocId,
            UnsupportedEncodingException, JDOMException, IOException, NoSubs, NoYouTubeParamV {
        // getSubtitlesWithTranslations() already serves the cached lists when they exist, so it
        // can be called unconditionally. Working from its return value (instead of re-reading
        // the field) avoids a NullPointerException when the listing had no entries.
        List<List<NetSubtitle>> all = getSubtitlesWithTranslations();
        if (all == null || all.isEmpty()) {
            throw new NoSubs();
        }

        return all.get(0);
    }

    /**
     * Downloads (once) the subtitle listing of this video and caches it.
     * <p>
     * Google Video addresses are listed with {@code NetSubtitle.Method.Google}. YouTube
     * addresses are first rewritten to an https address, then listed in signature mode; if
     * anything fails there, legacy mode is tried.
     *
     * @return a list whose element 0 holds the tracks and element 1 the translation targets
     *         (always empty for Google Video); {@code null} when a YouTube listing has no
     *         entries
     * @throws HostNoGV        if the host is neither Google Video nor YouTube
     * @throws NoYouTubeParamV if a youtu.be address carries no video id in its path
     *                         (NOTE(review): may also be raised by NetSubtitle; confirm)
     */
    public List<List<NetSubtitle>> getSubtitlesWithTranslations()
            throws MalformedURLException, HostNoGV, NoQuery, NoDocId, InvalidDocId, UnsupportedEncodingException,
            JDOMException, IOException, NoSubs, NoYouTubeParamV {
        // Already retrieved
        if (_subsWT != null && !_subsWT.isEmpty()) {
            return _subsWT;
        }

        URL url = new URL(_URL);
        String host = url.getHost();
        List<List<NetSubtitle>> result;

        if (host == null) {
            throw new HostNoGV();
        } else if (host.indexOf("video.google.com") != -1) {
            _params = getURLParams(_URL);
            setMethod(NetSubtitle.Method.Google);
            String urlList = NetSubtitle.getListURL(getMethod(), getParams());
            Document xmlDoc = readListURL(urlList);
            List<NetSubtitle> lTracks = getListSubs(xmlDoc, getParams());
            result = new ArrayList<List<NetSubtitle>>();
            result.add(lTracks);
            result.add(new ArrayList<NetSubtitle>());
        } else if (host.indexOf("youtube.com") != -1 || host.indexOf("youtu.be") != -1) {
            url = normalizeYouTubeURL(url);
            _URL = url.toString();

            if (Settings.DEBUG) {
                System.out.println("(DEBUG) Final video URL: " + _URL);
            }
            try {
                _magicURL = retrieveMagicURL(_URL);
                _title = retrieveVideoTitle();
                _params = getURLParams(getMagicURL());
                setMethod(NetSubtitle.Method.YouTubeSignature);
                String urlList = NetSubtitle.getListURL(getMethod(), getParams());
                Document xmlDoc = readListURL(urlList);
                result = getListSubsWithTranslations(xmlDoc, getParams(), getMethod());
            } catch (Exception ex) {
                // Deliberate best-effort: any failure in signature mode triggers the legacy path.
                if (Settings.DEBUG) {
                    System.out.println("(DEBUG) Exception reading via Signature mode. Switching to Legacy mode...");
                    System.out.println("(DEBUG) Cause: " + ex);
                }
                _magicURL = "";
                _title = "";
                _params = getURLParams(_URL);
                setMethod(NetSubtitle.Method.YouTubeLegacy);
                String urlList = NetSubtitle.getListURL(getMethod(), getParams());
                Document xmlDoc = readListURL(urlList);
                result = getListSubsWithTranslations(xmlDoc, getParams(), getMethod());
            }
        } else {
            throw new HostNoGV();
        }

        // Never cache a null listing: a null field would make every later call fail on the
        // cache check above instead of retrying.
        if (result != null) {
            _subsWT = result;
        }
        return result;
    }

    /**
     * Rewrites a YouTube address to the https form used for all further requests.
     */
    private static URL normalizeYouTubeURL(URL url) throws MalformedURLException, NoYouTubeParamV {
        if (url.getHost().indexOf("youtu.be") != -1) {
            // http://youtu.be/c8RGPpcenZY => https://www.youtube.com/watch?v=c8RGPpcenZY
            String path = url.getPath();
            if (path == null || path.length() < 2) {
                // No video id after the slash
                throw new NoYouTubeParamV();
            }
            StringBuilder full = new StringBuilder("https://www.youtube.com/watch?v=");
            full.append(path.substring(1));
            // Keep extra parameters (e.g. ?t=10) as additional query parameters instead of
            // letting them become part of the "v" value.
            String query = url.getQuery();
            if (query != null && query.length() > 0) {
                full.append('&').append(query);
            }
            return new URL(full.toString());
        }

        // http://www.youtube.com/watch?v=c8RGPpcenZY => https://www.youtube.com/watch?v=c8RGPpcenZY
        // Only the scheme is changed; "http://" occurring inside the query is left alone.
        if ("http".equals(url.getProtocol())) {
            return new URL("https" + url.toString().substring("http".length()));
        }
        return url;
    }

    /**
     * Downloads the YouTube page, keeps its text for later title extraction and asks
     * NetSubtitle to extract the signature-mode URL from it.
     *
     * @param YouTubeURL address of the YouTube page
     * @return the URL extracted by {@code NetSubtitle.getMagicURL}
     * @throws IOException if the page cannot be downloaded
     */
    public String retrieveMagicURL(String YouTubeURL) throws MalformedURLException, IOException {
        InputStreamReader isr = readURL(YouTubeURL);
        try {
            YouTubeWebSource = readURL(isr);
        } finally {
            // Release the connection even when reading fails
            isr.close();
        }

        String magicURL = NetSubtitle.getMagicURL(YouTubeWebSource);

        if (Settings.DEBUG) {
            System.out.println("(DEBUG) *Magic* URL: " + magicURL);
        }
        return magicURL;
    }

    /**
     * Extracts the title from the page text stored by {@link #retrieveMagicURL(String)}.
     *
     * @return the title reported by {@code NetSubtitle.getVideoTitleFromSource}, or an empty
     *         string when no page has been downloaded yet
     */
    public String retrieveVideoTitle() {
        if (YouTubeWebSource == null) {
            return "";
        }
        return NetSubtitle.getVideoTitleFromSource(YouTubeWebSource);
    }

    /**
     * Splits the query string of an address into name/value pairs.
     * <p>
     * Names and values are returned exactly as written (no percent-decoding). A parameter
     * without "=" maps to an empty value; everything after the first "=" belongs to the value,
     * so values that themselves contain "=" are kept intact. When a name occurs more than
     * once, the last occurrence wins.
     *
     * @param URL address to parse
     * @return the parameters; empty when the address has no query string
     * @throws MalformedURLException if the address cannot be parsed
     */
    public HashMap<String, String> getURLParams(String URL) throws MalformedURLException {
        HashMap<String, String> mparams = new HashMap<String, String>();

        String query = new URL(URL).getQuery();
        if (query == null) {
            // No "?" part at all
            return mparams;
        }

        for (String param : query.split("&")) {
            int eq = param.indexOf('=');
            String name;
            String value;
            if (eq < 0) {
                name = param;
                value = "";
            } else {
                name = param.substring(0, eq);
                value = param.substring(eq + 1);
            }

            mparams.put(name, value);
        }

        return mparams;
    }

    /**
     * Downloads the subtitle listing at the given address and parses it as XML.
     *
     * @param url address of the listing
     * @return the parsed document
     * @throws JDOMException if the response is not well-formed XML
     * @throws IOException   if the listing cannot be downloaded
     */
    private Document readListURL(String url) throws MalformedURLException, JDOMException, IOException {
        SAXBuilder parser = new SAXBuilder();
        InputStreamReader isr = readURL(url);
        try {
            return parser.build(isr);
        } finally {
            // The document is fully built in memory, so the connection can be released here
            isr.close();
        }
    }

    /**
     * Extracts the tracks of a Google Video subtitle listing.
     *
     * @param xml    parsed listing
     * @param params query parameters of the video address
     * @return the tracks; empty when the listing has no entries
     * @throws NoSubs if the document is absent or its root has no "docid" attribute
     */
    private List<NetSubtitle> getListSubs(Document xml, HashMap<String, String> params)
            throws NoSubs, UnsupportedEncodingException {
        List<List<NetSubtitle>> all = getListSubsWithTranslations(xml, params, NetSubtitle.Method.Google);
        // The parser reports an empty listing as null; map that to "no tracks"
        if (all == null || all.isEmpty()) {
            return new ArrayList<NetSubtitle>();
        }
        return all.get(0);
    }

    /**
     * Converts a subtitle listing into NetSubtitle objects.
     * <p>
     * Child elements of the root that carry a numeric "id" attribute are read; "track"
     * elements become tracks (ASR tracks when kind="asr"), "target" elements become
     * translation targets, anything else is ignored. As a side effect the video id is stored
     * from {@code params} ("docid" for Google, "v" for YouTube) once such an element is seen.
     * Optional attributes that are absent are simply not set on the NetSubtitle.
     *
     * @param xml    parsed listing
     * @param params query parameters the video id is taken from
     * @param method retrieval method in use
     * @return element 0: tracks, element 1: translation targets; {@code null} when the root
     *         has no child elements (callers must check)
     * @throws NoSubs if {@code xml} is null or its root has no "docid" attribute
     */
    private List<List<NetSubtitle>> getListSubsWithTranslations(Document xml, HashMap<String, String> params,
            NetSubtitle.Method method) throws NoSubs, UnsupportedEncodingException {
        if (xml == null) {
            throw new NoSubs();
        }

        Element arrel = xml.getRootElement();
        if (arrel.getAttribute("docid") == null) {
            throw new NoSubs();
        }

        // JDOM 1.x declares getChildren() with a raw List; its elements are Element instances
        @SuppressWarnings("unchecked")
        List<Element> tracks = arrel.getChildren();
        if (tracks.isEmpty()) {
            return null;
        }

        List<NetSubtitle> lTracks = new ArrayList<NetSubtitle>();
        List<NetSubtitle> lTargets = new ArrayList<NetSubtitle>();

        for (Element track : tracks) {
            if (track == null) {
                continue;
            }

            //<track id="0" name="" lang_code="ca" lang_original="Catal" lang_translated="Catalan" cantran="true"/>
            //<target id="42" lang_code="ca" lang_original="Catal" lang_translated="Catalan"/>

            String idText = track.getAttributeValue("id");
            if (idText == null) {
                continue;
            }

            int idXML;
            try {
                idXML = Integer.parseInt(idText);
            } catch (NumberFormatException e) {
                // One malformed entry should not abort the whole listing
                if (Settings.DEBUG) {
                    System.out.println("(DEBUG) Skipping subtitle entry with non-numeric id: " + idText);
                }
                continue;
            }

            NetSubtitle tNS = new NetSubtitle(this);
            switch (method) {
            case Google:
                _id = params.get("docid");
                break;
            case YouTubeLegacy:
            case YouTubeSignature:
                _id = params.get("v");
                break;
            default:
                _id = "";
            }
            tNS.setIdXML(idXML);

            // getAttributeValue() yields null for an absent attribute, so a missing attribute
            // no longer causes a NullPointerException.
            String sLang = track.getAttributeValue("lang_code");
            if (sLang != null) {
                tNS.setLang(sLang);
            }
            String sLangOrig = track.getAttributeValue("lang_original");
            if (sLangOrig != null) {
                tNS.setLangOriginal(sLangOrig);
            }
            String sLangTrans = track.getAttributeValue("lang_translated");
            if (sLangTrans != null) {
                tNS.setLangTranslated(sLangTrans);
            }

            String elementName = track.getName();
            if ("track".equals(elementName)) {
                if ("asr".equals(track.getAttributeValue("kind"))) {
                    tNS.setType(NetSubtitle.Tipus.YouTubeASRTrack);
                } else {
                    String sName = track.getAttributeValue("name");
                    if (sName != null) {
                        tNS.setName(sName);
                    }
                    tNS.setType(NetSubtitle.Tipus.YouTubeTrack);
                }
                tNS.setTrack(true);
                lTracks.add(tNS);
            } else if ("target".equals(elementName)) {
                tNS.setType(NetSubtitle.Tipus.YouTubeTarget);
                lTargets.add(tNS);
            }
        }

        List<List<NetSubtitle>> resultat = new ArrayList<List<NetSubtitle>>();
        resultat.add(lTracks);
        resultat.add(lTargets);

        return resultat;
    }

    /**
     * Opens a connection to the given address, through the configured proxy when one is set,
     * and returns a reader that decodes the response body as UTF-8.
     * <p>
     * The User-Agent is built from the "app.name" and "app.version" entries of the "Bundle"
     * resource bundle. The caller is responsible for closing the returned reader.
     *
     * @param s address to download
     * @return a UTF-8 reader over the response body
     * @throws MalformedURLException if {@code s} is not a valid URL
     * @throws IOException           if the connection cannot be established
     */
    public InputStreamReader readURL(String s) throws MalformedURLException, IOException {
        java.util.ResourceBundle bundle = java.util.ResourceBundle.getBundle("Bundle");
        String appName = bundle.getString("app.name");
        String appVersion = bundle.getString("app.version");

        URL url = new URL(s);
        URLConnection urlconn;
        if (proxy != null) {
            urlconn = url.openConnection(proxy);
        } else {
            urlconn = url.openConnection();
        }
        urlconn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        // The body is always decoded as UTF-8 below, so UTF-8 must be the preferred charset
        // (the header used to rank ISO-8859-1 first).
        urlconn.setRequestProperty("Accept-Charset", "utf-8,ISO-8859-1;q=0.7,*;q=0.7");
        urlconn.setRequestProperty("User-Agent", "Mozilla/5.0 (compatible; " + appName + "/" + appVersion + ")");
        urlconn.connect();

        return new InputStreamReader(urlconn.getInputStream(), "UTF-8");
    }

    /**
     * Reads everything that remains in the reader and returns it as one string. The reader is
     * left open; closing it is up to the caller.
     *
     * @param isr reader to drain
     * @return the text that was read
     * @throws IOException if reading fails
     */
    public String readURL(InputStreamReader isr) throws IOException {
        return IOUtils.toString(isr);
    }

}