neembuu.release1.externalImpl.linkhandler.YoutubeLinkHandlerProvider.java Source code

Java tutorial

Introduction

Here is the source code for neembuu.release1.externalImpl.linkhandler.YoutubeLinkHandlerProvider.java

Source

/*
 *  Copyright (C) 2014 Davide Pastore
 * 
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package neembuu.release1.externalImpl.linkhandler;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import neembuu.release1.api.file.OnlineFile;
import neembuu.release1.api.file.PropertyProvider;
import neembuu.release1.api.linkhandler.LinkHandler;
import neembuu.release1.api.linkhandler.LinkHandlerProvider;
import neembuu.release1.api.linkhandler.TrialLinkHandler;
import neembuu.release1.captcha.Captcha;
import neembuu.release1.defaultImpl.file.BasicOnlineFile;
import neembuu.release1.defaultImpl.file.BasicPropertyProvider;
import neembuu.release1.httpclient.NHttpClient;
import neembuu.release1.httpclient.utils.NHttpClientUtils;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import davidepastore.StringUtils;
import java.net.URLEncoder;
import neembuu.release1.api.log.LoggerUtil;
import neembuu.release1.defaultImpl.external.ELHProvider;
import neembuu.release1.defaultImpl.linkhandler.BasicLinkHandler;
import neembuu.release1.defaultImpl.linkhandler.Utils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 *
 * @author davidepastore
 */
@ELHProvider(checkingRegex = YoutubeLinkHandlerProvider.REG_EXP)
public class YoutubeLinkHandlerProvider implements LinkHandlerProvider {
    // Logger shared by the provider and its nested YT_TLH class; obtained through LoggerUtil.
    private static final Logger LOGGER = LoggerUtil.getLogger(YoutubeLinkHandlerProvider.class.getName()); // all logs go into an html file

    // reCAPTCHA challenge endpoint and the value appended to it as the k= parameter;
    // handleCaptcha() concatenates the two before asking Captcha for a challenge.
    private final String K_CHALLENGE_URL = "https://www.google.com/recaptcha/api/challenge?k=";
    private final String K_CHALLENGE_CODE = "6LcVessSAAAAAH73irTtpZYKknjeBvN3nuUzJ2G3";

    // Pattern published through the @ELHProvider annotation on this class. It accepts
    // http/https links of the form www.youtube.com/watch?v=<id> (optionally preceded by
    // feature=player_embedded&) and youtu.be/<id>, followed by optional extra parameters.
    // YT_TLH.canHandle() repeats the same literal.
    // NOTE(review): the dots in "www.youtube.com" and "youtu.be" are not escaped, so they
    // match any character - confirm whether that looseness is intended.
    static final String REG_EXP = "https?://(www.youtube.com/watch\\?(feature=player_embedded&)?v=|youtu.be/)([\\w\\-\\_]*)(&(amp;)?[\\w\\?=((\\w)|(\\W))]*)?";

    /**
     * Wraps the given link in a {@link YT_TLH}. No matching is done here: the
     * returned handler reports through {@code canHandle()} whether the link is
     * a supported YouTube url.
     * @param url the link to try.
     * @return a trial handler bound to the link.
     */
    @Override
    public TrialLinkHandler tryHandling(final String url) {
        final TrialLinkHandler trial = new YT_TLH(url);
        return trial;
    }

    /**
     * Builds the real link handler for a trial handler created by this provider.
     * @param tlh the trial handler, expected to be a {@link YT_TLH}.
     * @return the handler with all files found for the video, or {@code null}
     * when {@code tlh} is not a {@link YT_TLH} or reports that it cannot handle its url.
     * @throws Exception when the extraction fails.
     */
    @Override
    public LinkHandler getLinkHandler(TrialLinkHandler tlh) throws Exception {
        final boolean createdHere = tlh instanceof YT_TLH;
        if (createdHere && tlh.canHandle()) {
            // clipconverter is the active back end; linkYoutubeExtraction(tlh) is the alternative
            return clipConverterExtraction(tlh).build();
        }
        return null;
    }

    // Empty placeholder: does nothing and is not called anywhere in this file.
    private void xzz() {
    }

    /**
     * Legacy title grabber, kept only for reference: it works out a file name from
     * the page title and then always fails with an {@link IllegalStateException},
     * because the provider must not store a per-video file name in a field.
     * <p>
     * The title is the text between {@code <title>} and {@code " - YouTube"};
     * {@code ".webm"} is appended when the content type of {@code url} is
     * {@code video/webm}. The title is used as found: characters that are not
     * valid in file names are not removed here.
     * @param text the html of the video page.
     * @param url the url of the stream whose content type is inspected.
     * @throws IllegalStateException always.
     */
    private void grabTitle(String text, String url) {
        String grabbedTitle = StringUtils.stringBetweenTwoStrings(text, "<title>", " - YouTube");

        String contentType = NHttpClientUtils.getContentType(url, NHttpClient.getNewInstance());

        // Constant-first comparison: an unknown (null) content type must not turn
        // into a NullPointerException that hides the exception thrown below.
        if ("video/webm".equals(contentType)) {
            grabbedTitle += ".webm";
        }

        LOGGER.log(Level.INFO, "Title: " + grabbedTitle);

        throw new IllegalStateException("Legacy code");
    }

    /**
     * Legacy extractor: pulls the stream urls out of the
     * {@code "url_encoded_fmt_stream_map"} value found in the page text.
     * <p>
     * The value is split on commas; every entry is rebuilt as a plain url,
     * percent-escapes are decoded by hand and {@code sig=}/{@code s=} parameters
     * are renamed to {@code signature=}. Any exception raised while parsing is
     * printed and parsing stops, keeping the urls collected so far.
     * <p>
     * NOTE(review): the method ends by calling {@code grabTitle}, which in this
     * file always throws, and {@code finalUrls.get(0)} fails on an empty list -
     * so as written this method does not appear to be able to return normally.
     * @param text the text (html of the video page).
     * @return a list of the urls.
     */
    private ArrayList<String> findTextData(String text) {
        ArrayList<String> finalUrls = new ArrayList<String>();
        try {
            String encodedUrl = StringUtils.stringBetweenTwoStrings(text, "\"url_encoded_fmt_stream_map\": \"",
                    "\"");
            // NOTE(review): the same value is logged twice, once parameterized and once concatenated.
            LOGGER.log(Level.INFO, "encoded url: {0}", encodedUrl);
            LOGGER.log(Level.INFO, "encoded url: " + encodedUrl);

            // Cut everything from the first double quote on.
            encodedUrl = encodedUrl.replaceFirst("\".*", "");

            // NOTE(review): repeats the statement above - looks redundant, confirm before removing.
            encodedUrl = encodedUrl.replaceFirst("\".*", "");
            // Undo one level of escaping; the order of these replacements matters.
            encodedUrl = encodedUrl.replace("%25", "%");
            encodedUrl = encodedUrl.replace("\\u0026", "&");
            encodedUrl = encodedUrl.replace("\\", "");
            String[] urls = encodedUrl.split(",");

            for (int i = 0; i < urls.length; i++) {
                // [0] = parameters in front of the url, [1] = rest of the url.
                // An entry without "url=http" yields a one-element array; the access
                // below then throws and the catch block ends the whole loop.
                String[] fmtUrlPair = urls[i].split("url=http", 2);
                // Put the url first and append the leading parameters to it.
                fmtUrlPair[1] = "url=http" + fmtUrlPair[1] + "&" + fmtUrlPair[0];

                // [0] is reused for the itag value: one character after "itag=", plus one
                // more when the string contains a two-digit itag, plus one more for three digits.
                fmtUrlPair[0] = fmtUrlPair[1].substring(fmtUrlPair[1].indexOf("itag=") + 5,
                        fmtUrlPair[1].indexOf("itag=") + 5 + 1
                                + (fmtUrlPair[1].matches(".*itag=[0-9]{2}.*") ? 1 : 0)
                                + (fmtUrlPair[1].matches(".*itag=[0-9]{3}.*") ? 1 : 0));
                // Manual percent-decoding of the url and renaming of the signature parameter.
                fmtUrlPair[1] = fmtUrlPair[1].replaceFirst("url=http%3A%2F%2F", "http://");
                fmtUrlPair[1] = fmtUrlPair[1].replaceAll("%3F", "?").replaceAll("%2F", "/").replaceAll("%3B", ";")
                        .replaceAll("%2C", ",").replaceAll("%3D", "=").replaceAll("%26", "&")
                        .replaceAll("%252C", "%2C").replaceAll("sig=", "signature=")
                        .replaceAll("&s=", "&signature=").replaceAll("\\?s=", "?signature=");

                // remove duplicated &itag=xy
                if (StringUtils.countString(fmtUrlPair[1], "itag=") == 2) {
                    //                    LOGGER.log(Level.INFO,"Deleting itag!");
                    fmtUrlPair[1] = fmtUrlPair[1].replaceFirst("itag=[0-9]{1,3}", "");
                }

                //LOGGER.log(Level.INFO,"url[" + i + "]: " + urls[i]);
                LOGGER.log(Level.INFO, "fmtUrlPair[1]: {0}\nfmtUrlPair[0]: {1}",
                        new Object[] { fmtUrlPair[1], fmtUrlPair[0] });
                //                LOGGER.log(Level.INFO,"fmtUrlPair[1]: "+ fmtUrlPair[1] +"\nfmtUrlPair[0]: " + fmtUrlPair[0]);

                finalUrls.add(fmtUrlPair[1]);

            }
        } catch (Exception ex) {
            // Best effort: the failure is printed and the urls gathered so far are kept.
            ex.printStackTrace();
        }

        printUrls(finalUrls);

        //Setting filename
        grabTitle(text, finalUrls.get(0)); //the first quality

        return finalUrls;
    }

    /**
     * Use the http://www.linkyoutube.com service to get the urls, starting
     * with a retry count of zero.
     * @param tlh the trial handler whose reference link is the youtube url.
     * @return A BasicLinkHandler.Builder with all the urls found for this video.
     * @throws Exception when the extraction keeps failing.
     */
    private BasicLinkHandler.Builder linkYoutubeExtraction(TrialLinkHandler tlh) throws Exception {
        final int initialRetryCount = 0;
        return linkYoutubeExtraction(tlh, initialRetryCount);
    }

    /**
     * Asks http://www.linkyoutube.com for the download links of the video and
     * registers one file per usable link on a fresh builder.
     * <p>
     * Links whose href is {@code #} or whose length cannot be determined are
     * skipped. The duration is read from the {@code dur=} parameter of each
     * link; files without one inherit the first duration that was found.
     * When anything fails the whole extraction is started again with
     * {@code retryCount + 1}, until {@code retryCount} exceeds the retry limit
     * of the trial handler, at which point the last exception is rethrown.
     * @param tlh the trial handler (a {@link YT_TLH}) holding the youtube url.
     * @param retryCount number of attempts that have already failed.
     * @return A BasicLinkHandler.Builder with all the urls found for this video.
     * @throws Exception the last failure once the retry limit is exceeded.
     */
    private BasicLinkHandler.Builder linkYoutubeExtraction(TrialLinkHandler tlh, int retryCount) throws Exception {
        String url = tlh.getReferenceLinkString();
        BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create();

        try {
            DefaultHttpClient httpClient = NHttpClient.getNewInstance();
            String requestUrl = "http://www.linkyoutube.com/watch/index.php?video="
                    + URLEncoder.encode(url, "UTF-8");

            final String responseString = NHttpClientUtils.getData(requestUrl, httpClient);

            //Set the group name as the name of the video
            String nameOfVideo = getVideoName(url);
            linkHandlerBuilder.setGroupName(nameOfVideo);

            // first duration found; used below for files that carry none
            long c_duration = -1;

            Document doc = Jsoup.parse(responseString);
            Elements elements = doc.select("#download_links a");

            for (Element element : elements) {
                String singleUrl = element.attr("href");
                String fileName = element.text();
                if (singleUrl.equals("#")) {
                    continue; // placeholder anchor, not a download link
                }

                long length = NHttpClientUtils.calculateLength(singleUrl, httpClient);
                singleUrl = Utils.normalize(singleUrl);
                LOGGER.log(Level.INFO, "Normalized URL: " + singleUrl);

                if (length == 0) {
                    // second chance with the normalized url
                    length = NHttpClientUtils.calculateLength(singleUrl, httpClient);
                }

                if (length <= 0) {
                    continue; // skip this url
                }

                BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder.createFile();

                try { // finding video/audio length
                    String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&");
                    // long, not int: an int cast would cap the value at Integer.MAX_VALUE ms
                    long duration = (long) (Double.parseDouble(dur) * 1000);
                    if (c_duration < 0) {
                        c_duration = duration;
                    }
                    fileBuilder.putLongPropertyValue(
                            PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, duration);
                    LOGGER.log(Level.INFO, "dur=" + dur);
                } catch (Exception ignored) {
                    // The duration is optional. Catching Exception (as clipConverterExtraction
                    // does) keeps a missing dur= from aborting the whole extraction.
                }

                try { // finding the quality short name, i.e. the text in parentheses
                    String type = fileName.substring(fileName.indexOf("(") + 1);
                    type = type.substring(0, type.indexOf(")"));
                    fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION,
                            type);
                    LOGGER.log(Level.INFO, "type=" + type);
                } catch (Exception a) {
                    // the description is optional as well
                    LOGGER.log(Level.INFO, "No variant description in: " + fileName, a);
                }

                fileName = nameOfVideo + " " + fileName;

                fileBuilder.setName(fileName).setUrl(singleUrl).setSize(length).next();
            }

            // Back-fill the duration of files that did not carry one.
            for (OnlineFile of : linkHandlerBuilder.getFiles()) {
                long dur = of.getPropertyProvider()
                        .getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS);
                if (dur < 0 && c_duration > 0 && of.getPropertyProvider() instanceof BasicPropertyProvider) {
                    ((BasicPropertyProvider) of.getPropertyProvider()).putLongPropertyValue(
                            PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, c_duration);
                }
            }

        } catch (Exception ex) {
            int retryLimit = ((YT_TLH) tlh).retryLimit;
            LOGGER.log(Level.INFO, "retry no. = " + retryCount, ex);
            // NOTE(review): with ">" a limit of n allows n + 1 retries - confirm that is intended.
            if (retryCount > retryLimit) {
                throw ex;
            }
            return linkYoutubeExtraction(tlh, retryCount + 1);
        }

        return linkHandlerBuilder;
    }

    /**
     * Downloads the page behind {@code url} and reads the video title from
     * the {@code content} attribute of its {@code <meta name="title">} element.
     * @param url The url of the video.
     * @return Returns the title of the video.
     * @throws Exception when the page cannot be downloaded.
     */
    private String getVideoName(String url) throws Exception {
        final Document page = Jsoup.parse(NHttpClientUtils.getData(url, NHttpClient.getNewInstance()));
        final Elements titleMeta = page.select("meta[name=title]");
        return titleMeta.attr("content");
    }

    /**
     * Use the clipconverter.cc service to get the urls, starting with a retry
     * count of zero.
     * @param tlh the trial handler whose reference link is the youtube url.
     * @return A BasicLinkHandler.Builder with all the urls found for this video.
     * @throws Exception when the extraction keeps failing.
     */
    private BasicLinkHandler.Builder clipConverterExtraction(TrialLinkHandler tlh) throws Exception {
        final int initialRetryCount = 0;
        return clipConverterExtraction(tlh, initialRetryCount);
    }

    /**
     * Posts the youtube url to clipconverter.cc and registers one file per
     * usable variant of the answer on a fresh builder.
     * <p>
     * When the service answers with a {@code redirect} it wants a captcha
     * first: {@link #handleCaptcha} is called and the request is repeated,
     * with the same count if the captcha was accepted and with
     * {@code retryCount + 1} if it was not. Any exception also leads to a new
     * attempt with {@code retryCount + 1}. In both cases the method gives up
     * once a failing attempt has a {@code retryCount} above the retry limit of
     * the trial handler.
     * <p>
     * Every follow-up attempt is started outside the {@code try} block, so a
     * failure of a later attempt is never caught and retried a second time by
     * an earlier one.
     * <p>
     * Keep this method free of per-video state in fields: according to the
     * original author's note the same provider object handles all youtube
     * links, from different threads (see
     * neembuu.release1.ui.actions.LinkActionsImpl, reAddAction).
     * @param tlh the trial handler; must be a {@link YT_TLH}.
     * @param retryCount number of attempts that have already failed.
     * @return A BasicLinkHandler.Builder with all the urls found for this video.
     * @throws Exception the last failure once the retry limit is exceeded.
     */
    private BasicLinkHandler.Builder clipConverterExtraction(TrialLinkHandler tlh, int retryCount)
            throws Exception {
        final int retryLimit = ((YT_TLH) tlh).retryLimit;
        final String url = tlh.getReferenceLinkString();
        final BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create();

        boolean captchaRequested = false;
        boolean captchaAccepted = false;

        try {
            DefaultHttpClient httpClient = NHttpClient.getNewInstance();
            HttpPost httpPost = new HttpPost("http://www.clipconverter.cc/check.php");

            List<NameValuePair> formparams = new ArrayList<NameValuePair>();
            formparams.add(new BasicNameValuePair("mediaurl", url));
            httpPost.setEntity(new UrlEncodedFormEntity(formparams, "UTF-8"));

            HttpResponse httpResponse = httpClient.execute(httpPost);
            final String responseString = EntityUtils.toString(httpResponse.getEntity());
            JSONObject response = new JSONObject(responseString);

            if (response.has("redirect")) {
                // Only remember the outcome here; the follow-up request is made below.
                captchaRequested = true;
                captchaAccepted = handleCaptcha(response);
            } else {
                collectClipConverterFiles(response, linkHandlerBuilder, httpClient);
            }
        } catch (Exception ex) {
            LOGGER.log(Level.INFO, "retry no. = " + retryCount, ex);
            // NOTE(review): with ">" a limit of n allows n + 1 retries - confirm that is intended.
            if (retryCount > retryLimit) {
                throw ex;
            }
            return clipConverterExtraction(tlh, retryCount + 1);
        }

        if (captchaRequested) {
            if (captchaAccepted) {
                return clipConverterExtraction(tlh, retryCount);
            }
            // A rejected captcha counts as a failed attempt and obeys the same limit.
            if (retryCount > retryLimit) {
                throw new IllegalStateException(
                        "Captcha not accepted and retry limit " + retryLimit + " exceeded for " + url);
            }
            return clipConverterExtraction(tlh, retryCount + 1);
        }

        return linkHandlerBuilder;
    }

    /**
     * Reads the {@code url} array of a clipconverter.cc answer and adds one
     * file per entry with a known, positive length to the builder. The group
     * is named after the {@code filename} of the answer, used as delivered.
     * @param response the parsed answer of check.php.
     * @param linkHandlerBuilder the builder receiving the files.
     * @param httpClient client used to ask for a length the url does not state.
     * @throws Exception when the answer lacks an expected field or a length lookup fails.
     */
    private void collectClipConverterFiles(JSONObject response, BasicLinkHandler.Builder linkHandlerBuilder,
            DefaultHttpClient httpClient) throws Exception {
        JSONArray variants = response.getJSONArray("url");
        LOGGER.log(Level.INFO, "urls: " + variants);

        //Set the group name as the name of the video
        String nameOfVideo = response.getString("filename");
        linkHandlerBuilder.setGroupName(nameOfVideo);

        // first duration found; used below for files that carry none
        long c_duration = -1;

        for (int i = 0; i < variants.length(); i++) {
            JSONObject variant = (JSONObject) variants.get(i);
            String fileName = variant.getString("text");
            LOGGER.log(Level.INFO, "Filename: " + fileName);

            // "text" is markup; the label is what stands between the first '>' and the next '<'
            final String extension = variant.getString("filetype").toLowerCase();
            fileName = StringUtils.stringBetweenTwoStrings(fileName, ">", "<");
            fileName = fileName + "." + extension;

            String singleUrl = variant.getString("url");
            LOGGER.log(Level.INFO, "Before normalization URL: " + singleUrl);
            // the size hint has to be read from the raw url, before normalization
            long length = tryFindingSize(singleUrl);
            singleUrl = Utils.normalize(singleUrl);
            LOGGER.log(Level.INFO, "Normalized URL: " + singleUrl);

            if (length == 0) {
                length = NHttpClientUtils.calculateLength(singleUrl, httpClient);
            }

            if (length <= 0) {
                continue; // skip this url
            }

            BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder.createFile();

            try { // finding video/audio length
                String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&");
                // long, not int: an int cast would cap the value at Integer.MAX_VALUE ms
                long duration = (long) (Double.parseDouble(dur) * 1000);
                if (c_duration < 0) {
                    c_duration = duration;
                }
                fileBuilder.putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS,
                        duration);
                LOGGER.log(Level.INFO, "dur=" + dur);
            } catch (Exception ignored) {
                // the duration is optional; a url without dur= is still usable
            }

            try { // finding the quality short name, i.e. the text in parentheses
                String type = fileName.substring(fileName.indexOf("(") + 1);
                type = type.substring(0, type.indexOf(")"));
                fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION, type);
                if (type.contains("480") || type.contains("1080")) {
                    fileBuilder.putBooleanPropertyValue(PropertyProvider.BooleanProperty.UNSTABLE_VARIANT,
                            true);
                }
                LOGGER.log(Level.INFO, "type=" + type);
            } catch (Exception a) {
                // the description is optional as well
                LOGGER.log(Level.INFO, "No variant description in: " + fileName, a);
            }

            fileName = nameOfVideo + " " + fileName;

            fileBuilder.setName(fileName).setUrl(singleUrl).setSize(length).next();
        }

        // Back-fill the duration of files that did not carry one.
        for (OnlineFile of : linkHandlerBuilder.getFiles()) {
            long dur = of.getPropertyProvider()
                    .getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS);
            if (dur < 0 && c_duration > 0 && of.getPropertyProvider() instanceof BasicPropertyProvider) {
                ((BasicPropertyProvider) of.getPropertyProvider()).putLongPropertyValue(
                        PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, c_duration);
            }
        }
    }

    /**
     * Reads the size hint some urls carry as {@code size=<digits>}.
     * <p>
     * The digits directly after the first {@code size=} are parsed; whatever
     * follows them ({@code #}, {@code &}, ...) is ignored. A url without a
     * hint is the normal case and is answered with 0 without any logging.
     * @param rawURL the url as delivered by the service, before normalization; may be null.
     * @return the size stated in the url, or 0 when there is none or it cannot be parsed.
     */
    private long tryFindingSize(String rawURL) {
        final String key = "size=";
        final int keyAt = (rawURL == null) ? -1 : rawURL.indexOf(key);
        if (keyAt < 0) {
            return 0; // no hint at all
        }

        final int from = keyAt + key.length();
        int to = from;
        while (to < rawURL.length() && rawURL.charAt(to) >= '0' && rawURL.charAt(to) <= '9') {
            to++;
        }
        if (to == from) {
            return 0; // "size=" without a number
        }

        try {
            return Long.parseLong(rawURL.substring(from, to));
        } catch (NumberFormatException ex) {
            // only digits were passed, so the number does not fit into a long
            LOGGER.log(Level.INFO, "Unusable size hint in URL: " + rawURL, ex);
            return 0;
        }
    }

    /**
     * Writes every url to the log, framed by a start and an end marker
     * (debug purpose).
     * @param urls ArrayList<String> with all the urls.
     */
    private void printUrls(ArrayList<String> urls) {
        LOGGER.log(Level.INFO, "\n***** START PRINTING YOUTUBE URLS *****");
        for (int index = 0; index < urls.size(); index++) {
            LOGGER.log(Level.INFO, urls.get(index));
        }
        LOGGER.log(Level.INFO, "***** END PRINTING YOUTUBE URLS *****\n");
    }

    /**
     * Solves the captcha clipconverter.cc asks for: obtains a challenge, lets
     * {@link Captcha} supply the answer and posts both to the redirect url.
     * Every failure on the way is logged and reported as {@code false}.
     * @param jSonObject The JSONObject with the redirect url.
     * @return Returns true if the captcha is correct, false otherwise.
     */
    private boolean handleCaptcha(JSONObject jSonObject) {
        try {
            LOGGER.log(Level.INFO, "Handling captcha.");

            final String target = "http://www.clipconverter.cc" + jSonObject.getString("redirect");
            final DefaultHttpClient client = NHttpClient.getNewInstance();

            //Get the captcha code
            Captcha captcha = new Captcha();
            captcha.setFormTitle("Captcha for Youtube.com");
            if (captcha.findCCaptchaUrlFromK(K_CHALLENGE_URL + K_CHALLENGE_CODE) == null) {
                // no challenge available; ends up in the catch block below
                throw new Exception("Captcha generic error");
            }
            captcha.findCaptchaImageURL();
            final String answer = captcha.getCaptchaString();

            final HttpPost post = new HttpPost(target);
            final List<NameValuePair> fields = new ArrayList<>();
            fields.add(new BasicNameValuePair("recaptcha_challenge_field", captcha.getCCaptchaUrl()));
            fields.add(new BasicNameValuePair("recaptcha_response_field", answer));
            post.setEntity(new UrlEncodedFormEntity(fields, "UTF-8"));

            final HttpResponse reply = client.execute(post);
            final String page = EntityUtils.toString(reply.getEntity());
            // the answer counts as accepted unless the page says otherwise
            return !page.contains("Invalid captcha!");
        } catch (Exception ex) {
            // covers JSONException as well; both were logged the same way
            LOGGER.log(Level.INFO, "error", ex);
        }

        return false;
    }

    /**
     * Trial handler for a single youtube link. It stores the normalized url
     * and the number of retries the extraction may use for it.
     */
    static final class YT_TLH implements TrialLinkHandler {

        private final String url;
        // read by the extraction methods of the enclosing class when an attempt fails
        private int retryLimit = 5;

        public void setRetryLimit(int retryLimit) {
            this.retryLimit = retryLimit;
        }

        YT_TLH(String url) {
            // normalize the url here
            this.url = Utils.normalize(url);
        }

        /**
         * Checks the stored url against {@link YoutubeLinkHandlerProvider#REG_EXP},
         * the pattern that is also published through the provider's annotation,
         * and logs the outcome.
         * Inspired by: <a href="http://stackoverflow.com/questions/3717115/regular-expression-for-youtube-links">Stack Overflow</a>
         * @return true when the whole url matches the pattern.
         */
        @Override
        public boolean canHandle() {
            // one pattern for annotation and handler, instead of a second copy of the literal
            boolean result = url.matches(REG_EXP);
            LOGGER.log(Level.INFO, "Youtube can handle {0} ?  {1}", new Object[] { url, result });
            return result;
        }

        /**
         * @return null when the url can be handled, otherwise a short message.
         */
        @Override
        public String getErrorMessage() {
            return canHandle() ? null : "Cannot handle";
        }

        @Override
        public boolean containsMultipleLinks() {
            return true;
        }

        /**
         * @return the normalized url, used as display name.
         */
        @Override
        public String tempDisplayName() {
            return url;
        }

        /**
         * @return the normalized url.
         */
        @Override
        public String getReferenceLinkString() {
            return url;
        }
    }
}