Java tutorial
/**************************************************************************** * Copyright (c) 2010 Giorgio Sironi. All rights reserved. * This program and the accompanying materials are made available under * the terms of the Eclipse Public License v1.0 which accompanies this * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html ****************************************************************************/ package it.polimi.chansonnier.agent; import it.polimi.chansonnier.utils.URLUtils; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.net.URLDecoder; import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; public class YoutubeGrabber { private final Log _log = LogFactory.getLog(YoutubeGrabber.class); public InputStream getVideo(String pageUrl) { try { URL video = new URL(getVideoUrl(pageUrl)); _log.debug("it.polimi.chansonnier.agent.YoutubeGrabber: downloading " + video); URLConnection connection = video.openConnection(); return connection.getInputStream(); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e.getMessage()); } } /** * * @param pageUrl * @return url of the FLV file * @throws Exception */ private String getVideoUrl(String pageUrl) throws Exception { URL page = new URL(pageUrl); String query = page.getQuery(); String[] tokens = query.split("&"); HashMap<String, String> params = new HashMap<String, String>(); for (int i = 0; i < tokens.length; i++) { String[] parts = tokens[i].split("="); params.put(parts[0], parts[1]); } String video_id = params.get("v"); if (video_id == null) video_id = inbtwn(URLDecoder.decode(getRedirUrl(pageUrl), "UTF-8"), "v=", "&"); String pageSource = URLUtils.retrieve(new URL("http://www.youtube.com/watch?v=" + video_id)); String title = inbtwn(pageSource, "'VIDEO_TITLE': '", "',"); if (title == null) title = inbtwn(pageSource, "name=\"title\" content=\"", "\""); if (title == null) { throw new RuntimeException("The page does not contain a video (" + pageUrl + ")."); } title = setHTMLEntity(title); String token = inbtwn(pageSource, "\"t\": \"", "\""); if (token == null) token = inbtwn(pageSource, "&t=", "&"); if (!token.endsWith("%3D")) token = inbtwnmore(pageSource, "&t=", "&", 2); String dl_flvlow = null; String dl_flvmed = null; String dl_flvmed2 = null; String dl_flvhigh = null; dl_flvmed = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=34"); if (dl_flvmed != null) { _log.debug("flvmed for video " + video_id); return dl_flvmed; } dl_flvmed2 = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=6"); if (dl_flvmed2 != null) { _log.debug("flvmed2 for video " + video_id); return dl_flvmed2; } dl_flvlow = getRedirUrl("http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=5"); if (dl_flvlow != null) { _log.debug("flvlow for video " + video_id); return dl_flvlow; } dl_flvhigh = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=35"); if (dl_flvhigh != null) { _log.debug("flvhigh for video " + video_id); return dl_flvhigh; } // other formats String dl_3gplow = null; String dl_3gpmed = null; String dl_3gphigh = null; String dl_mp4high = null; String dl_mp4hd = null; String dl_mp4hd2 = null; if (dl_3gplow == null) dl_3gplow = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=13"); if (dl_3gplow != null) return dl_3gplow; if (dl_3gpmed == null) dl_3gpmed = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=17"); if (dl_3gpmed != null) return dl_3gpmed; if (dl_3gphigh == null) dl_3gphigh = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=36"); if (dl_3gphigh != null) return dl_3gphigh; if (dl_mp4high == null) dl_mp4high = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=18"); if (dl_mp4high != null) return dl_mp4high; if (dl_mp4hd == null) dl_mp4hd = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=22"); if (dl_mp4hd != null) return dl_mp4hd; if (dl_mp4hd2 == null) dl_mp4hd2 = getRedirUrl( "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token + "&fmt=37"); if (dl_mp4hd2 != null) return dl_mp4hd2; throw new Exception("No suitable file found."); } private String inbtwn(String input, String startcut, String finishcut) { String output = null; try { String[] arr1 = input.split(startcut); String[] arr2 = arr1[1].split(finishcut); output = arr2[0]; } catch (Exception ex) { return null; } return output; } private String inbtwnmore(String input, String startcut, String finishcut, int times) { String output = null; try { String[] arr1 = input.split(startcut); String[] arr2 = arr1[times].split(finishcut); output = arr2[0]; } catch (Exception ex) { return null; } return output; } private String getRedirUrl(String url) { String hdr = ""; try { HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection(); conn.setInstanceFollowRedirects(false); conn.addRequestProperty("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.8) Gecko/20100215 Ubuntu/9.04 (jaunty) Shiretoko/3.5.8"); hdr = conn.getHeaderField("location"); } catch (Exception e) { e.printStackTrace(); return null; } return hdr; } private String setHTMLEntity(String input) { String output = ""; try { output = input.replace("&", "_").toString(); output = output.replace("<", "_").toString(); output = output.replace(">", "_").toString(); output = output.replace("'", "_").toString(); output = output.replace(""", "_").toString(); output = output.replace("&", "_").toString(); output = output.replace("\\\"", "_").toString(); output = output.replace("\\'", "_").toString(); output = output.replace("'", "_").toString(); output = output.replace("'", "_").toString(); output = output.replace("<", "_").toString(); output = output.replace(">", "_").toString(); } catch (Exception e) { e.printStackTrace(); return input; } return output; } }