Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// NOTE(review): the package name and several import targets are blank in this
// copy of the file; restore them before compiling. The class below needs
// File, IOException, FileUtils, StringUtils, PageFetcher and YouTubeMinerResult.
package;

import;
import;
import;
import org.apache.commons.lang.StringUtils;
import;

/**
 * Scrapes subscriber and view counts out of the raw HTML of a YouTube page.
 *
 * <p>The numbers are located by plain substring markers
 * ({@code subscriber-count ... tabindex} and {@code view-count ... <div>}),
 * so the extraction depends on the exact page markup.
 */
public class YouTubeMiner {
    private PageFetcher fetcher = new PageFetcher();

    /**
     * Fetches {@code url} and extracts the counters found in the page.
     *
     * <p>When {@code url} contains {@code "channel"} only the subscriber count
     * is extracted; otherwise both the subscriber count and the view count
     * are extracted. A counter that cannot be located is reported on
     * {@code System.err}, and one that cannot be parsed as an integer has its
     * stack trace printed; in both cases the corresponding field of the result
     * is left at its initial value.
     *
     * <p>Side effect: the fetched HTML is also written to a file whose name is
     * {@code url} with {@code ':'} and {@code '/'} replaced by {@code '_'}
     * (relative path, so it is resolved against the working directory).
     *
     * @param url address of the page to mine
     * @return a result object holding whichever counters could be parsed
     */
    public YouTubeMinerResult getData(String url) {
        YouTubeMinerResult result = new YouTubeMinerResult();
        String content = fetcher.fetchOrigHTML(url);

        // Keep a copy of the fetched page on disk; failure to write is not fatal.
        try {
            FileUtils.writeStringToFile(new File(url.replace(':', '_').replace('/', '_')), content);
        } catch (IOException e1) {
            e1.printStackTrace();
        }

        boolean channelPage = url.indexOf("channel") > -1;

        // Sample markup: subscriber-count" title="30"
        String dirtyNumber = extractSubscriberTitle(content);
        if (dirtyNumber != null) {
            // Clean only after the null check: substringBetween returns null
            // when a marker is missing, and calling a method on that result
            // used to throw NullPointerException instead of reporting
            // "Not found".
            String cleanNumber = channelPage
                    ? dirtyNumber.replaceAll("[^\\x00-\\x7F]", "")
                    : dirtyNumber.replace(" ", "");
            try {
                result.subscribers = Integer.parseInt(cleanNumber);
            } catch (NumberFormatException e) {
                e.printStackTrace();
            }
        } else {
            System.err.println("Not found data for 'subscriber-count', 'tabindex'");
        }

        if (!channelPage) {
            // Parsed independently of the subscriber count so that one
            // malformed number does not suppress the other.
            // Sample markup: view-count">12 ?</div>
            String viewsStr = extractViewCount(content);
            if (viewsStr != null) {
                try {
                    result.views = Integer.parseInt(viewsStr);
                } catch (NumberFormatException e) {
                    e.printStackTrace();
                }
            } else {
                System.err.println("Not found data for 'view-count','<div>'");
            }
        }
        return result;
    }

    /**
     * Returns the raw {@code title="..."} value found between the
     * {@code subscriber-count} and {@code tabindex} markers, or {@code null}
     * when any of the markers is missing.
     */
    private static String extractSubscriberTitle(String content) {
        String subscribersStr = StringUtils.substringBetween(content, "subscriber-count", "tabindex");
        return StringUtils.substringBetween(subscribersStr, "title=\"", "\"");
    }

    /**
     * Returns the text between the first {@code '>'} and the following space
     * inside the {@code view-count ... <div>} span, or {@code null} when any
     * of the markers is missing.
     */
    private static String extractViewCount(String content) {
        String viewsStrDirty = StringUtils.substringBetween(content, "view-count", "<div>");
        return StringUtils.substringBetween(viewsStrDirty, ">", " ");
    }

    /**
     * Runs the miner against a fixed list of pages and prints each result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        YouTubeMiner miner = new YouTubeMiner();
        // NOTE(review): the URL arguments are empty in this copy of the file;
        // presumably they were real page addresses - confirm and restore.
        System.out.println(miner.getData(""));
        System.out.println(miner.getData(""));
        System.out.println(miner.getData(""));
        System.out.println(miner.getData(""));
    }
}