Usage examples for org.jsoup.nodes.Document.getElementsByTag
public Elements getElementsByTag(String tagName)
From source file:org.arb.extractor.DomTreeWalker.java
/** * Extract localizable resource from a code unit. * /*from ww w .jav a 2 s. co m*/ * @param codeUnit AbstractCodeUnit instance that has all information related to a source file. */ @Override public void extractResource(AbstractCodeUnit codeUnit) { Document doc = codeUnit.getDomDocument(); Elements elements = doc.getElementsByTag("html"); for (int i = 0; i < elements.size(); ++i) { extractResourceOnElement(elements.get(i), codeUnit); } }
From source file:org.asqatasun.processing.ProcessRemarkServiceImplTest.java
/** * Test of setDocument method, of class ProcessRemarkServiceImpl. *//* w w w .j a va 2 s. c om*/ public void testGetSnippetFromElement() { ProcessRemarkServiceImpl instance = new ProcessRemarkServiceImpl(null, null, null); //--------------------------------------------------------------------// //-----------------------Test1----------------------------------------// //--------------------------------------------------------------------// String rawHtml = "<label> <span>Rechercher:</span> " + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" " + "class=\"text\" id=\"searchfield\" " + "name=\"search&qudsqqqssqdsqdsqdo\" /></label>"; Document document = Jsoup.parse(rawHtml); Element element = document.getElementsByTag("label").iterator().next(); String snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element)); String expectedSnippet = "<label> <span>Rechercher:</span> " + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" " + "class=\"text\" id=\"searchfield\" " + "name=\"search&qudsqqqssqdsqdsqdo\" />[...]</label>"; assertEquals(expectedSnippet, snippet); //--------------------------------------------------------------------// //-----------------------Test2----------------------------------------// //--------------------------------------------------------------------// rawHtml = "<label> <span>New Rechercher:</span> " + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\" " + " id=\"searchfield\" class=\"myclass other-class1 other-class2\" > " + "anything</p></label>"; document = Jsoup.parse(rawHtml); element = document.getElementsByTag("label").iterator().next(); snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element)); expectedSnippet = "<label> <span>New Rechercher:</span> " + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\"" + " id=\"searchfield\" class=\"myclass other-class1 other-class2\">" + "[...]</p>[...]</label>"; assertEquals(expectedSnippet, 
snippet); //--------------------------------------------------------------------// //-----------------------Test3----------------------------------------// //--------------------------------------------------------------------// rawHtml = "<iframe align=\"left\" width=\"315px\" " + "scrolling=\"no\" height=\"160px\" frameborder=\"0\" " + "id=\"link-meteo\" src=\"http://www.anyUrl.com/module/onelocationsearch?ShowSearch=true&StartDate=2012-06-01&Days=2&location=bruxelles&url=http://meteo1.lavenir.net&cssfile=http://lavenir.net/extra/weather/styles.css\">" + "</iframe> "; document = Jsoup.parse(rawHtml); element = document.getElementsByTag("iframe").iterator().next(); snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element)); expectedSnippet = rawHtml.trim(); assertEquals(expectedSnippet, snippet); //--------------------------------------------------------------------// //-----------------------Test4----------------------------------------// //--------------------------------------------------------------------// rawHtml = " <center> <script type=\"text/javascript\"> if (articledetail == false) initAdhese('IMU.SUPER.WIDE'); </script> " + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\"></script> " + " <div class=\"adhese_300x250\"> <script src=\"http://1.adhesecdn.be/pool/lib/68641.js?t=1371729603000\"></script> " + "<script src=\"http://anyUrl.com/pagead/show_ads.js\" type=\"text/javascript\"></script>" + "<ins style=\"display:inline-table;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\">" + "<ins style=\"display:block;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\" id=\"aswift_1_anchor\">" + "<iframe width=\"300\" scrolling=\"no\" height=\"250\" frameborder=\"0\" style=\"left:0;position:absolute;top:0;\" name=\"aswift_1\" 
id=\"aswift_1\" onload=\"var i=this.id,s=window.google_iframe_oncopy,H=s&&s.handlers,h=H&&H[i],w=this.contentWindow,d;try{d=w.document}catch(e){}if(h&&d&&(!d.body||!d.body.firstChild)){if(h.call){setTimeout(h,0)}else if(h.match){w.location.replace(h)}}\" allowtransparency=\"true\" hspace=\"0\" vspace=\"0\" marginheight=\"0\" marginwidth=\"0\"></iframe>" + "</ins>" + "</ins>" + "</div> " + "</center> "; document = Jsoup.parse(rawHtml); element = document.getElementsByTag("center").iterator().next(); snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element)); expectedSnippet = "<center> <script type=\"text/javascript\"> if (articledetail == false) initAdhese('IMU.SUPER.WIDE'); </script> " + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\">[...]</script>" + "[...]</center>"; assertEquals(expectedSnippet, snippet); }
From source file:org.b3log.solo.plugin.list.ListHandler.java
/**
 * Prepends a heading list to the article content carried by the event.
 *
 * <p>Every {@code h1}-{@code h5} heading in the article gets an empty anchor {@code <span>}
 * inserted before it, and a matching {@code <li>} link is added to a {@code <ul>} that is put
 * in front of the article body, after a stylesheet link for the plugin.
 *
 * @param event the event whose data holds the article JSON object
 * @throws EventException declared by the handler contract
 */
@Override
public void action(final Event<JSONObject> event) throws EventException {
    final JSONObject article = event.getData().optJSONObject(Article.ARTICLE);
    final String originalContent = article.optString(Article.ARTICLE_CONTENT);

    final Document doc = Jsoup.parse(originalContent, StringUtils.EMPTY, Parser.htmlParser());
    doc.outputSettings().prettyPrint(false);

    final StringBuilder toc = new StringBuilder();
    toc.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"")
            .append(Latkes.getStaticServePath())
            .append("/plugins/list/style.css\" />");

    final Elements headings = doc.select("h1, h2, h3, h4, h5");
    toc.append("<ul class='b3-solo-list'>");

    int index = 0;
    for (final Element heading : headings) {
        final String tagName = heading.tagName().toLowerCase();
        final String anchorId = "b3_solo_" + tagName + "_" + index;

        // The anchor target goes right before the heading so the list link can jump to it.
        heading.before("<span id='" + anchorId + "'></span>");

        toc.append("<li class='b3-solo-list-").append(tagName)
                .append("'><a href='#").append(anchorId).append("'>")
                .append(heading.text())
                .append("</a></li>");
        index++;
    }
    toc.append("</ul>");

    final Element body = doc.getElementsByTag("body").get(0);
    article.put(Article.ARTICLE_CONTENT, toc.toString() + body.html());
}
From source file:org.b3log.symphony.util.Markdowns.java
/** * Gets the safe HTML content of the specified content. * * @param content the specified content/*from w w w. j ava 2s. c o m*/ * @param baseURI the specified base URI, the relative path value of href will starts with this URL * @return safe HTML content */ public static String clean(final String content, final String baseURI) { final Document.OutputSettings outputSettings = new Document.OutputSettings(); outputSettings.prettyPrint(false); final String tmp = Jsoup.clean(content, baseURI, Whitelist.relaxed().addAttributes(":all", "id", "target", "class") .addTags("span", "hr", "kbd", "samp", "tt", "del", "s", "strike", "u") .addAttributes("iframe", "src", "width", "height", "border", "marginwidth", "marginheight") .addAttributes("audio", "controls", "src") .addAttributes("video", "controls", "src", "width", "height") .addAttributes("source", "src", "media", "type") .addAttributes("object", "width", "height", "data", "type") .addAttributes("param", "name", "value") .addAttributes("input", "type", "disabled", "checked").addAttributes("embed", "src", "type", "width", "height", "wmode", "allowNetworking"), outputSettings); final Document doc = Jsoup.parse(tmp, baseURI, Parser.htmlParser()); final Elements ps = doc.getElementsByTag("p"); for (final Element p : ps) { p.removeAttr("style"); } final Elements iframes = doc.getElementsByTag("iframe"); for (final Element iframe : iframes) { final String src = StringUtils.deleteWhitespace(iframe.attr("src")); if (StringUtils.startsWithIgnoreCase(src, "javascript") || StringUtils.startsWithIgnoreCase(src, "data:")) { iframe.remove(); } } final Elements objs = doc.getElementsByTag("object"); for (final Element obj : objs) { final String data = StringUtils.deleteWhitespace(obj.attr("data")); if (StringUtils.startsWithIgnoreCase(data, "data:") || StringUtils.startsWithIgnoreCase(data, "javascript")) { obj.remove(); continue; } final String type = StringUtils.deleteWhitespace(obj.attr("type")); if 
(StringUtils.containsIgnoreCase(type, "script")) { obj.remove(); } } final Elements embeds = doc.getElementsByTag("embed"); for (final Element embed : embeds) { final String data = StringUtils.deleteWhitespace(embed.attr("src")); if (StringUtils.startsWithIgnoreCase(data, "data:") || StringUtils.startsWithIgnoreCase(data, "javascript")) { embed.remove(); continue; } } final Elements as = doc.getElementsByTag("a"); for (final Element a : as) { a.attr("rel", "nofollow"); final String href = a.attr("href"); if (href.startsWith(Latkes.getServePath())) { continue; } a.attr("target", "_blank"); } final Elements audios = doc.getElementsByTag("audio"); for (final Element audio : audios) { audio.attr("preload", "none"); } final Elements videos = doc.getElementsByTag("video"); for (final Element video : videos) { video.attr("preload", "none"); } String ret = doc.body().html(); ret = ret.replaceAll("(</?br\\s*/?>\\s*)+", "<br>"); // patch for Jsoup issue return ret; }
From source file:org.crazyt.xgogdownloader.Main.java
public static void main(String[] args) { Util util = new Util(); Config.sVersionString = VERSION_STRING + VERSION_NUMBER; Config.sConfigDirectory = "xgogdownloader"; Config.sCookiePath = "cookies.txt"; Config.sConfigFilePath = "config.cfg"; Config.sXMLDirectory = "xgogdownloader/xml"; // Create xgogdownloader directories File path = Factory.newFile(Config.sXMLDirectory); if (!path.exists()) { if (!path.mkdirs()) { System.out.print("Failed to create directory: "); System.out.print(path); throw new RuntimeException("Failed to create directory. "); }/*from w ww. j a v a 2 s . c o m*/ } path = Factory.newFile(Config.sConfigDirectory); if (!path.exists()) { if (!path.mkdirs()) { System.out.print("Failed to create directory: "); System.out.print(path); throw new RuntimeException("Failed to create directory. "); } } // Create help text for --platform option String platform_text = "Select which installers are downloaded\n"; int platform_sum = 0; for (int i = 0; i < GlobalConstants.PLATFORMS.size(); ++i) { platform_text += GlobalConstants.PLATFORMS.get(i).platformId + " = " + GlobalConstants.PLATFORMS.get(i).platformString + "\n"; platform_sum += GlobalConstants.LANGUAGES.get(i).languageId; } platform_text += platform_sum + " = All"; // Create help text for --language option String language_text = "Select which language installers are downloaded\n"; int language_sum = 0; for (int i = 0; i < GlobalConstants.LANGUAGES.size(); ++i) { language_text += GlobalConstants.LANGUAGES.get(i).languageId + " = " + GlobalConstants.LANGUAGES.get(i).languageString + "\n"; language_sum += GlobalConstants.LANGUAGES.get(i).languageId; } language_text += "Add the values to download multiple languages\nAll = " + language_sum + "\n" + "French + Polish = " + GlobalConstants.LANGUAGE_FR + "+" + GlobalConstants.LANGUAGE_PL + " = " + GlobalConstants.LANGUAGE_FR + GlobalConstants.LANGUAGE_PL; // Create help text for --check-orphans String[] orphans_regex_default = new String[] { "zip", "exe", "bin", 
"dmg", "old" }; // List<File> files = (List<File>) FileUtils.listFiles(dir, extensions, // true); String check_orphans_text = "Check for orphaned files (files found on local filesystem that are not found on GOG servers). Sets regular expression filter (Perl syntax) for files to check. If no argument is given then the regex defaults to '" + StringUtils.join(orphans_regex_default, ",") + "'"; CommandOptions options_cli_all = new CommandOptions(); CommandOptions options_cli_no_cfg = new CommandOptions(); ConfigOptions options_cli_cfg = new ConfigOptions(); ConfigOptions options_cfg_only = new ConfigOptions(); ConfigOptions options_cfg_all = new ConfigOptions(); try { OptionValue<Boolean> bInsecure = new OptionValue<>(false); OptionValue<Boolean> bNoColor = new OptionValue<>(false); OptionValue<Boolean> bNoUnicode = new OptionValue<>(false); OptionValue<Boolean> bNoDuplicateHandler = new OptionValue<>(false); OptionValue<Boolean> bNoCover = new OptionValue<>(false); OptionValue<Boolean> bNoInstallers = new OptionValue<>(false); OptionValue<Boolean> bNoExtras = new OptionValue<>(false); OptionValue<Boolean> bNoPatches = new OptionValue<>(false); OptionValue<Boolean> bNoLanguagePacks = new OptionValue<>(false); OptionValue<Boolean> bNoRemoteXML = new OptionValue<>(false); OptionValue<Boolean> bNoSubDirectories = new OptionValue<>(false); OptionValue<String> sGame = new OptionValue<>("free"); OptionValue<String> sToken = new OptionValue<>(""); OptionValue<String> sSecret = new OptionValue<>(""); OptionValue<String> sSearch = new OptionValue<>(""); OptionValue<Boolean> bList = new OptionValue<>(false); OptionValue<Boolean> bDownload = new OptionValue<>(false); OptionValue<Integer> iDownloadRate = new OptionValue<>(0); // //switch to OptionBuilder // Commandline options (no config file) options_cli_no_cfg.addOption("debug", "d", false, "Print debug messages"); options_cli_no_cfg.addOption("help", "h", false, "Print help message"); options_cli_no_cfg.addOption("version", 
false, "Print version information"); options_cli_no_cfg.addOption("versionUpdate", false, "Updates this program to the current version."); options_cli_no_cfg.addOption("login", true, "Login"); // config.bLogin false options_cli_no_cfg.addOption(bList, "list", false, "List games"); // config.bList false options_cli_no_cfg.addOption(sSearch, "search", true, "search games by title"); options_cli_no_cfg.addOption("listdetails", "list-details", true, "List games with detailed info"); // config.bListDetails // false options_cli_no_cfg.addOption(bDownload, "download", false, "Download"); // config.bDownload false options_cli_no_cfg.addOption("repair", true, "Repair downloaded files\nUse --repair --download to redownload files when filesizes don't match (possibly different version). Redownload will delete the old file"); // config.bRepair // false options_cli_no_cfg.addOption("game", true, "Set regular expression filter\nfor download/list/repair (Perl syntax)\nAliases: \"all\", \"free\""); // config.sGameRegex // "" options_cli_no_cfg.addOption("createxml", "create-xml", true, "Create GOG XML for file\n\"automatic\" to enable automatic XML creation"); // config.sXMLFile // "" options_cli_no_cfg.addOption("updatecheck", "update-check", true, "Check for update notifications"); // config.bUpdateCheck false options_cli_no_cfg.addOption("checkorphans", "check-orphans", true, check_orphans_text); // config.sOrphanRegex "" options_cli_no_cfg.addOption("status", true, "Show status of files\n\nOutput format:\nstatuscode gamename filename filesize filehash\n\nStatus codes:\nOK - File is OK\nND - File is not downloaded\nMD5 - MD5 mismatch, different version");// config.bCheckStatus // false options_cli_no_cfg.addOption("saveconfig", "save-config", true, "Create config file with current settings"); // config.bSaveConfig false options_cli_no_cfg.addOption("resetconfig", "reset-config", true, "Reset config settings to default"); // config.bResetConfig false 
options_cli_no_cfg.addOption("report", true, "Save report of downloaded/repaired files"); // config.bReport false // Commandline options (config file) options_cli_cfg.addOption("directory", true, "Set download directory"); // config.sDirectory "" options_cli_cfg.addOption(iDownloadRate, "limitRate", true, "Limit download rate to value in kB\n0 = unlimited"); // config.iDownloadRate 0 options_cli_cfg.addOption("xmlDirectory", true, "Set directory for GOG XML files"); // config.sXMLDirectory "" options_cli_cfg.addOption("chunkSize", true, "Chunk size (in MB) when creating XML"); // config.iChunkSize 10 options_cli_cfg.addOption("platform", true, platform_text); // config.iInstallerType GlobalConstants.PLATFORM_WINDOWS options_cli_cfg.addOption("language", true, language_text); // config.iInstallerLanguage GlobalConstants.LANGUAGE_EN options_cli_cfg.addOption("noInstallers", true, "Don't download/list/repair installers"); // bNoInstallers false options_cli_cfg.addOption("noExtras", true, "Don't download/list/repair extras"); // bNoExtras false options_cli_cfg.addOption("noPatches", true, "Don't download/list/repair patches"); // bNoPatches false options_cli_cfg.addOption("noLanguagePacks", true, "Don't download/list/repair language packs"); // bNoLanguagePacks false options_cli_cfg.addOption("noCover", true, "Don't download cover images"); // bNoCover false options_cli_cfg.addOption("noRemoteXml", true, "Don't use remote XML for repair"); // bNoRemoteXML false options_cli_cfg.addOption(bNoUnicode, "noUnicode", true, "Don't use Unicode in the progress bar"); // bNoUnicode false options_cli_cfg.addOption(bNoColor, "noColor", true, "Don't use coloring in the progress bar"); // bNoColor false options_cli_cfg.addOption("noDuplicateHandling", true, "Don't use duplicate handler for installers\nDuplicate installers from different languages are handled separately");// bNoDuplicateHandler // false options_cli_cfg.addOption("noSubdirectories", true, "Don't create subdirectories 
for extras, patches and language packs"); // bNoSubDirectories false options_cli_cfg.addOption("verbose", true, "Print lots of information"); options_cli_cfg.addOption("insecure", true, "Don't verify authenticity of SSL certificates"); // bInsecure false options_cli_cfg.addOption("timeout", true, "Set timeout for connection\nMaximum time in seconds that connection phase is allowed to take"); // config.iTimeout 10 options_cli_cfg.addOption("retries", true, "Set maximum number of retries on failed download"); // config.iRetries 3 // Options read from config file options_cfg_only.addOption(sToken, "token", true, "oauth token"); // config.sToken "" options_cfg_only.addOption(sSecret, "secret", true, "oauth secret"); // config.sSecret "" options_cli_all.addOptions(options_cli_no_cfg); options_cli_all.addOptions(options_cli_cfg); options_cfg_all.addOptions(options_cfg_only); options_cfg_all.addOptions(options_cli_cfg); options_cfg_all.parse(Config.sConfigFilePath); // boost.program_options.store(boost.program_options // .parse_command_line(argc, args, options_cli_all), vm); CommandLineParser parser = new GnuParser(); String[] args2; if (args.length == 0) { args2 = new String[] { "-help" }; } else { args2 = args; } CommandLine cmd = parser.parse(options_cli_all, args2); options_cli_all.parseCmdLine(cmd); path = Factory.newFile(Config.sConfigDirectory); if (path.exists()) { Properties prop = new Properties(); try { FileInputStream fileInputStream = new FileInputStream( Config.sConfigDirectory + File.separatorChar + Config.sConfigFilePath); try { prop.load(fileInputStream); } finally { fileInputStream.close(); } } catch (FileNotFoundException e) { System.out.println("Could not open config file: " + Config.sConfigDirectory + File.separatorChar + Config.sConfigFilePath + ", creating new one."); Factory.newFile(Config.sConfigDirectory + File.separatorChar + Config.sConfigFilePath) .createNewFile(); } } if (cmd.hasOption("help")) { System.out.println(Config.sVersionString); 
System.out.println("Options:"); for (Option option : (Collection<Option>) options_cli_all.getOptions()) { System.out.println(String.format("%20s\t-\t%s", option.getOpt(), option.getDescription().replace("\n", String.format("\n%20s\t \t", "")))); } return; } if (cmd.hasOption("version")) { System.out.print(Config.sVersionString); return; } if (cmd.hasOption("versionUpdate")) { String sub = "xgogdownloader-"; try { HttpClient client = Factory.createHttpClient(); HttpGet request = new HttpGet("https://drone.io/github.com/TheCrazyT/xgogdownloader/files"); request.setHeader("User-Agent", Main.USER_AGENT); HttpResponse response_full = client.execute(request); int result = response_full.getStatusLine().getStatusCode(); if (result != HttpStatus.SC_OK) { System.err.println("Error " + result); } String response = EntityUtils.toString(response_full.getEntity()); Document html = Jsoup.parse(response); Iterator<org.jsoup.nodes.Element> iterator = html.getElementsByTag("div").iterator(); while (iterator.hasNext()) { org.jsoup.nodes.Element node = iterator.next(); String hash = ""; Elements spans = node.getElementsByTag("span"); Iterator<org.jsoup.nodes.Element> iterator2 = spans.iterator(); while (iterator2.hasNext()) { org.jsoup.nodes.Element span = iterator2.next(); if (span.text().startsWith("SHA")) { hash = span.text().substring(4, 44); break; } } if (!hash.isEmpty()) { iterator2 = node.getElementsByTag("a").iterator(); while (iterator2.hasNext()) { Element a = iterator2.next(); String url = a.attr("href"); if (a.text().startsWith(sub) && a.text().endsWith(".zip")) { // TODO System.out.println("... 
TODO ..."); System.out.println(url); return; } } } } return; } catch (IOException e) { throw new RuntimeException(e); } } if (cmd.hasOption("chunkSize")) { Config.iChunkSize <<= 20; // Convert chunk size from bytes to megabytes } if (cmd.hasOption("limitRate")) { Config.iDownloadRate = iDownloadRate.getValue(); Config.iDownloadRate <<= 10; // Convert download rate from bytes to kilobytes } if (cmd.hasOption("check-orphans")) { if (Config.sOrphanRegex.isEmpty()) { Config.sOrphanRegex = StringUtils.join(orphans_regex_default, "|"); } } Config.bDownload = bDownload.getValue(); Config.sToken = sToken.getValue(); Config.sSecret = sToken.getValue(); Config.sSearch = sSearch.getValue(); Config.sGameRegex = sGame.getValue(); Config.bList = bList.getValue(); Config.bVerifyPeer = !bInsecure.getValue(); Config.bColor = !bNoColor.getValue(); Config.bUnicode = !bNoUnicode.getValue(); Config.bDuplicateHandler = !bNoDuplicateHandler.getValue(); Config.bCover = !bNoCover.getValue(); Config.bInstallers = !bNoInstallers.getValue(); Config.bExtras = !bNoExtras.getValue(); Config.bPatches = !bNoPatches.getValue(); Config.bLanguagePacks = !bNoLanguagePacks.getValue(); Config.bRemoteXML = !bNoRemoteXML.getValue(); Config.bSubDirectories = !bNoSubDirectories.getValue(); } catch (RuntimeException e) { System.err.println("Error: " + e.getMessage()); throw e; } catch (java.lang.Exception e) { System.err.println("Exception of unknown type!"); throw new RuntimeException(e); } if (Config.iInstallerType < GlobalConstants.PLATFORMS.get(0).platformId || Config.iInstallerType > platform_sum) { System.out.println("Invalid value for --platform"); throw new RuntimeException("Invalid value for --platform"); } if (Config.iInstallerLanguage < GlobalConstants.LANGUAGES.get(0).languageId || Config.iInstallerLanguage > language_sum) { System.out.println("Invalid value for --language"); throw new RuntimeException("Invalid value for --language"); } if (Config.sXMLDirectory != "") { // Make sure that xml 
directory doesn't have trailing slash if (Config.sXMLDirectory.charAt(Config.sXMLDirectory.length() - 1) == '/') { // config.sXMLDirectory.assign(config.sXMLDirectory.begin(),config.sXMLDirectory.end() // - 1); } } // Create GOG XML for a file if ((Config.sXMLFile != null) && !Config.sXMLFile.isEmpty() && !Config.sXMLFile.equals("automatic")) { util.createXML(Config.sXMLFile, Config.iChunkSize, Config.sXMLDirectory); } // Make sure that directory has trailing slash // if (Config.sDirectory != null && !Config.sDirectory.isEmpty()) { // if (Config.sDirectory.charAt(Config.sDirectory.length() - 1) != '/') // { // Config.sDirectory += "/"; // } // } Downloader downloader = new Downloader(); boolean result = downloader.init(); if (Config.bLogin) { if (!result) { throw new RuntimeException("downloader.init failed"); } return; } else if (Config.bSaveConfig) { // std.ofstream ofs = new // std.ofstream(config.sConfigFilePath.c_str()); String ofs = null; if (ofs != null) { System.out.println("Saving config: " + Config.sConfigFilePath); /* * for (boost.program_options.variables_map.iterator it = * vm.begin(); it != vm.end(); ++it) { String option = it.first; * String option_value_string; * boost.program_options.variable_value option_value = * it.second; * * try { if (option.equals(options_cfg_all.find(option, * false).long_name())) { if (!option_value.empty()) { * std.type_info type = option_value.value().type(); if (type == * typeid(String)) { option_value_string = * option_value.<String>as(); } * * } } } catch (java.lang.Exception e2) { continue; } * * if (option_value_string!="") { * System.out.println(option+" = "+option_value_string); //ofs * << option.compareTo() < 0 < < " = " << * option_value_string.compareTo() < 0 < < std.endl; } } * //ofs.close(); */ } else { System.out.println("Failed to create config: " + Config.sConfigFilePath); throw new RuntimeException("Failed to create config: " + Config.sConfigFilePath); } } else if (Config.bResetConfig) { String ofs = null; 
// std.ofstream ofs = new // std.ofstream(config.sConfigFilePath.c_str()); if (ofs != null) { /* * if (config.sToken!="" && config.sSecret!="") { ofs * +="token = " +config.sToken+"\n"; ofs +="secret = " * +config.sSecret+"\n"; } */ // ofs.close(); } else { System.out.println("Failed to create config: " + Config.sConfigFilePath); throw new RuntimeException("Failed to create config: " + Config.sConfigFilePath); } } else if (Config.bUpdateCheck) { // Update check has priority over download and list downloader.updateCheck(); } else if (Config.bRepair) { // Repair file downloader.repair(); } else if ((Config.sSearch != null) && (!Config.sSearch.isEmpty())) { // search games downloader.searchGames(Config.sSearch); } else if (Config.bDownload) { // Download games downloader.download(); } else if (Config.bListDetails || Config.bList) { // Detailed list of games/extras downloader.listGames(); } else if (Config.sOrphanRegex != null) { // Check for orphaned files if regex for orphans is set downloader.checkOrphans(); } else if (Config.bCheckStatus) { downloader.checkStatus(); } else { // Show help message System.out.println(Config.sVersionString + "" + options_cli_all); } // Orphan check was called at the same time as download. Perform it // after download has finished if (Config.sOrphanRegex != null && Config.bDownload) { downloader.checkOrphans(); } return; }
From source file:org.ednovo.gooru.application.util.ResourceImageUtil.java
/**
 * Collects title, description, duration and (optionally) candidate images for a URL.
 *
 * <p>URLs containing the Vimeo or YouTube marker are looked up through the matching feed
 * helper. When no feed is available, or the feed reports status 404, the page itself is
 * fetched with jsoup and its title, description meta tag and {@code <img>} sources are used
 * instead. A failed page fetch is only printed; the result then keeps its empty defaults.
 *
 * @param url the resource URL; may be {@code null}
 * @param resourceTitle not read by this method
 * @param fetchThumbnail whether image URLs should be collected and returned
 * @return a map holding the title, description and duration, plus the image set when
 *         {@code fetchThumbnail} is {@code true}
 */
public Map<String, Object> getResourceMetaData(String url, String resourceTitle, boolean fetchThumbnail) {
    Map<String, Object> result = new HashMap<String, Object>();

    ResourceMetadataCo feed = null;
    if (url != null && url.contains(VIMEO_VIDEO)) {
        feed = getMetaDataFromVimeoVideo(url);
    } else if (url != null && url.contains(YOUTUBE_VIDEO)) {
        feed = getYoutubeResourceFeeds(url, null);
    }

    String pageTitle = "";
    String pageDescription = "";
    String duration = "";
    Set<String> imageUrls = new LinkedHashSet<String>();

    boolean feedUsable = !(feed == null || feed.getUrlStatus() == 404);
    if (feedUsable) {
        pageTitle = feed.getTitle();
        pageDescription = feed.getDescription();
        duration = feed.getDuration().toString();
    } else {
        // No usable feed: fall back to scraping the page itself.
        Document page = null;
        try {
            if (url != null && (url.contains("http://") || url.contains("https://"))) {
                page = Jsoup.connect(url).timeout(6000).get();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        if (page != null) {
            pageTitle = page.title();

            Elements metaTags = page.getElementsByTag(META);
            if (metaTags != null) {
                for (Element metaTag : metaTags) {
                    String name = metaTag.attr(NAME);
                    if (name != null && name.equalsIgnoreCase(DESCRIPTION)) {
                        pageDescription = metaTag.attr(CONTENT);
                        break;
                    }
                }
            }
            result.put(DESCRIPTION, pageDescription);

            if (fetchThumbnail) {
                Elements withSrc = page.select("[src]");
                if (withSrc != null) {
                    for (Element candidate : withSrc) {
                        if (candidate.tagName().equals(IMG)) {
                            imageUrls.add(candidate.attr("abs:src"));
                        }
                        if (imageUrls.size() >= SUGGEST_IMAGE_MAX_SIZE) {
                            break;
                        }
                    }
                }
            }
        }
    }

    if (fetchThumbnail) {
        if (feed != null && feed.getThumbnail() != null) {
            imageUrls.add(feed.getThumbnail());
        }
        result.put(IMAGES, imageUrls);
    }

    result.put(TITLE, pageTitle);
    result.put(DESCRIPTION, pageDescription);
    result.put(DURATION, duration);
    return result;
}
From source file:org.jboss.tools.tycho.sitegenerator.GenerateCompositeSite.java
private void collectChildrenFromRemote(String collectChildrenFromRemoteURL2, String collectChildrenFromRemoteRegex2, int collectChildrenFromRemoteLimit2, List<String> childSitesList2) throws MojoFailureException { Document doc = null; try {/*from w ww . ja v a 2 s . c o m*/ // getLog().debug("Load children from: " + // collectChildrenFromRemoteURL2); doc = Jsoup.connect(collectChildrenFromRemoteURL2).get(); // getLog().debug("Regex to match: " + // collectChildrenFromRemoteRegex2); Elements links = doc.getElementsByTag("a"); // sort larges (newest) first Collections.sort(links, new Comparator<Element>() { @Override public int compare(Element e1, Element e2) { return e2.attr("href").compareTo(e1.attr("href")); } }); int linksAdded = 0; for (Element link : links) { String linkHref = link.attr("href"); if (collectChildrenFromRemoteRegex2 == null || (linkHref.matches(collectChildrenFromRemoteRegex2) && (linksAdded < collectChildrenFromRemoteLimit2 || collectChildrenFromRemoteLimit2 < 0))) { getLog().debug("Adding: " + linkHref); childSitesList2.add(collectChildrenFromRemoteURL2 + linkHref); linksAdded++; } } } catch (IOException ex) { throw new MojoFailureException(ex.getMessage(), ex); } doc = null; }
From source file:org.jboss.tools.windup.ui.internal.issues.IssueDetailsView.java
public static void addPrism(Document doc) { try {/*from ww w. j a v a2 s. c o m*/ Bundle bundle = WindupUIPlugin.getDefault().getBundle(); Elements codeElements = doc.getElementsByTag("code"); codeElements.forEach(element -> { Set<String> classNames = element.classNames(); Set<String> newNames = Sets.newHashSet(); classNames.forEach(className -> { // prismjs requires prefix, i'm not sure about another/easier workaround. newNames.add("language-" + className); }); element.classNames(newNames); }); DocumentType type = new DocumentType("html", "", "", ""); doc.insertChildren(0, Lists.newArrayList(type)); Element head = doc.head(); Element css = doc.createElement("link"); URL fileURL = FileLocator.find(bundle, new Path("html/prism.css"), null); String srcPath = FileLocator.resolve(fileURL).getPath(); css.attr("href", srcPath); css.attr("rel", "stylesheet"); head.appendChild(css); Element body = doc.body(); Element script = doc.createElement("script"); fileURL = FileLocator.find(bundle, new Path("html/prism.js"), null); srcPath = FileLocator.resolve(fileURL).getPath(); script.attr("src", srcPath); body.appendChild(script); } catch (Exception e) { WindupUIPlugin.log(e); } }
From source file:org.loklak.api.search.EventBriteCrawlerService.java
public static SusiThought crawlEventBrite(String url) { Document htmlPage = null; try {/*from ww w . j a va2 s . c om*/ htmlPage = Jsoup.connect(url).get(); } catch (Exception e) { e.printStackTrace(); } String eventID = null; String eventName = null; String eventDescription = null; // TODO Fetch Event Color String eventColor = null; String imageLink = null; String eventLocation = null; String startingTime = null; String endingTime = null; String ticketURL = null; Elements tagSection = null; Elements tagSpan = null; String[][] tags = new String[5][2]; String topic = null; // By default String closingDateTime = null; String schedulePublishedOn = null; JSONObject creator = new JSONObject(); String email = null; Float latitude = null; Float longitude = null; String privacy = "public"; // By Default String state = "completed"; // By Default String eventType = ""; String temp; Elements t; eventID = htmlPage.getElementsByTag("body").attr("data-event-id"); eventName = htmlPage.getElementsByClass("listing-hero-body").text(); eventDescription = htmlPage.select("div.js-xd-read-more-toggle-view.read-more__toggle-view").text(); eventColor = null; imageLink = htmlPage.getElementsByTag("picture").attr("content"); eventLocation = htmlPage.select("p.listing-map-card-street-address.text-default").text(); temp = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content"); if (temp.length() >= 20) { startingTime = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content") .substring(0, 19); } else { startingTime = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content"); } temp = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content"); if (temp.length() >= 20) { endingTime = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content") .substring(0, 19); } else { endingTime = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content"); 
} ticketURL = url + "#tickets"; // TODO Tags to be modified to fit in the format of Open Event "topic" tagSection = htmlPage.getElementsByAttributeValue("data-automation", "ListingsBreadcrumbs"); tagSpan = tagSection.select("span"); topic = ""; int iterator = 0, k = 0; for (Element e : tagSpan) { if (iterator % 2 == 0) { tags[k][1] = "www.eventbrite.com" + e.select("a.js-d-track-link.badge.badge--tag.l-mar-top-2").attr("href"); } else { tags[k][0] = e.text(); k++; } iterator++; } creator.put("email", ""); creator.put("id", "1"); // By Default temp = htmlPage.getElementsByAttributeValue("property", "event:location:latitude").attr("content"); if (temp.length() > 0) { latitude = Float.valueOf( htmlPage.getElementsByAttributeValue("property", "event:location:latitude").attr("content")); } temp = htmlPage.getElementsByAttributeValue("property", "event:location:longitude").attr("content"); if (temp.length() > 0) { longitude = Float.valueOf( htmlPage.getElementsByAttributeValue("property", "event:location:longitude").attr("content")); } // TODO This returns: "events.event" which is not supported by Open // Event Generator // eventType = htmlPage.getElementsByAttributeValue("property", // "og:type").attr("content"); String organizerName = null; String organizerLink = null; String organizerProfileLink = null; String organizerWebsite = null; String organizerContactInfo = null; String organizerDescription = null; String organizerFacebookFeedLink = null; String organizerTwitterFeedLink = null; String organizerFacebookAccountLink = null; String organizerTwitterAccountLink = null; temp = htmlPage.select("a.js-d-scroll-to.listing-organizer-name.text-default").text(); if (temp.length() >= 5) { organizerName = htmlPage.select("a.js-d-scroll-to.listing-organizer-name.text-default").text() .substring(4); } else { organizerName = ""; } organizerLink = url + "#listing-organizer"; organizerProfileLink = htmlPage .getElementsByAttributeValue("class", "js-follow js-follow-target follow-me 
fx--fade-in is-hidden") .attr("href"); organizerContactInfo = url + "#lightbox_contact"; Document orgProfilePage = null; try { orgProfilePage = Jsoup.connect(organizerProfileLink).get(); } catch (Exception e) { e.printStackTrace(); } if (orgProfilePage != null) { t = orgProfilePage.getElementsByAttributeValue("class", "l-pad-vert-1 organizer-website"); if (t != null) { organizerWebsite = orgProfilePage .getElementsByAttributeValue("class", "l-pad-vert-1 organizer-website").text(); } else { organizerWebsite = ""; } t = orgProfilePage.select("div.js-long-text.organizer-description"); if (t != null) { organizerDescription = orgProfilePage.select("div.js-long-text.organizer-description").text(); } else { organizerDescription = ""; } organizerFacebookFeedLink = organizerProfileLink + "#facebook_feed"; organizerTwitterFeedLink = organizerProfileLink + "#twitter_feed"; t = orgProfilePage.getElementsByAttributeValue("class", "fb-page"); if (t != null) { organizerFacebookAccountLink = orgProfilePage.getElementsByAttributeValue("class", "fb-page") .attr("data-href"); } else { organizerFacebookAccountLink = ""; } t = orgProfilePage.getElementsByAttributeValue("class", "twitter-timeline"); if (t != null) { organizerTwitterAccountLink = orgProfilePage .getElementsByAttributeValue("class", "twitter-timeline").attr("href"); } else { organizerTwitterAccountLink = ""; } } JSONArray socialLinks = new JSONArray(); JSONObject fb = new JSONObject(); fb.put("id", "1"); fb.put("name", "Facebook"); fb.put("link", organizerFacebookAccountLink); socialLinks.put(fb); JSONObject tw = new JSONObject(); tw.put("id", "2"); tw.put("name", "Twitter"); tw.put("link", organizerTwitterAccountLink); socialLinks.put(tw); JSONArray jsonArray = new JSONArray(); JSONObject event = new JSONObject(); event.put("event_url", url); event.put("id", eventID); event.put("name", eventName); event.put("description", eventDescription); event.put("color", eventColor); event.put("background_url", imageLink); 
event.put("closing_datetime", closingDateTime); event.put("creator", creator); event.put("email", email); event.put("location_name", eventLocation); event.put("latitude", latitude); event.put("longitude", longitude); event.put("start_time", startingTime); event.put("end_time", endingTime); event.put("logo", imageLink); event.put("organizer_description", organizerDescription); event.put("organizer_name", organizerName); event.put("privacy", privacy); event.put("schedule_published_on", schedulePublishedOn); event.put("state", state); event.put("type", eventType); event.put("ticket_url", ticketURL); event.put("social_links", socialLinks); event.put("topic", topic); jsonArray.put(event); JSONObject org = new JSONObject(); org.put("organizer_name", organizerName); org.put("organizer_link", organizerLink); org.put("organizer_profile_link", organizerProfileLink); org.put("organizer_website", organizerWebsite); org.put("organizer_contact_info", organizerContactInfo); org.put("organizer_description", organizerDescription); org.put("organizer_facebook_feed_link", organizerFacebookFeedLink); org.put("organizer_twitter_feed_link", organizerTwitterFeedLink); org.put("organizer_facebook_account_link", organizerFacebookAccountLink); org.put("organizer_twitter_account_link", organizerTwitterAccountLink); jsonArray.put(org); JSONArray microlocations = new JSONArray(); jsonArray.put(new JSONObject().put("microlocations", microlocations)); JSONArray customForms = new JSONArray(); jsonArray.put(new JSONObject().put("customForms", customForms)); JSONArray sessionTypes = new JSONArray(); jsonArray.put(new JSONObject().put("sessionTypes", sessionTypes)); JSONArray sessions = new JSONArray(); jsonArray.put(new JSONObject().put("sessions", sessions)); JSONArray sponsors = new JSONArray(); jsonArray.put(new JSONObject().put("sponsors", sponsors)); JSONArray speakers = new JSONArray(); jsonArray.put(new JSONObject().put("speakers", speakers)); JSONArray tracks = new JSONArray(); 
jsonArray.put(new JSONObject().put("tracks", tracks)); String userHome = System.getProperty("user.home"); String path = userHome + "/Downloads/EventBriteInfo"; new File(path).mkdir(); try (FileWriter file = new FileWriter(path + "/event.json")) { file.write(event.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/org.json")) { file.write(org.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/social_links.json")) { file.write(socialLinks.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/microlocations.json")) { file.write(microlocations.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/custom_forms.json")) { file.write(customForms.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/session_types.json")) { file.write(sessionTypes.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/sessions.json")) { file.write(sessions.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/sponsors.json")) { file.write(sponsors.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/speakers.json")) { file.write(speakers.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/tracks.json")) { file.write(tracks.toString()); } catch (IOException e1) { e1.printStackTrace(); } SusiThought json = new SusiThought(); json.setData(jsonArray); return json; }
From source file:org.loklak.api.search.InstagramProfileScraper.java
/**
 * Fetches an Instagram profile page and returns the {@code entry_data} value embedded in one of
 * its script tags.
 *
 * <p>The JSON is read from the seventh {@code <script>} element, after skipping the first 21
 * characters of its content (presumably a JavaScript assignment prefix in front of the JSON -
 * confirm against a live page). The resulting thought's data is a one-element array holding
 * the {@code entry_data} value.
 *
 * <p>If the page cannot be fetched (the failure is printed), has fewer than seven script
 * elements, or the selected script is too short to contain a payload, the thought carries an
 * empty array instead of the call failing with a null-pointer or index exception. A payload
 * that is not valid JSON, or that lacks {@code entry_data}, still raises the JSON library's
 * exception as before.
 *
 * @param profile profile name appended to {@code https://www.instagram.com/}
 * @return a thought whose data array holds the profile's {@code entry_data}, or is empty when
 *         the data could not be located
 */
public static SusiThought scrapeInstagram(String profile) {
    final int scriptIndex = 6;
    final int payloadOffset = 21;

    Document htmlPage = null;
    try {
        htmlPage = Jsoup.connect("https://www.instagram.com/" + profile).get();
    } catch (IOException e) {
        e.printStackTrace();
    }

    JSONArray instaProfile = new JSONArray();
    // Guard each step that previously assumed a successful fetch and a fixed page layout.
    if (htmlPage != null && htmlPage.getElementsByTag("script").size() > scriptIndex) {
        String scriptBody = htmlPage.getElementsByTag("script").get(scriptIndex).html();
        if (scriptBody.length() > payloadOffset) {
            JSONObject obj = new JSONObject(scriptBody.substring(payloadOffset));
            instaProfile.put(obj.get("entry_data"));
        }
    }

    SusiThought json = new SusiThought();
    json.setData(instaProfile);
    return json;
}