List of usage examples for the org.jsoup.nodes.Document method title()
public String title()
From source file:com.techcavern.wavetact.eventListeners.FunMsgListener.java
/**
 * Handles a channel message: reads the channel's "funmsg" property on the calling thread, then
 * hands the word-by-word scan of the message to {@code Registry.threadPool}.
 *
 * @param event the incoming channel message
 * @throws Exception if reading the "funmsg" channel property fails
 */
@Override
public void onMessage(MessageEvent event) throws Exception {
    Record funRecord = DatabaseUtils.getChannelProperty(
            IRCUtils.getNetworkNameByNetwork(event.getBot()), event.getChannel().getName(), "funmsg");
    final boolean funEnabled = funRecord != null
            && funRecord.getValue(Channelproperty.CHANNELPROPERTY.VALUE).equalsIgnoreCase("true");

    Registry.threadPool.execute(() -> scanMessage(event, funEnabled));
}

/**
 * Scans every space-separated word of the message. A "yolo"-like word (when fun messages are on)
 * gets the sender kicked and ends the scan; a valid URL is fetched and either triggers the
 * stop-irc-bullying.eu kick or has its page title announced when "autourl" is on.
 */
private void scanMessage(MessageEvent event, boolean funEnabled) {
    String commandPrefix = IRCUtils.getCommandChar(event.getBot(), event.getChannel());
    if (commandPrefix == null) {
        return;
    }
    boolean permitted =
            PermUtils.getPermLevel(event.getBot(), event.getUser().getNick(), event.getChannel()) > -2;
    if (!permitted || event.getMessage().startsWith(commandPrefix)) {
        return;
    }

    for (String rawWord : StringUtils.split(event.getMessage(), " ")) {
        try {
            String word = Colors.removeFormattingAndColors(rawWord);

            // Collapse runs of "o"/"0" so stretched spellings such as "yoooolo0" still match.
            String collapsed = word.toLowerCase().replaceAll("o+", "o").replaceAll("0+", "o");
            if (collapsed.contains("yolo") && funEnabled) {
                kickOrPretend(event, "YOLO", " (YOLO)");
                return;
            }

            boolean autoUrl = channelFlag(event, "autourl");
            boolean assumeHttp = channelFlag(event, "ignorehttp");
            if (assumeHttp && !word.startsWith("https://") && !word.startsWith("http://")) {
                word = "http://" + word;
            }
            if (!(funEnabled || autoUrl) || !Registry.urlValidator.isValid(word)) {
                continue;
            }

            try {
                Document page = Jsoup.connect(word).userAgent(Registry.USER_AGENT).get();
                if (page.location().contains("stop-irc-bullying.eu") && funEnabled) {
                    kickOrPretend(event, "? \\ ()/ ? [https://goo.gl/Tkb9dh]",
                            " (? \\ ()/ ?) [https://goo.gl/Tkb9dh]");
                    /*
                     * Original author's note: apologies to those using this site responsibly. In
                     * the author's experience it is linked for entertainment far more often than
                     * for its intended purpose. If you do mean it, explain to the person how and
                     * why they offended you instead of linking a generic site; if they ignore
                     * you, ignore them.
                     */
                } else if (autoUrl) {
                    String title = page.title();
                    if (!title.isEmpty()) {
                        IRCUtils.sendMessage(event.getBot(), event.getChannel(),
                                "[" + IRCUtils.noPing(event.getUser().getNick()) + "] " + title, "");
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/** Returns whether the named channel property exists and equals "true" (case-insensitive). */
private boolean channelFlag(MessageEvent event, String property) throws Exception {
    Record record = DatabaseUtils.getChannelProperty(
            IRCUtils.getNetworkNameByNetwork(event.getBot()), event.getChannel().getName(), property);
    return record != null && record.getValue(CHANNELPROPERTY.VALUE).equalsIgnoreCase("true");
}

/** Kicks the sender when the bot is able to; otherwise sends a "kicks nick ..." action instead. */
private void kickOrPretend(MessageEvent event, String kickReason, String actionSuffix) throws Exception {
    if (IRCUtils.checkIfCanKick(event.getChannel(), event.getBot(), event.getUser())) {
        IRCUtils.sendKick(event.getBot().getUserBot(), event.getUser(), event.getBot(),
                event.getChannel(), kickReason);
    } else {
        IRCUtils.sendAction(event.getUser(), event.getBot(), event.getChannel(),
                "kicks " + IRCUtils.noPing(event.getUser().getNick()) + actionSuffix, "");
    }
}
From source file:autoInsurance.BeiJPiccImpl.java
public String login(String in) { // TODO Auto-generated method stub String out = ""; JSONObject jsonObject = JSONObject.fromObject(in); String ukey = jsonObject.getString("ukey"); String loginName = jsonObject.getString("loginName"); String password = jsonObject.getString("password"); String url = "http://10.134.136.48:8000/prpall/index.jsp"; String httpOrgCreateTestRtn = httpClientUtil.doPost(url, new HashMap<String, String>(), charset); if (httpOrgCreateTestRtn == null) { return "{\"success\": false, \"msg\": \"\"}"; }//from ww w. j a v a2 s. c om // write2Html(httpOrgCreateTestRtn); Document doc = Jsoup.parse(httpOrgCreateTestRtn); System.out.println(doc.title()); if (doc.title().contains("PICC")) return "{\"success\": false, \"msg\": \"!\"}"; String action = ""; if (doc.getElementById("fm") != null) action = doc.getElementById("fm").attr("action"); url = "https://10.134.136.48:8888" + action; String lt = doc.getElementsByAttributeValue("name", "lt").get(0).attr("value"); String postData = "PTAVersion=&toSign=&Signature=&rememberFlag=0&userMac=&key=yes&errorKey=null&loginMethod=nameAndPwd&username=" + loginName + "&password=" + password + "<=" + lt + "&_eventId=submit&pcguid=&button.x=20&button.y=17"; Map<String, String> map = null; try { map = parse2Map(postData); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } String respStr = httpClientUtil.doPost(url, map, charset); write2Html(respStr); doc = Jsoup.parse(respStr); System.out.println(doc.title()); // httpOrgCreateTestRtn = httpClientUtil.doPost("http://10.134.136.48:8000/prpall/business/quickProposal.do?bizType=PROPOSAL&editType=NEW&is4S=Y",null,charset); // doc = Jsoup.parse(httpOrgCreateTestRtn); // try { // init(doc); // } catch (Exception e1) { // // TODO Auto-generated catch block // e1.printStackTrace(); // } String comCode = templateData.get("prpCmain.comCode"); // String handler1Code = templateData.get("prpCmain.handler1Code");// String agentCode = 
templateData.get("prpCmain.agentCode");// String businessNature = templateData.get("prpCmain.businessNature");// String param = "actionType=query&fieldIndex=206&fieldValue=" + agentCode + "&codeMethod=change&codeType=select&codeRelation=0%2C1%2C2&isClear=Y&otherCondition=operateDate%3D" + new SimpleDateFormat("yyyy-MM-dd").format(new Date()) + "%2CriskCode%3DDAA%2CcomCode%3D" + comCode + "%2CbusinessNature%3D" + businessNature + "&typeParam=&callBackMethod=MainTotal.setAgentCode()%3BMainTotal.clearForAgentType()%3BItemCar.checkSelectKYFlag()%3B&getDataMethod=getAgents"; respStr = httpClientUtil.doPost("http://10.134.136.48:8000/prpall/common/changeCodeInput.do?" + param, new HashMap<String, String>(), charset); //System.out.println(respStr);// 11003O100375_FIELD_SEPARATOR__FIELD_SEPARATOR_3O1000 String[] _field_separator = respStr.split("_FIELD_SEPARATOR_"); if (_field_separator.length < 3) return "{\"success\": false, \"msg\": \"\"}"; String agentName = _field_separator[1]; String agentType = _field_separator[2]; templateData.put("agentType", agentType); try { param = "comCode=" + URLEncoder.encode(comCode, charset) + "&handler1Code=" + URLEncoder.encode(handler1Code, charset) + "&agentCode=" + URLEncoder.encode(agentCode, charset) + "&businessNature=" + URLEncoder.encode(businessNature, charset); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } respStr = httpClientUtil.doPost("http://10.134.136.48:8000/prpall/business/getCheckUserMsg.do?" 
+ param, new HashMap<String, String>(), charset); Map retMap = JackJson.fromJsonToObject(respStr, Map.class); //System.out.println(retMap); String b = templateData.get("isCqp"); String qualificationName = ""; if (templateData.get("qualificationName") != null) { if (StringUtils.equals("1", b)) { qualificationName = templateData.get("prpQmainVoagentName"); } else { qualificationName = templateData.get("prpCmainagentName"); } } templateData.put("qualificationName", qualificationName); templateData.put("qualificationNo", (String) ((Map) ((List) retMap.get("data")).get(0)).get("permitNo")); templateData.put("prpCmainCommon.queryArea", "110000"); templateData.put("queryArea", ""); templateData.put("prpCinsureds[0].countryCode", "CHN"); templateData.put("resident[0]", "0"); templateData.put("LicenseColorCodeDes", ""); templateData.put("prpCitemCar.licenseColorCode", "01"); // 115192BJ templateData.put("agentCodeValidValue", ukey); templateData.put("agentCodeValidType", "U"); out = "{\"success\": true, \"msg\": \"" + loginName + "," + agentName + ",\"}"; return out; }
From source file:autoInsurance.BeiJPiccImpl.java
public String saveAndHeB(String in, Map<String, String> formMap) { // TODO Auto-generated method stub JSONObject jsonObject = JSONObject.fromObject(in); String hebXianZ = jsonObject.getString("hebXianZ"); // String saveHebId = jsonObject.getString("saveHebId"); JSONArray relation = jsonObject.getJSONArray("relation"); String carOwner_customMobile = ""; String carOwner_addressCName = ""; String carOwner_customerCode = ""; String carOwner_identifyNumber = ""; String carOwner_customerCName = ""; for (Object object : relation) { JSONObject jObj = (JSONObject) object; // if(jObj.getString("type").equals("cheZ")) { carOwner_customMobile = jObj.getString("customMobile"); carOwner_addressCName = jObj.getString("addressCName"); carOwner_customerCode = jObj.getString("customerCode"); carOwner_identifyNumber = jObj.getString("identifyNumber"); carOwner_customerCName = jObj.getString("customerCName"); continue; // } }/* w w w.j ava 2 s . com*/ formMap.put("insuredCarOwner", carOwner_customerCName); formMap.put("prpCinsureds[0].sex", "1");//0 formMap.put("prpCinsureds[0].age", "49");// formMap.put("prpCinsureds[0].insuredFlag", "11100000000000000000000000000A"); formMap.put("prpCinsureds[0].auditStatus", "2");// formMap.put("prpCinsureds[0].versionNo", "3");// formMap.put("prpCinsureds[0].identifyNumber", carOwner_identifyNumber);// formMap.put("prpCinsureds[0].insuredAddress", carOwner_addressCName);// formMap.put("prpCinsureds[0].insuredCode", carOwner_customerCode);// formMap.put("prpCinsureds[0].drivingLicenseNo", carOwner_identifyNumber);// formMap.put("prpCinsureds[0].mobile", carOwner_customMobile);// formMap.put("prpCcarShipTax.taxPayerCode", carOwner_customerCode);// formMap.put("prpCcarShipTax.taxPayerNumber", carOwner_identifyNumber);// formMap.put("prpCcarShipTax.taxPayerIdentNo", carOwner_identifyNumber);// String url = "http://10.134.136.48:8000/prpall/business/refreshPlanByTimes.do"; String respStr = httpClientUtil.doPost(url, formMap, "gbk"); 
System.out.println(":"); System.out.println(respStr); Map planMap = JackJson.fromJsonToObject(respStr, Map.class); List planData = (List) planMap.get("data"); for (int i = 0; i < planData.size(); i++) { Map plan = (Map) planData.get(i); formMap.put("cplans[" + i + "].backPlanFee", plan.get("planFee").toString()); formMap.put("cplans[" + i + "].planFee", plan.get("planFee").toString());//null formMap.put("cplan[" + i + "].payReasonC", plan.get("payReasonName").toString());//null formMap.put("prpCplanTemps[" + i + "].netPremium", plan.get("netPremium") == null ? "" : plan.get("netPremium").toString());//7275.58 formMap.put("prpCplanTemps[" + i + "].payReason", plan.get("payReason").toString());//null formMap.put("prpCplanTemps[" + i + "].taxPremium", plan.get("taxPremium") == null ? "" : plan.get("taxPremium").toString());//436.54 formMap.put("prpCplanTemps[" + i + "].planDate", timeStamp2Date(((Map) plan.get("planDate")).get("time").toString(), "yyyy-M-d"));//null formMap.put("prpCplanTemps[" + i + "].subsidyRate", plan.get("subsidyRate").toString());//null formMap.put("prpCplanTemps[" + i + "].payNo", plan.get("payNo").toString());//null formMap.put("prpCplanTemps[" + i + "].isBICI", plan.get("isBICI").toString());//null formMap.put("prpCplanTemps[" + i + "].planFee", plan.get("planFee").toString());//null formMap.put("prpCplanTemps[" + i + "].delinquentFee", plan.get("delinquentFee").toString());//null formMap.put("cplan_[" + i + "].payReasonC", plan.get("payReasonName").toString());//null } url = "http://10.134.136.48:8000/prpall/business/queryPayFor.do?agreementNo=&riskCode=DAA&comCode=11026871&chgCostRate=0"; respStr = httpClientUtil.doPost(url, formMap, "gbk"); System.out.println("querypayfor:"); System.out.println(respStr); Map payMap = JackJson.fromJsonToObject(respStr, Map.class); if (((List) payMap.get("data")).size() < 1) return "\"success\": false, \"msg\":\" queryPayFor \""; List prpDpayForPolicies = (List) ((Map) ((List) 
payMap.get("data")).get(0)).get("prpDpayForPolicies"); for (int i = 0; i < prpDpayForPolicies.size(); i++) { Map dpay = (Map) prpDpayForPolicies.get(i); formMap.put("prpCcommissionsTemp[" + i + "].agreementNo", ((Map) dpay.get("id")).get("agreementNo").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].riskCode", dpay.get("riskCode").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].auditRate", "");//null formMap.put("prpCcommissionsTemp[" + i + "].coinsRate", "100");//null formMap.put("prpCcommissionsTemp[" + i + "].adjustFlag", dpay.get("adjustFlag").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].costRateUpper", dpay.get("costRateUpper").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].configCode", ((Map) dpay.get("id")).get("configCode").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].costType", dpay.get("costType").toString());//null formMap.put("prpCcommissionsTemp[" + i + "].costFee", "0");//null if (dpay.get("riskCode").toString().equals("DAA")) { formMap.put("prpCcommissionsTemp[" + i + "].sumPremium", formMap.get("prpCmain.sumPremium"));//null } else { formMap.put("prpCcommissionsTemp[" + i + "].sumPremium", formMap.get("prpCitemKindCI.premium"));//null } formMap.put("prpCcommissionsTemp[" + i + "].costRate", dpay.get("costRate").toString());//null } List prpDdismantleDetails = (List) ((Map) ((List) payMap.get("data")).get(0)).get("prpDdismantleDetails"); for (int i = 0; i < prpDdismantleDetails.size(); i++) { Map dismant = (Map) prpDdismantleDetails.get(i); formMap.put("prpDdismantleDetails[" + i + "].roleName", dismant.get("roleName").toString()); formMap.put("prpDdismantleDetails[" + i + "].id.agreementNo", ((Map) dismant.get("id")).get("agreementNo").toString()); formMap.put("prpDdismantleDetails[" + i + "].roleFlag", dismant.get("roleFlag").toString()); formMap.put("prpDdismantleDetails[" + i + "].id.configCode", ((Map) dismant.get("id")).get("configCode").toString()); 
formMap.put("prpDdismantleDetails[" + i + "].roleCode_uni", dismant.get("roleCode_uni").toString()); formMap.put("prpDdismantleDetails[" + i + "].id.roleCode", ((Map) dismant.get("id")).get("roleCode").toString()); formMap.put("prpDdismantleDetails[" + i + "].costRate", dismant.get("costRate").toString()); formMap.put("prpDdismantleDetails[" + i + "].businessNature", dismant.get("businessNature").toString()); formMap.put("prpDdismantleDetails[" + i + "].id.assignType", ((Map) dismant.get("id")).get("assignType").toString()); formMap.put("prpDdismantleDetails[" + i + "].flag", dismant.get("flag").toString()); } List prpCsaless = (List) ((Map) ((List) payMap.get("data")).get(0)).get("prpCsaless"); prpCsaless = prpCsaless == null ? new ArrayList() : prpCsaless; for (int j = 0; j < prpCsaless.size(); j++) { Map csale = (Map) prpCsaless.get(j); formMap.put("prpCsaless[" + j + "].totalRate", csale.get("totalRate").toString()); formMap.put("prpCsaless[" + j + "].riskCode", csale.get("riskCode").toString()); formMap.put("prpCsaless[" + j + "].id.salesCode", ((Map) csale.get("id")).get("salesCode").toString()); formMap.put("prpCsaless[" + j + "].oriSplitNumber", csale.get("oriSplitNumber").toString()); formMap.put("prpCsaless[" + j + "].id.salesDetailCode", ((Map) csale.get("id")).get("salesDetailCode").toString()); formMap.put("prpCsaless[" + j + "].totalRateMax", csale.get("totalRateMax").toString()); formMap.put("prpCsaless[" + j + "].splitWay", csale.get("splitWay").toString()); formMap.put("prpCsaless[" + j + "].splitFee", csale.get("splitFee").toString()); formMap.put("prpCsaless[" + j + "].id.proposalNo", ((Map) csale.get("id")).get("proposalNo").toString()); formMap.put("prpCsaless[" + j + "].salesName", csale.get("salesName").toString()); formMap.put("prpCsaless[" + j + "].salesDetailName", csale.get("salesDetailName").toString()); formMap.put("prpCsaless[" + j + "].splitRate", csale.get("splitRate").toString()); formMap.put("prpCsaless[" + j + "].flag", 
csale.get("flag").toString()); formMap.put("prpCsaless[" + j + "].agreementNo", csale.get("agreementNo").toString()); } Map<String, Object> outMap = new HashMap<String, Object>(); outMap.put("success", "false"); outMap.put("msg", ""); respStr = httpClientUtil.doPost("http://10.134.136.48:8000/prpall/business/insert4S.do", formMap, "GBK"); System.out.println(": " + respStr); String toubdH = respStr.split(",")[0]; String toubdH2 = ""; if (respStr.split(",").length > 1) toubdH2 = respStr.split(",")[1]; if (!respStr.contains("errorMessage")) { String respStr2 = httpClientUtil.doPost( "http://10.134.136.48:8000/prpall/business/editSubmitUndwrt.do?bizNo=" + toubdH, new HashMap<String, String>(), "GBK"); respStr2 = httpClientUtil.doPost( "http://10.134.136.48:8000/prpall/business/editSubmitUndwrt.do?bizNo=" + toubdH2, new HashMap<String, String>(), "GBK"); outMap.put("syxToubdh", toubdH); outMap.put("jqxToubdh", toubdH2); outMap.put("success", "true"); outMap.put("msg", ""); Document doc = null; try { String strURL = "http://10.134.136.48:8000/prpall/business/showUndwrtMsg.do?bizNo=" + toubdH + "&bizType=PROPOSAL"; if (!toubdH.equals("")) { respStr = httpClientUtil.doPost(strURL, new HashMap<String, String>(), "GBK"); System.out.println(respStr); doc = Jsoup.parse(respStr); ; System.out.println(", " + doc.title()); Element element = doc.getElementById("bpmUwNotionX[0].handleText"); if (element != null) { String syxHbYj = element.childNodes().get(0).toString(); outMap.put("syxHbYj", syxHbYj); } } strURL = "http://10.134.136.48:8000/prpall/business/showUndwrtMsg.do?bizNo=" + toubdH2 + "&bizType=PROPOSAL"; if (!toubdH2.equals("")) { respStr = httpClientUtil.doPost(strURL, new HashMap<String, String>(), "GBK"); System.out.println(respStr); doc = Jsoup.parse(respStr); System.out.println(", " + doc.title()); Element element = doc.getElementById("bpmUwNotionX[0].handleText"); if (element != null) { String jqxHbYj = element.childNodes().get(0).toString(); outMap.put("jqxHbYj", 
jqxHbYj); } } outMap.put("success", "true"); outMap.put("msg", ""); } catch (Exception e) { outMap.put("success", "false"); outMap.put("msg", e.getMessage()); } } else outMap.put("msg", respStr); // outMap.put("success", "true"); // outMap.put("msg", ""); // if(hebXianZ.equals("0")) { // outMap.put("syxToubdh", "TDDA201611020000717134"); // outMap.put("syxHbYj", " "); // } // // if(hebXianZ.equals("1")) { // outMap.put("jqxToubdh", "TDZA201611020000717134"); // outMap.put("jqxHbYj", " "); // } // // if(hebXianZ.equals("2")) { // outMap.put("syxToubdh", "TDDA201611020000717134"); // outMap.put("syxHbYj", " "); // outMap.put("jqxToubdh", "TDZA201611020000717134"); // outMap.put("jqxHbYj", " "); // } return JSONObject.fromObject(outMap).toString(); }
From source file:org.ednovo.gooru.application.util.ResourceImageUtil.java
/**
 * Builds a metadata map (title, description, duration and, on request, image URLs) for a URL.
 *
 * <p>URLs containing {@code VIMEO_VIDEO} or {@code YOUTUBE_VIDEO} are resolved through the
 * matching feed lookup. When no feed is available, or the feed reports status 404, the page
 * itself is fetched and its title, meta description and image sources are used instead.
 *
 * @param url the resource URL; may be {@code null}
 * @param resourceTitle not used by this method
 * @param fetchThumbnail whether to collect image URLs under {@code IMAGES}
 * @return a map holding {@code TITLE}, {@code DESCRIPTION} and {@code DURATION}, plus
 *         {@code IMAGES} when {@code fetchThumbnail} is set
 */
public Map<String, Object> getResourceMetaData(String url, String resourceTitle, boolean fetchThumbnail) {
    final Map<String, Object> result = new HashMap<String, Object>();
    final Set<String> thumbnails = new LinkedHashSet<String>();

    ResourceMetadataCo feed = null;
    if (url != null) {
        if (url.contains(VIMEO_VIDEO)) {
            feed = getMetaDataFromVimeoVideo(url);
        } else if (url.contains(YOUTUBE_VIDEO)) {
            feed = getYoutubeResourceFeeds(url, null);
        }
    }

    String pageTitle = "";
    String pageDescription = "";
    String duration = "";

    boolean feedUsable = !(feed == null || feed.getUrlStatus() == 404);
    if (feedUsable) {
        pageTitle = feed.getTitle();
        pageDescription = feed.getDescription();
        duration = feed.getDuration().toString();
    } else {
        Document page = loadPage(url);
        if (page != null) {
            pageTitle = page.title();
            pageDescription = metaDescription(page);
            result.put(DESCRIPTION, pageDescription);
            if (fetchThumbnail) {
                collectImages(page, thumbnails);
            }
        }
    }

    if (fetchThumbnail) {
        if (feed != null && feed.getThumbnail() != null) {
            thumbnails.add(feed.getThumbnail());
        }
        result.put(IMAGES, thumbnails);
    }
    result.put(TITLE, pageTitle);
    result.put(DESCRIPTION, pageDescription);
    result.put(DURATION, duration);
    return result;
}

/** Fetches the page with a 6000 ms timeout; returns {@code null} for non-http URLs or on any error. */
private Document loadPage(String url) {
    Document page = null;
    try {
        boolean looksLikeHttp = url != null && (url.contains("http://") || url.contains("https://"));
        if (looksLikeHttp) {
            page = Jsoup.connect(url).timeout(6000).get();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return page;
}

/** Returns the content of the first meta element named {@code DESCRIPTION}, or "" when absent. */
private String metaDescription(Document page) {
    Elements metaTags = page.getElementsByTag(META);
    if (metaTags == null) {
        return "";
    }
    for (Element tag : metaTags) {
        String name = tag.attr(NAME);
        if (name != null && name.equalsIgnoreCase(DESCRIPTION)) {
            return tag.attr(CONTENT);
        }
    }
    return "";
}

/** Adds absolute image sources to {@code sink}, checking the size limit after every element. */
private void collectImages(Document page, Set<String> sink) {
    Elements withSrc = page.select("[src]");
    if (withSrc == null) {
        return;
    }
    for (Element candidate : withSrc) {
        if (candidate.tagName().equals(IMG)) {
            sink.add(candidate.attr("abs:src"));
        }
        if (sink.size() >= SUGGEST_IMAGE_MAX_SIZE) {
            break;
        }
    }
}
From source file:org.opens.rules.doc.utils.ruledesign.extractor.ExtractRuleDesignHtmlCode.java
/** * Before using it please set the FOLDER variable with the path where you * want to create your extract html files. * * @param args the command line arguments *///from ww w .ja va 2 s. com public static void main(String[] args) { // first boucle for is for the theme number for (int i = 1; i < MAX_THEME_NUMBER; i++) { // second boucle for is for the critere number for (int j = 1; j < MAX_CRITERE_NUMBER; j++) { // third boucle for is for the test number for (int k = 1; k < MAX_TEST_NUMBER; k++) { URL url = null; try { Connection connection = Jsoup.connect(PREFIX_URL_TO_REFERENTIAL + i + "-" + j + "-" + k); Connection.Response resp = connection.response(); if (resp.statusCode() != 404) { url = new URL(PREFIX_URL_TO_REFERENTIAL + i + "-" + j + "-" + k); Document doc = Jsoup.parse(url, 4000); System.out.println(doc.title()); Elements summary = doc.select(".content.clear-block"); FileUtils.writeStringToFile( new File(FOLDER + "/RuleDesign/Rule-" + i + "-" + j + "-" + k + ".html"), summary.html()); } } catch (MalformedURLException ex) { System.out.println("URL MAL FORMEE"); } catch (IOException ex) { if (url != null) { System.out.println("URL 404 : " + url.toString()); } else { System.out.println("EMPTY URL"); } } } } } }
From source file:org.search.system.parsers.HtmlParser.java
/**
 * Downloads the page at {@code url} and turns it into a {@link Page}.
 *
 * <p>The tag list is made of the comma-separated "keywords" meta tag followed by the words of
 * the title and then the words of the "description" meta tag. A missing value is replaced by a
 * single space before splitting.
 *
 * @param url address of the page to fetch
 * @return the parsed page, or {@code null} when fetching fails with an {@link IOException}
 */
@Override
public Page parse(String url) {
    final Document document;
    try {
        document = Jsoup.connect(url).get();
    } catch (IOException e) {
        LogUtil.log(e.toString());
        return null;
    }

    final String pageTitle = ObjectUtils.defaultIfNull(document.title(), " ");
    final String pageDescription = ObjectUtils.defaultIfNull(getMetaTag(document, "description"), " ");
    final String pageKeywords = ObjectUtils.defaultIfNull(getMetaTag(document, "keywords"), " ");

    final List<String> titleWords = StringUtil.splitThenTrim(pageTitle, " ");
    final List<String> descriptionWords = StringUtil.splitThenTrim(pageDescription, " ");
    final List<String> allTags = StringUtil.splitThenTrim(pageKeywords, ",");
    allTags.addAll(titleWords);
    allTags.addAll(descriptionWords);

    return new ValidatedPage(pageTitle, pageDescription, allTags, url, DEFAULT_RANG);
}
From source file:Project.FILER.java
public static String[] Dealing_Files(File f) throws IOException //return array of important strings in the file { Text = ""; String[] Importants = { "", "", "" }; //first element is the title,second is all headers,third is img alt org.jsoup.nodes.Document doc = Jsoup.parse(f, "UTF-8"); Importants[0] = doc.title(); //get the title of the file //Text=Text+" "+doc.title(); String tag = "h"; String All_Headers = ""; Elements Header;/* w w w. j a v a 2 s . c o m*/ for (int i = 1; i < 20; i++) //loop to get text with headers tag of the file { tag = "h" + String.valueOf(i); Header = doc.select(tag); if (Header.size() > 0) { Header = doc.getElementsByTag(tag); String pConcatenated = ""; for (Element x : Header) { pConcatenated += x.text() + " "; } All_Headers = All_Headers + pConcatenated; } else break; } Importants[1] = All_Headers; Text = Text + " " + doc.text(); //get the text of the document Elements img = doc.getElementsByTag("img"); //get the text with img tag for (Element element : img) { if (element.attr("alt") != null && !(element.attr("alt").equals(""))) { Text = Text + " " + element.attr("alt"); Importants[2] = Importants[2] + " " + element.attr("alt"); } } return Importants; }