Usage examples for org.jsoup.nodes.Element#getElementsByTag
public Elements getElementsByTag(String tagName)
From source file: com.crawler.gsxt.htmlparser.GsxtAnhuiParser.java
public AicFeedJson anhuiResultParser(String html, Boolean isDebug) { LOOGER.info("The method of GsxtAnhuiParser.anhuiResultParser is begin !"); Gson gson = new Gson(); Map<String, Object> resultHtmlMap = gson.fromJson(html, new TypeToken<Map<String, Object>>() { }.getType());/*from w ww . ja va 2 s. c o m*/ //?result AicFeedJson gsxtFeedJson = new AicFeedJson(); //? AicpubInfo gsgsInfo = new AicpubInfo(); String gsgsxxHtml = (String) resultHtmlMap.get("gsgsxx"); Document gsgsxxDoc = Jsoup.parse(gsgsxxHtml); // ? //-----------------?-->? start----------------------- AicpubRegInfo gsgsDjInfo = new AicpubRegInfo(); Element djxxDiv = gsgsxxDoc.getElementById("jibenxinxi"); Elements djxx_tables = djxxDiv.getElementsByTag("table"); Element jbxx_table = djxx_tables.get(0); //? -->? AicpubRegBaseInfo gsgsDjJbInfo = new AicpubRegBaseInfo(); Elements jbxx_tds = jbxx_table.select("tr").select("td"); Elements jbxx_ths = jbxx_table.select("tr").select("th"); for (int i = 1; i < jbxx_ths.size(); i++) { if (jbxx_ths.get(i).text().trim().contains("?") || jbxx_ths.get(i).text().trim().contains("?")) { gsgsDjJbInfo.setNum(jbxx_tds.get(i - 1).text());// ?? } if (jbxx_ths.get(i).text().trim().contains("??")) { gsgsDjJbInfo.setName(jbxx_tds.get(i - 1).text());// ?? } if (jbxx_ths.get(i).text().trim().contains("")) { gsgsDjJbInfo.setType(jbxx_tds.get(i - 1).text());// } if (jbxx_ths.get(i).text().trim().contains("") || jbxx_ths.get(i).text().trim().contains("") || jbxx_ths.get(i).text().trim().contains("??")) { gsgsDjJbInfo.setLegalRepr(jbxx_tds.get(i - 1).text());// /?? } if (jbxx_ths.get(i).text().trim().contains("") || jbxx_ths.get(i).text().trim().contains("")) { gsgsDjJbInfo.setRegCapital(jbxx_tds.get(i - 1).text());// } if (jbxx_ths.get(i).text().trim().contains("?") || jbxx_ths.get(i).text().trim().contains("")) { gsgsDjJbInfo.setRegDateTime(jbxx_tds.get(i - 1).text());// ? 
} if (jbxx_ths.get(i).text().trim().contains("?") || jbxx_ths.get(i).text().trim().contains("?") || jbxx_ths.get(i).text().trim().contains("??")) { gsgsDjJbInfo.setAddress(jbxx_tds.get(i - 1).text());// ??/? } if (jbxx_ths.get(i).text().trim().contains("??") || jbxx_ths.get(i).text().trim().contains("???")) { gsgsDjJbInfo.setStartDateTime(jbxx_tds.get(i - 1).text());// ???? } if (jbxx_ths.get(i).text().trim().contains("??") || jbxx_ths.get(i).text().trim().contains("???")) { gsgsDjJbInfo.setEndDateTime(jbxx_tds.get(i - 1).text());// ????? } if (jbxx_ths.get(i).text().trim().contains("??")) { gsgsDjJbInfo.setBusinessScope(jbxx_tds.get(i - 1).text());// ?? } if (jbxx_ths.get(i).text().trim().contains("")) { gsgsDjJbInfo.setRegAuthority(jbxx_tds.get(i - 1).text());// } if (jbxx_ths.get(i).text().trim().contains("")) { gsgsDjJbInfo.setApprovalDateTime(jbxx_tds.get(i - 1).text());// } if (jbxx_ths.get(i).text().trim().contains("?")) { gsgsDjJbInfo.setRegStatus(jbxx_tds.get(i - 1).text());// ? } if (jbxx_ths.get(i).text().trim().contains("??")) { gsgsDjJbInfo.setFormType(jbxx_tds.get(i - 1).text());// ?? } } gsgsDjInfo.setBaseInfo(gsgsDjJbInfo); //? -->?? 
List<AicpubRegStohrStohrinvestInfo> gsgsDjGdjczList = new ArrayList<AicpubRegStohrStohrinvestInfo>(); Object gsgsxx_gdxx_detail_object = resultHtmlMap.get("gsgsxx_gdxx_detail"); if (gsgsxx_gdxx_detail_object != null) { ArrayList<String> gdDetailList = (ArrayList<String>) gsgsxx_gdxx_detail_object; for (String gdxxDetailHtml : gdDetailList) { Document gdxxDetailDoc = Jsoup.parse(gdxxDetailHtml); Element gdxxDetailDiv = gdxxDetailDoc.getElementById("details"); Elements gdxxDetailTables = gdxxDetailDiv.select("table"); if (gdxxDetailTables != null && !gdxxDetailTables.isEmpty()) { Element gdxxDetailTable = gdxxDetailTables.get(0); if (gdxxDetailTable != null) { Elements gdxxDetailTrs = gdxxDetailTable.select("tr"); AicpubRegStohrStohrinvestInfo gsgsDjGdjczInfo = new AicpubRegStohrStohrinvestInfo(); for (Element gdxxDetailTr : gdxxDetailTrs) { Elements gdxxDetail_tds = gdxxDetailTr.select("td"); int tdSize = gdxxDetail_tds.size(); if (tdSize == 3) { String stockholder = gdxxDetail_tds.get(0).text(); String subAmount = gdxxDetail_tds.get(1).text(); String paidAmount = gdxxDetail_tds.get(2).text(); gsgsDjGdjczInfo.setStockholder(stockholder); gsgsDjGdjczInfo.setSubAmount(subAmount); gsgsDjGdjczInfo.setPaidAmount(paidAmount); } else if (tdSize == 6) { String sub_method = gdxxDetail_tds.get(0).text(); String sub_amount = gdxxDetail_tds.get(1).text(); String sub_czDate = gdxxDetail_tds.get(2).text(); String paid_method = gdxxDetail_tds.get(3).text(); String paid_amount = gdxxDetail_tds.get(4).text(); String paid_czDate = gdxxDetail_tds.get(5).text(); AicpubRegStohrStohrinvestInfo.AmountDetail subAmountDetail = gsgsDjGdjczInfo.new AmountDetail(); AicpubRegStohrStohrinvestInfo.AmountDetail paidAmountDetail = gsgsDjGdjczInfo.new AmountDetail(); List<AmountDetail> subAmountDetailList = new ArrayList<AicpubRegStohrStohrinvestInfo.AmountDetail>(); List<AmountDetail> paidAmountDetailList = new ArrayList<AicpubRegStohrStohrinvestInfo.AmountDetail>(); subAmountDetail.investMethod = 
sub_method; subAmountDetail.investAmount = sub_amount; subAmountDetail.investDateTime = sub_czDate; paidAmountDetail.investMethod = paid_method; paidAmountDetail.investAmount = paid_amount; paidAmountDetail.investDateTime = paid_czDate; subAmountDetailList.add(subAmountDetail); paidAmountDetailList.add(paidAmountDetail); gsgsDjGdjczInfo.setSubAmountDetails(subAmountDetailList); gsgsDjGdjczInfo.setPaidAmountDetails(paidAmountDetailList); } else if (tdSize == 9) { String stockholder = gdxxDetail_tds.get(0).text(); String subAmount = gdxxDetail_tds.get(1).text(); String paidAmount = gdxxDetail_tds.get(2).text(); String sub_method = gdxxDetail_tds.get(3).text(); String sub_amount = gdxxDetail_tds.get(4).text(); String sub_czDate = gdxxDetail_tds.get(5).text(); String paid_method = gdxxDetail_tds.get(6).text(); String paid_amount = gdxxDetail_tds.get(7).text(); String paid_czDate = gdxxDetail_tds.get(8).text(); AicpubRegStohrStohrinvestInfo.AmountDetail subAmountDetail = gsgsDjGdjczInfo.new AmountDetail(); AicpubRegStohrStohrinvestInfo.AmountDetail paidAmountDetail = gsgsDjGdjczInfo.new AmountDetail(); List<AmountDetail> subAmountDetailList = new ArrayList<AicpubRegStohrStohrinvestInfo.AmountDetail>(); List<AmountDetail> paidAmountDetailList = new ArrayList<AicpubRegStohrStohrinvestInfo.AmountDetail>(); subAmountDetail.investMethod = sub_method; subAmountDetail.investAmount = sub_amount; subAmountDetail.investDateTime = sub_czDate; paidAmountDetail.investMethod = paid_method; paidAmountDetail.investAmount = paid_amount; paidAmountDetail.investDateTime = paid_czDate; gsgsDjGdjczInfo.setStockholder(stockholder); gsgsDjGdjczInfo.setSubAmount(subAmount); gsgsDjGdjczInfo.setPaidAmount(paidAmount); subAmountDetailList.add(subAmountDetail); paidAmountDetailList.add(paidAmountDetail); gsgsDjGdjczInfo.setSubAmountDetails(subAmountDetailList); gsgsDjGdjczInfo.setPaidAmountDetails(paidAmountDetailList); } } gsgsDjGdjczList.add(gsgsDjGdjczInfo); } } } } //? -->? 
List<AicpubRegStohrInfo> gsgsDjGdList = new ArrayList<AicpubRegStohrInfo>(); Element invDivElement = djxxDiv.getElementById("invDiv"); if (invDivElement != null) { Elements guxxTables = invDivElement.select("table"); if (guxxTables != null && !guxxTables.isEmpty()) { Element gdxx_table = guxxTables.get(0); Elements gdxx_trs = gdxx_table.select("tr"); int i = 0; for (Element gdxx_tr : gdxx_trs) { Elements gdxx_tds = gdxx_tr.select("td"); AicpubRegStohrInfo gsgsdjgdInfo = new AicpubRegStohrInfo(); int trSize = gdxx_tds.size(); if (trSize > 0) { String gdName = gdxx_tds.get(0).text(); gsgsdjgdInfo.setName(gdName); } if (trSize > 1) { String idType = gdxx_tds.get(1).text(); gsgsdjgdInfo.setIdType(idType); } if (trSize > 2) { String idNum = gdxx_tds.get(2).text(); gsgsdjgdInfo.setIdNum(idNum); } if (trSize > 3) { String gdType = gdxx_tds.get(3).text(); gsgsdjgdInfo.setType(gdType); } if (trSize > 4) { String gdxq = gdxx_tds.get(4).text(); if (!"".equals(gdxq)) { if (gsgsDjGdjczList.size() > i) { gsgsdjgdInfo.setStohrInvestInfo(gsgsDjGdjczList.get(i++)); } } } gsgsDjGdList.add(gsgsdjgdInfo); } } } gsgsDjInfo.setStohrInfos(gsgsDjGdList); //? -->?? List<AicpubRegChangeInfo> gsgsDjBgList = new ArrayList<AicpubRegChangeInfo>(); Element bgxx_table = djxxDiv.getElementById("altTab"); if (bgxx_table != null) { Elements bgxx_trs = bgxx_table.select("tr"); for (Element bgxx_tr : bgxx_trs) { Elements bgxx_tds = bgxx_tr.getElementsByTag("td"); if (bgxx_tds.size() == 4) { String bgItem = bgxx_tds.get(0).text(); String bgqContent = bgxx_tds.get(1).text(); String bghContent = bgxx_tds.get(2).text(); String bgDate = bgxx_tds.get(3).text(); AicpubRegChangeInfo gsgsDjBgInfo = new AicpubRegChangeInfo(); gsgsDjBgInfo.setItem(bgItem); gsgsDjBgInfo.setPreContent(bgqContent); gsgsDjBgInfo.setPostContent(bghContent); gsgsDjBgInfo.setDateTime(bgDate); gsgsDjBgList.add(gsgsDjBgInfo); } } } gsgsDjInfo.setChangeInfos(gsgsDjBgList); gsgsInfo.setRegInfo(gsgsDjInfo); //-----------------?-->? 
end----------------------- //-----------------?-->? start----------------------- AicpubArchiveInfo gsgsBaInfo = new AicpubArchiveInfo(); //?-->?? List<AicpubArchivePrimemberInfo> gsgsBaZyryInfos = new ArrayList<AicpubArchivePrimemberInfo>(); Element memDivElement_table = gsgsxxDoc.getElementById("t30"); if (memDivElement_table != null) { String table_name = memDivElement_table.select("th").get(0).text().trim(); if (table_name.contains("??")) { Element zyryTable = gsgsxxDoc.getElementById("memDiv"); if (null != zyryTable) { Elements zyryTrElements = zyryTable.select("tr"); for (Element zyryTr : zyryTrElements) { Elements zyryTdElements = zyryTr.select("td"); if (zyryTdElements.size() == 6) { String zyry_name = zyryTdElements.get(1).text(); String zyry_position = zyryTdElements.get(2).text(); String zyry_name2 = zyryTdElements.get(4).text(); String zyry_position2 = zyryTdElements.get(5).text(); if (!"".equals(zyry_name)) { AicpubArchivePrimemberInfo gsgsBaZyryInfo = new AicpubArchivePrimemberInfo(); gsgsBaZyryInfo.setName(zyry_name); gsgsBaZyryInfo.setPosition(zyry_position); gsgsBaZyryInfos.add(gsgsBaZyryInfo); } if (!"".equals(zyry_name2)) { AicpubArchivePrimemberInfo gsgsBaZyryInfo = new AicpubArchivePrimemberInfo(); gsgsBaZyryInfo.setName(zyry_name2); gsgsBaZyryInfo.setPosition(zyry_position2); gsgsBaZyryInfos.add(gsgsBaZyryInfo); } } } gsgsBaInfo.setPriMemberInfos(gsgsBaZyryInfos); } } if (table_name.contains("?")) { Element zyryTable = gsgsxxDoc.getElementById("memDiv"); if (null != zyryTable) { Elements zyryTrElements = zyryTable.select("tr"); for (Element zyryTr : zyryTrElements) { Elements zyryTdElements = zyryTr.select("td"); if (zyryTdElements.size() == 4) { String zyry_name = zyryTdElements.get(1).text(); String zyry_name2 = zyryTdElements.get(3).text(); if (!"".equals(zyry_name)) { AicpubArchivePrimemberInfo gsgsBaZyryInfo = new AicpubArchivePrimemberInfo(); gsgsBaZyryInfo.setName(zyry_name); gsgsBaZyryInfos.add(gsgsBaZyryInfo); } if 
(!"".equals(zyry_name2)) { AicpubArchivePrimemberInfo gsgsBaZyryInfo = new AicpubArchivePrimemberInfo(); gsgsBaZyryInfo.setName(zyry_name2); gsgsBaZyryInfos.add(gsgsBaZyryInfo); } } } gsgsBaInfo.setPriMemberInfos(gsgsBaZyryInfos); } } if (table_name.contains("?")) { //?-->? List<AicpubArchiveMainDeptInfo> aicpubArchiveMainDeptInfos = new ArrayList<AicpubArchiveMainDeptInfo>(); ; Element bmzhuguanDivElement = gsgsxxDoc.getElementById("invDiv"); if (null != bmzhuguanDivElement) { Elements zyryTrElements = bmzhuguanDivElement.select("tbody").select("tr"); for (Element zyryTr : zyryTrElements) { Elements zyryTdElements = zyryTr.select("td"); String type = zyryTdElements.get(1).text(); String name = zyryTdElements.get(2).text(); String idType = zyryTdElements.get(3).text(); String idNum = zyryTdElements.get(4).text(); String showDate = zyryTdElements.get(5).text(); AicpubArchiveMainDeptInfo aicpubArchiveMainDeptInfo = new AicpubArchiveMainDeptInfo(); aicpubArchiveMainDeptInfo.setType(type); aicpubArchiveMainDeptInfo.setName(name); aicpubArchiveMainDeptInfo.setIdType(idType); aicpubArchiveMainDeptInfo.setIdNum(idNum); aicpubArchiveMainDeptInfo.setShowDate(showDate); aicpubArchiveMainDeptInfos.add(aicpubArchiveMainDeptInfo); } gsgsBaInfo.setMainDeptInfo(aicpubArchiveMainDeptInfos); } } } //?-->? 
List<AicpubArchiveBranchInfo> gsgsBaFzjgInfos = null; Element gsgsBaFzjgDiv = null; if (gsgsxxDoc.getElementById("childDiv") != null) { gsgsBaFzjgInfos = new ArrayList<AicpubArchiveBranchInfo>(); gsgsBaFzjgDiv = gsgsxxDoc.getElementById("childDiv"); Elements gsgsBaFzjgTrs = gsgsBaFzjgDiv.select("tr"); for (Element gsgsBaFzjgTr : gsgsBaFzjgTrs) { Elements gsgsBaFzjgTds = gsgsBaFzjgTr.select("td"); String fzjg_num = gsgsBaFzjgTds.get(1).text(); String fzjg_name = gsgsBaFzjgTds.get(2).text(); String fzjg_regAuthority = gsgsBaFzjgTds.get(3).text(); AicpubArchiveBranchInfo gsgsBaFzjgInfo = new AicpubArchiveBranchInfo(); gsgsBaFzjgInfo.setNum(fzjg_num); gsgsBaFzjgInfo.setName(fzjg_name); gsgsBaFzjgInfo.setRegAuthority(fzjg_regAuthority); gsgsBaFzjgInfos.add(gsgsBaFzjgInfo); } } gsgsBaInfo.setBranchInfos(gsgsBaFzjgInfos); //?-->? AicpubArchiveClearInfo gsgsBaQsInfo = new AicpubArchiveClearInfo(); Element beianElement = gsgsxxDoc.getElementById("beian"); if (null != beianElement) { Elements tables = beianElement.select("table"); if (tables.size() == 6) { Element gsgsBaQsTable = tables.get(tables.size() - 1); Elements gsgsBaQsTds = gsgsBaQsTable.select("td"); String leader = gsgsBaQsTds.get(0).text(); String members = gsgsBaQsTds.get(1).text(); List<String> memList = new ArrayList<String>(); memList.add(members); gsgsBaQsInfo.setLeader(leader); gsgsBaQsInfo.setMembers(memList); } } gsgsBaInfo.setClearInfo(gsgsBaQsInfo); gsgsInfo.setArchiveInfo(gsgsBaInfo); //-----------------?-->? end----------------------- //-----------------?-->? 
start----------------------- AicpubChatMortgInfo gsgsDcdydjInfo = new AicpubChatMortgInfo(); List<AicpubCChatMortgInfo> gsgsDcdydjDcdydjInfos = null; Element gsgsDcdydjDiv = null; if (gsgsxxDoc.getElementById("dongchandiya") != null) { gsgsDcdydjDcdydjInfos = new ArrayList<AicpubCChatMortgInfo>(); gsgsDcdydjDiv = gsgsxxDoc.getElementById("dongchandiya"); Elements gsgsDcdydjTrs = gsgsDcdydjDiv.select("#mortDiv").select("table").select("tr"); for (Element gsgsDcdydjTr : gsgsDcdydjTrs) { Elements gsgsDcdydjTds = gsgsDcdydjTr.select("td"); String regNum = gsgsDcdydjTds.get(1).text(); String regDate = gsgsDcdydjTds.get(2).text(); String reg_Authority = gsgsDcdydjTds.get(3).text(); String bdbzqAmount = gsgsDcdydjTds.get(4).text(); String status = gsgsDcdydjTds.get(5).text(); String pubDate = gsgsDcdydjTds.get(6).text(); String detail = gsgsDcdydjTds.get(7).text(); AicpubCChatMortgInfo gsgsDcdydjDcdydjInfo = new AicpubCChatMortgInfo(); gsgsDcdydjDcdydjInfo.setRegNum(regNum); gsgsDcdydjDcdydjInfo.setRegDateTime(regDate); gsgsDcdydjDcdydjInfo.setRegAuthority(reg_Authority); gsgsDcdydjDcdydjInfo.setGuaranteedDebtAmount(bdbzqAmount); gsgsDcdydjDcdydjInfo.setStatus(status); gsgsDcdydjDcdydjInfo.setPubDateTime(pubDate); gsgsDcdydjDcdydjInfo.setDetail(detail); gsgsDcdydjDcdydjInfos.add(gsgsDcdydjDcdydjInfo); } } if (isDebug) { gsgsDcdydjInfo.setHtml(gsgsDcdydjDiv.toString()); } gsgsDcdydjInfo.setChatMortgInfos(gsgsDcdydjDcdydjInfos); gsgsInfo.setChatMortgInfo(gsgsDcdydjInfo); //-----------------?-->? end----------------------- //-----------------?-->?? 
start----------------------- AicpubEqumortgregInfo gsgsGqczdjInfo = new AicpubEqumortgregInfo(); List<AicpubEEqumortgregInfo> gsgsGqczdjGqczdjInfos = null; Element gsgsGqczdjDiv = null; if (gsgsxxDoc.getElementById("guquanchuzhi") != null) { gsgsGqczdjGqczdjInfos = new ArrayList<AicpubEEqumortgregInfo>(); gsgsGqczdjDiv = gsgsxxDoc.getElementById("guquanchuzhi"); Elements gsgsGqczdjTrs = gsgsGqczdjDiv.select("#pledgeDiv").select("table").select("tr"); for (Element gsgsGqczdjTr : gsgsGqczdjTrs) { Elements gsgsGqczdjTds = gsgsGqczdjTr.select("tr").select("td"); String regNum = gsgsGqczdjTds.get(1).text(); String czr = gsgsGqczdjTds.get(2).text(); String czrIdNum = gsgsGqczdjTds.get(3).text(); String czgqAmount = gsgsGqczdjTds.get(4).text(); String zqr = gsgsGqczdjTds.get(5).text(); String zqrIdNum = gsgsGqczdjTds.get(6).text(); String gqczsldjDate = gsgsGqczdjTds.get(7).text(); String status = gsgsGqczdjTds.get(8).text(); String pubDate = gsgsGqczdjTds.get(9).text(); String changeSitu = gsgsGqczdjTds.get(10).text(); AicpubEEqumortgregInfo gsgsGqczdjGqczdjInfo = new AicpubEEqumortgregInfo(); gsgsGqczdjGqczdjInfo.setRegNum(regNum); gsgsGqczdjGqczdjInfo.setMortgagorName(czr); gsgsGqczdjGqczdjInfo.setMortgagorIdNum(czrIdNum); gsgsGqczdjGqczdjInfo.setMortgAmount(czgqAmount); gsgsGqczdjGqczdjInfo.setMortgageeName(zqr); gsgsGqczdjGqczdjInfo.setMortgageeIdNum(zqrIdNum); gsgsGqczdjGqczdjInfo.setRegDateTime(gqczsldjDate); gsgsGqczdjGqczdjInfo.setStatus(status); gsgsGqczdjGqczdjInfo.setPubDate(pubDate); gsgsGqczdjGqczdjInfo.setChangeSitu(changeSitu); gsgsGqczdjGqczdjInfos.add(gsgsGqczdjGqczdjInfo); } if (isDebug) { gsgsGqczdjInfo.setHtml(gsgsGqczdjDiv.toString()); } } gsgsGqczdjInfo.setEqumortgregInfos(gsgsGqczdjGqczdjInfos); gsgsInfo.setEquMortgRegInfo(gsgsGqczdjInfo); //-----------------?-->?? end----------------------- //-----------------?-->? start----------------------- /* * ? 
*/ AicpubAdmpunishInfo gsgsXzcfInfo = new AicpubAdmpunishInfo(); Element gsgsXzcfXzcfDiv = null; List<AicpubAAdmpunishInfo> gsgsXzcfXzcfInfos = null; if (gsgsxxDoc.getElementById("xingzhengchufa") != null) { gsgsXzcfXzcfInfos = new ArrayList<AicpubAAdmpunishInfo>(); gsgsXzcfXzcfDiv = gsgsxxDoc.getElementById("xingzhengchufa"); Elements gsgsXzcfXzcfTrs = gsgsXzcfXzcfDiv.select("#punTab").select("table").select("tr"); for (Element gsgsXzcfXzcfTr : gsgsXzcfXzcfTrs) { Elements gsgsGqczdjTds = gsgsXzcfXzcfTr.select("td"); String xzcfjdsNum = gsgsGqczdjTds.get(1).text(); String wfxwType = gsgsGqczdjTds.get(2).text(); String xzcfContent = gsgsGqczdjTds.get(3).text(); String zcxzcfjdjgName = gsgsGqczdjTds.get(4).text(); String zcxzcfjdDate = gsgsGqczdjTds.get(5).text(); AicpubAAdmpunishInfo gsgsXzcfXzcfInfo = new AicpubAAdmpunishInfo(); gsgsXzcfXzcfInfo.setPunishRepNum(xzcfjdsNum); gsgsXzcfXzcfInfo.setIllegalActType(wfxwType); gsgsXzcfXzcfInfo.setPunishContent(zcxzcfjdjgName); gsgsXzcfXzcfInfo.setDeciAuthority(xzcfContent); gsgsXzcfXzcfInfo.setDeciDateTime(zcxzcfjdDate); gsgsXzcfXzcfInfos.add(gsgsXzcfXzcfInfo); } } if (isDebug) { gsgsXzcfInfo.setHtml(gsgsXzcfXzcfDiv.toString()); } gsgsXzcfInfo.setAdmPunishInfos(gsgsXzcfXzcfInfos); gsgsInfo.setAdmPunishInfo(gsgsXzcfInfo); //-----------------?-->? end----------------------- //-----------------?-->??? 
start----------------------- AicpubOperanomaInfo gsgsJyycInfo = new AicpubOperanomaInfo(); List<AicpubOOperanomaInfo> gsgsJyycJyycInfos = null; Element gsgsJyycDiv = null; if (gsgsxxDoc.getElementById("jingyingyichangminglu") != null) { gsgsJyycJyycInfos = new ArrayList<AicpubOOperanomaInfo>(); gsgsJyycDiv = gsgsxxDoc.getElementById("jingyingyichangminglu"); Elements gsgsJyycTrs = gsgsJyycDiv.getElementById("excDiv").select("tr"); for (Element gsgsJyycTr : gsgsJyycTrs) { Elements gsgsJyycTds = gsgsJyycTr.select("td"); String lrjyycmlCause = gsgsJyycTds.get(1).text(); String lrDate = gsgsJyycTds.get(2).text(); String ycjyycmlCause = gsgsJyycTds.get(3).text(); String ycDate = gsgsJyycTds.get(4).text(); String zcjdAuthority = gsgsJyycTds.get(5).text(); AicpubOOperanomaInfo gsgsJyycJyycInfo = new AicpubOOperanomaInfo(); gsgsJyycJyycInfo.setIncludeCause(lrjyycmlCause); gsgsJyycJyycInfo.setIncludeDateTime(lrDate); gsgsJyycJyycInfo.setRemoveCause(ycjyycmlCause); gsgsJyycJyycInfo.setRemoveDateTime(ycDate); gsgsJyycJyycInfo.setAuthority(zcjdAuthority); gsgsJyycJyycInfos.add(gsgsJyycJyycInfo); } } if (isDebug) { gsgsJyycInfo.setHtml(gsgsJyycDiv.toString()); } gsgsJyycInfo.setOperAnomaInfos(gsgsJyycJyycInfos); gsgsInfo.setOperAnomaInfo(gsgsJyycInfo); //-----------------?-->??? end----------------------- //-----------------?-->??? 
start----------------------- AicpubSerillegalInfo gsgsYzwfInfo = new AicpubSerillegalInfo(); List<AicpubSSerillegalInfo> gsgsYzwfYzwfInfos = null; Element gsgsYzwfDiv = null; if (gsgsxxDoc.getElementById("yanzhongweifaqiye") != null) { gsgsYzwfYzwfInfos = new ArrayList<AicpubSSerillegalInfo>(); gsgsYzwfDiv = gsgsxxDoc.getElementById("yanzhongweifaqiye"); Elements gsgsYzwfTrs = gsgsYzwfDiv.getElementById("serillDiv").select("tr"); for (Element gsgsYzwfTr : gsgsYzwfTrs) { Elements gsgsYzwfTds = gsgsYzwfTr.select("td"); String lryzwfqymdCause = gsgsYzwfTds.get(1).text(); String lrDate = gsgsYzwfTds.get(2).text(); String ycyzwfqymdCause = gsgsYzwfTds.get(3).text(); String ycDate = gsgsYzwfTds.get(4).text(); String zcjdAuthority = gsgsYzwfTds.get(5).text(); AicpubSSerillegalInfo gsgsYzwfYzwfInfo = new AicpubSSerillegalInfo(); gsgsYzwfYzwfInfo.setIncludeCause(lryzwfqymdCause); gsgsYzwfYzwfInfo.setIncludeDateTime(lrDate); gsgsYzwfYzwfInfo.setRemoveCause(ycyzwfqymdCause); gsgsYzwfYzwfInfo.setRemoveDateTime(ycDate); gsgsYzwfYzwfInfo.setDeciAuthority(zcjdAuthority); gsgsYzwfYzwfInfos.add(gsgsYzwfYzwfInfo); } if (isDebug) { gsgsYzwfInfo.setHtml(gsgsYzwfDiv.toString()); } } gsgsYzwfInfo.setSerIllegalInfos(gsgsYzwfYzwfInfos); gsgsInfo.setSerIllegalInfo(gsgsYzwfInfo); //-----------------?-->??? end----------------------- //-----------------?-->? 
start----------------------- AicpubCheckInfo gsgsCcjcInfo = new AicpubCheckInfo(); List<AicpubCCheckInfo> gsgsCcjcCcjcInfos = null; Element gsgsCcjcDiv = null; if (gsgsxxDoc.getElementById("chouchaxinxi") != null) { gsgsCcjcCcjcInfos = new ArrayList<AicpubCCheckInfo>(); gsgsCcjcDiv = gsgsxxDoc.getElementById("chouchaxinxi"); Elements gsgsCcjcTrs = gsgsCcjcDiv.getElementById("spotCheckDiv").select("tr"); for (Element gsgsCcjcTr : gsgsCcjcTrs) { Elements gsgsCcjcTds = gsgsCcjcTr.select("td"); String jcssAuthority = gsgsCcjcTds.get(1).text(); String gsgsCcjc_type = gsgsCcjcTds.get(2).text(); String gsgsCcjc_date = gsgsCcjcTds.get(3).text(); String gsgsCcjc_result = gsgsCcjcTds.get(4).text(); AicpubCCheckInfo gsgsCcjcCcjcInfo = new AicpubCCheckInfo(); gsgsCcjcCcjcInfo.setCheckImplAuthority(jcssAuthority); gsgsCcjcCcjcInfo.setType(gsgsCcjc_type); gsgsCcjcCcjcInfo.setDateTime(gsgsCcjc_date); gsgsCcjcCcjcInfo.setResult(gsgsCcjc_result); gsgsCcjcCcjcInfos.add(gsgsCcjcCcjcInfo); } } if (isDebug) { gsgsCcjcInfo.setHtml(gsgsCcjcDiv.toString()); } gsgsCcjcInfo.setCheckInfos(gsgsCcjcCcjcInfos); gsgsInfo.setCheckInfo(gsgsCcjcInfo); gsxtFeedJson.setAicPubInfo(gsgsInfo); //-----------------?--> end----------------------- //??? EntpubInfo qygsInfo = new EntpubInfo(); String qygsxxHtml = (String) resultHtmlMap.get("qygsxx"); Document qygsxxDoc = Jsoup.parse(qygsxxHtml); //-----------------??-->? start----------------------- //??--?? 
List<String> qynbDetailList = (ArrayList<String>) resultHtmlMap.get("qygsxx_qynb_detail"); List<EntpubAnnreportInfo> qygsQynbInfos = null; if (qynbDetailList != null && !qynbDetailList.isEmpty()) { qygsQynbInfos = new ArrayList<EntpubAnnreportInfo>(); Element qygsnbDiv = qygsxxDoc.getElementById("qiyenianbao"); Elements qygsnbTrs = qygsnbDiv.select("tr"); int k = 0; for (int j = 2; j < qygsnbTrs.size(); j++) { EntpubAnnreportInfo qygsQynbInfo = new EntpubAnnreportInfo(); Elements qygsnbTds = qygsnbTrs.get(j).select("td"); String submitYear = qygsnbTds.get(1).text(); String pubDate = qygsnbTds.get(2).text(); qygsQynbInfo.setSubmitYear(submitYear); qygsQynbInfo.setPubDateTime(pubDate); String qynbDetailHtml = qynbDetailList.get(k++); Document qygsxxnbDetailDoc = Jsoup.parse(qynbDetailHtml); Elements qygsnbxxTables = qygsxxnbDetailDoc.select("#sifapanding").select("table"); int qygsnbxxTables_size = qygsnbxxTables.size(); for (int t = 0; t < qygsnbxxTables_size; t++) { Element qygsxx_qynb_info_table = qygsnbxxTables.get(t); Elements qygsxx_qynb_info_ths = getElements(qygsxx_qynb_info_table, "th"); Elements qygsxx_qynb_info_trs = getElements(qygsxx_qynb_info_table, "tr"); Elements qygsxx_qynb_info_tds = getElements(qygsxx_qynb_info_table, "td"); if (t == 0) { //??--> ?? 
EntpubAnnreportBaseInfo qygsQynbJbInfo = new EntpubAnnreportBaseInfo(); for (int i = 2; i < qygsxx_qynb_info_ths.size(); i++) { String th_name = qygsxx_qynb_info_ths.get(i).text().trim(); if (th_name.contains("?") || th_name.contains("?")) { String num = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setNum(num); } if (th_name.contains("???")) { String name = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setName(name); } if (th_name.contains("???")) { String tel = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setTel(tel); } if (th_name.contains("?")) { String zipCode = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setZipCode(zipCode); } if (th_name.contains("??")) { String address = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setAddress(address); } if (th_name.contains("?")) { String email = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setEmail(email); } if (th_name.contains("?????")) { String isStohrEquTransferred = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setIsStohrEquTransferred(isStohrEquTransferred); } if (th_name.contains("????")) { String operatingStatus = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setOperatingStatus(operatingStatus); } if (th_name.contains("?")) { String hasWebsiteOrStore = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setHasWebsiteOrStore(hasWebsiteOrStore); } if (th_name.contains("?????") || th_name.contains("????")) { String hasInvestInfoOrPurchOtherCorpEqu = qygsxx_qynb_info_tds.get(i - 2).text() .trim(); qygsQynbJbInfo .setHasInvestInfoOrPurchOtherCorpEqu(hasInvestInfoOrPurchOtherCorpEqu); } if (th_name.contains("")) { String empNum = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setEmpNum(empNum); } if (th_name.contains("")) { String affiliation = qygsxx_qynb_info_tds.get(i - 2).text().trim(); qygsQynbJbInfo.setAffiliation(affiliation); } 
qygsQynbInfo.setBaseInfo(qygsQynbJbInfo); } } else { String table_name = qygsxx_qynb_info_ths.get(0).text(); if (table_name.contains("?")) { // ??--> ? List<EntpubAnnreportWebsiteInfo> qygsQynbWzhwdInfos = new ArrayList<EntpubAnnreportWebsiteInfo>(); Elements wzwdxxTrs = qygsxx_qynb_info_trs; for (Element wzwdxxTr : wzwdxxTrs) { if (!"".equals(wzwdxxTr.attr("id")) && !wzwdxxTr.hasAttr("style")) { Elements wzwdxxTds = wzwdxxTr.select("td"); String wzwd_type = wzwdxxTds.get(0).text(); String wzwd_name = wzwdxxTds.get(1).text(); String website = wzwdxxTds.get(2).text(); EntpubAnnreportWebsiteInfo qygsQynbWzhwdInfo = new EntpubAnnreportWebsiteInfo(); qygsQynbWzhwdInfo.setType(wzwd_type); qygsQynbWzhwdInfo.setName(wzwd_name); qygsQynbWzhwdInfo.setWebsite(website); qygsQynbWzhwdInfos.add(qygsQynbWzhwdInfo); } } qygsQynbInfo.setWebsiteInfos(qygsQynbWzhwdInfos); } else if (table_name.contains("??")) { //??--> ?? List<EntpubAnnreportStohrinvestInfo> qygsQynbGdjczInfos = new ArrayList<EntpubAnnreportStohrinvestInfo>();// ?? 
Elements gdczxxTrs = qygsxx_qynb_info_trs; for (Element gdczxxTr : gdczxxTrs) { if (!"".equals(gdczxxTr.attr("id")) && !gdczxxTr.hasAttr("style")) { Elements gdczxxTds = gdczxxTr.select("td"); String stockholder = gdczxxTds.get(0).text(); String rjczAmount = gdczxxTds.get(1).text(); String rjczDate = gdczxxTds.get(2).text(); String rjczMethod = gdczxxTds.get(3).text(); String sjczAmount = gdczxxTds.get(4).text(); String sjczDate = gdczxxTds.get(5).text(); String sjczMethod = gdczxxTds.get(6).text(); EntpubAnnreportStohrinvestInfo qygsQynbGdjczInfo = new EntpubAnnreportStohrinvestInfo(); qygsQynbGdjczInfo.setStockholder(stockholder); qygsQynbGdjczInfo.setSubAmount(rjczAmount); qygsQynbGdjczInfo.setSubDateTime(rjczDate); qygsQynbGdjczInfo.setSubMethod(rjczMethod); qygsQynbGdjczInfo.setPaidAmount(sjczAmount); qygsQynbGdjczInfo.setPaidDateTime(sjczDate); qygsQynbGdjczInfo.setPaidMethod(sjczMethod); qygsQynbGdjczInfos.add(qygsQynbGdjczInfo); } } qygsQynbInfo.setStohrInvestInfos(qygsQynbGdjczInfos); } else if (table_name.contains("?")) { //??--> ? List<EntpubAnnreportExtinvestInfo> qygsQynbDwtzInfos = new ArrayList<EntpubAnnreportExtinvestInfo>();// ? Elements dwtzxxTrs = qygsxx_qynb_info_trs; for (Element dwtzxxTr : dwtzxxTrs) { if (!"".equals(dwtzxxTr.attr("id")) && !dwtzxxTr.hasAttr("style")) { Elements dwtzxxTds = dwtzxxTr.select("td"); String tzslqyhgmgqqyName = dwtzxxTds.get(0).text(); String regNum = dwtzxxTds.get(1).text(); EntpubAnnreportExtinvestInfo qygsQynbDwtzInfo = new EntpubAnnreportExtinvestInfo(); qygsQynbDwtzInfo.setEnterpriseName(tzslqyhgmgqqyName); qygsQynbDwtzInfo.setRegNum(regNum); qygsQynbDwtzInfos.add(qygsQynbDwtzInfo); } } qygsQynbInfo.setExtInvestInfos(qygsQynbDwtzInfos); } else if (table_name.contains("??")) { // ?? 
EntpubAnnreportAssetInfo qygsQynbQyzczkInfo = new EntpubAnnreportAssetInfo(); String assetAmount = qygsxx_qynb_info_tds.get(0).text(); String syzqyhj = qygsxx_qynb_info_tds.get(1).text(); String liabilityAmount = qygsxx_qynb_info_tds.get(2).text(); String salesAmount = qygsxx_qynb_info_tds.get(3).text(); String profitAmount = qygsxx_qynb_info_tds.get(4).text(); String xszezzyywsr = qygsxx_qynb_info_tds.get(5).text(); String netProfit = qygsxx_qynb_info_tds.get(6).text(); String taxesAmount = qygsxx_qynb_info_tds.get(7).text(); qygsQynbQyzczkInfo.setAssetAmount(assetAmount); qygsQynbQyzczkInfo.setTotalEquity(syzqyhj); qygsQynbQyzczkInfo.setLiabilityAmount(liabilityAmount); qygsQynbQyzczkInfo.setSalesAmount(salesAmount); qygsQynbQyzczkInfo.setProfitAmount(profitAmount); qygsQynbQyzczkInfo.setPriBusiIncomeInSalesAmount(xszezzyywsr); qygsQynbQyzczkInfo.setNetProfit(netProfit); qygsQynbQyzczkInfo.setTaxesAmount(taxesAmount); qygsQynbInfo.setAssetInfo(qygsQynbQyzczkInfo);// ?? } else if (table_name.contains("??")) { //?? List<EntpubAnnreportManageInfo> manageInfos = new ArrayList<EntpubAnnreportManageInfo>(); EntpubAnnreportManageInfo manageInfo = new EntpubAnnreportManageInfo(); Elements qynbscjyqkTds = qygsxx_qynb_info_tds; String saleSum = qynbscjyqkTds.get(0).text(); String salarySum = qynbscjyqkTds.get(1).text(); String netProfit = qynbscjyqkTds.get(2).text(); manageInfo.setSaleSum(saleSum); manageInfo.setSalarySum(salarySum); manageInfo.setNetProfit(netProfit); manageInfos.add(manageInfo); qygsQynbInfo.setManageInfos(manageInfos); } else if (table_name.contains("??????")) { // ?????? 
List<EntpubAnnreportExtguaranteeInfo> qygsQynbDwtgbzdbInfos = new ArrayList<EntpubAnnreportExtguaranteeInfo>(); Elements dwdbxxTrs = qygsxx_qynb_info_trs; for (Element dwdbxxTr : dwdbxxTrs) { if (!"".equals(dwdbxxTr.attr("id")) && !dwdbxxTr.hasAttr("style")) { EntpubAnnreportExtguaranteeInfo qygsQynbDwtgbzdbInfo = new EntpubAnnreportExtguaranteeInfo(); Elements dwdbxxTds = dwdbxxTr.select("td"); String creditor = dwdbxxTds.get(0).text(); String debtor = dwdbxxTds.get(1).text(); String priCredRightType = dwdbxxTds.get(2).text(); String priCredRightAmount = dwdbxxTds.get(3).text(); String exeDebtDeadline = dwdbxxTds.get(4).text(); String guaranteePeriod = dwdbxxTds.get(5).text(); String guaranteeMethod = dwdbxxTds.get(6).text(); if (dwdbxxTds.size() > 7) { String guaranteeScope = dwdbxxTds.get(7).text(); qygsQynbDwtgbzdbInfo.setGuaranteeScope(guaranteeScope); } qygsQynbDwtgbzdbInfo.setCreditor(creditor); qygsQynbDwtgbzdbInfo.setDebtor(debtor); qygsQynbDwtgbzdbInfo.setPriCredRightType(priCredRightType); qygsQynbDwtgbzdbInfo.setPriCredRightAmount(priCredRightAmount); qygsQynbDwtgbzdbInfo.setExeDebtDeadline(exeDebtDeadline); qygsQynbDwtgbzdbInfo.setGuaranteePeriod(guaranteePeriod); qygsQynbDwtgbzdbInfo.setGuaranteeMethod(guaranteeMethod); qygsQynbDwtgbzdbInfos.add(qygsQynbDwtgbzdbInfo); } } qygsQynbInfo.setExtGuaranteeInfos(qygsQynbDwtgbzdbInfos);// ?????? } else if (table_name.contains("??")) { //?? 
List<EntpubAnnreportEquchangeInfo> qygsQynbGqbgInfos = new ArrayList<EntpubAnnreportEquchangeInfo>(); Elements gqbgxxTrs = qygsxx_qynb_info_trs; for (Element gqbgxxTr : gqbgxxTrs) { if (!"".equals(gqbgxxTr.attr("id")) && !gqbgxxTr.hasAttr("style")) { Elements gqbgxxTds = gqbgxxTr.select("td"); String stockholder = gqbgxxTds.get(0).text(); String bgqOwnershipRatio = gqbgxxTds.get(1).text(); String bghOwnershipRatio = gqbgxxTds.get(2).text(); String bgDate = gqbgxxTds.get(3).text(); EntpubAnnreportEquchangeInfo qygsQynbGqbgInfo = new EntpubAnnreportEquchangeInfo(); qygsQynbGqbgInfo.setStockholder(stockholder); qygsQynbGqbgInfo.setPreOwnershipRatio(bgqOwnershipRatio); qygsQynbGqbgInfo.setPostOwnershipRatio(bghOwnershipRatio); qygsQynbGqbgInfo.setDateTime(bgDate); qygsQynbGqbgInfos.add(qygsQynbGqbgInfo); } } qygsQynbInfo.setEquChangeInfos(qygsQynbGqbgInfos);// ?? } else if (table_name.contains("")) { // List<EntpubAnnreportModifyInfo> qygsQynbXgjlInfos = new ArrayList<EntpubAnnreportModifyInfo>(); Elements xgjlxxTrs = qygsxx_qynb_info_trs; for (Element xgjlxxTr : xgjlxxTrs) { if (!"".equals(xgjlxxTr.attr("id")) && !xgjlxxTr.hasAttr("style")) { Elements xgjlxxTds = xgjlxxTr.select("td"); String xgItem = xgjlxxTds.get(1).text(); String xgqContent = xgjlxxTds.get(2).text(); String xghContent = xgjlxxTds.get(3).text(); String xgDate = xgjlxxTds.get(4).text(); EntpubAnnreportModifyInfo qygsQynbXgjlInfo = new EntpubAnnreportModifyInfo(); qygsQynbXgjlInfo.setItem(xgItem); qygsQynbXgjlInfo.setPreContent(xgqContent); qygsQynbXgjlInfo.setPostContent(xghContent); qygsQynbXgjlInfo.setDateTime(xgDate); qygsQynbXgjlInfos.add(qygsQynbXgjlInfo); } } qygsQynbInfo.setChangeInfos(qygsQynbXgjlInfos); } } } qygsQynbInfos.add(qygsQynbInfo); } } qygsInfo.setAnnReports(qygsQynbInfos); //-----------------??-->?? end----------------------- //-----------------??-->?? 
start----------------------- EntpubStohrinvestInfo qygsGdjczInfo = new EntpubStohrinvestInfo(); Element qygsgdczDiv = qygsxxDoc.getElementById("touziren"); //??-->?? List<EntpubSStohrinvestInfo> qygsGdjczGdjczs = new ArrayList<EntpubSStohrinvestInfo>(); if (qygsxxDoc.getElementById("touziren") != null) { Element qygsgdczxxDiv = qygsgdczDiv.getElementById("gdDiv"); Elements qygsgdczxxTrs = qygsgdczxxDiv.select("tr"); if (qygsgdczxxTrs.size() > 3) { for (int j = 3; j < qygsgdczxxTrs.size(); j++) { Elements qygsgdczxxTds = qygsgdczxxTrs.get(j).select("td"); String stockholder = qygsgdczxxTds.get(0).text(); String rjAmount = qygsgdczxxTds.get(1).text(); String sjAmount = qygsgdczxxTds.get(2).text(); String rj_method = qygsgdczxxTds.get(3).text(); String rj_amount = qygsgdczxxTds.get(4).text(); String rj_date = qygsgdczxxTds.get(5).text(); String rj_showdate = qygsgdczxxTds.get(6).text(); String sj_method = qygsgdczxxTds.get(7).text(); String sj_amount = qygsgdczxxTds.get(8).text(); String sj_date = qygsgdczxxTds.get(9).text(); String sj_showdate = qygsgdczxxTds.get(10).text(); EntpubSStohrinvestInfo qygsGdjczGdjczInfo = new EntpubSStohrinvestInfo(); EntpubSStohrinvestInfo.Detail rjDetail = qygsGdjczGdjczInfo.new Detail(); EntpubSStohrinvestInfo.Detail sjDetail = qygsGdjczGdjczInfo.new Detail(); List<Detail> rjDetailList = new ArrayList<EntpubSStohrinvestInfo.Detail>(); List<Detail> sjDetailList = new ArrayList<EntpubSStohrinvestInfo.Detail>(); rjDetail.method = rj_method; rjDetail.amount = rj_amount; rjDetail.dateTime = rj_date; rjDetail.showDate = rj_showdate; sjDetail.method = sj_method; sjDetail.amount = sj_amount; sjDetail.dateTime = sj_date; sjDetail.showDate = sj_showdate; qygsGdjczGdjczInfo.setStockholder(stockholder); qygsGdjczGdjczInfo.setSubAmount(rjAmount); qygsGdjczGdjczInfo.setPaidAmount(sjAmount); rjDetailList.add(rjDetail); sjDetailList.add(sjDetail); qygsGdjczGdjczInfo.setSubDetails(rjDetailList); qygsGdjczGdjczInfo.setPaidDetails(sjDetailList); 
qygsGdjczGdjczs.add(qygsGdjczGdjczInfo); } } qygsGdjczInfo.setStohrInvestInfos(qygsGdjczGdjczs); //??-->?? List<EntpubStohrinvestChangeInfo> qygsGdjczBgInfos = new ArrayList<EntpubStohrinvestChangeInfo>(); Element qygsbgxxDiv = qygsgdczDiv.getElementById("altInv"); Elements qygsbgxxTrs = qygsbgxxDiv.select("tr"); for (int j = 2; j < qygsbgxxTrs.size(); j++) { Elements qygsbgxxTds = qygsbgxxTrs.get(j).select("td"); String bgItem = qygsbgxxTds.get(1).text(); String bgDate = qygsbgxxTds.get(2).text(); String bgqContent = qygsbgxxTds.get(3).text(); String bghContent = qygsbgxxTds.get(4).text(); EntpubStohrinvestChangeInfo qygsGdjczBgInfo = new EntpubStohrinvestChangeInfo(); qygsGdjczBgInfo.setItem(bgItem); qygsGdjczBgInfo.setDateTime(bgDate); qygsGdjczBgInfo.setPreContent(bgqContent); qygsGdjczBgInfo.setPostContent(bghContent); qygsGdjczBgInfos.add(qygsGdjczBgInfo); } if (isDebug) { qygsGdjczInfo.setHtml(qygsbgxxDiv.toString()); } qygsGdjczInfo.setChangeInfos(qygsGdjczBgInfos); qygsInfo.setStohrInvestInfo(qygsGdjczInfo); } //-----------------??-->?? end----------------------- //-----------------??-->??? start----------------------- //??-->??? 
EntpubEquchangeInfo qygsGqbgInfo = new EntpubEquchangeInfo(); List<EntpubEEquchangeInfo> qygsGqbgGqbgInfos = null; if (qygsxxDoc.getElementById("gqbg") != null) { qygsGqbgGqbgInfos = new ArrayList<EntpubEEquchangeInfo>(); Element qygsgqbgxxDiv = qygsxxDoc.getElementById("gqbg"); Elements qygsgqbgxxTrs = qygsgqbgxxDiv.select("tr"); for (int j = 2; j < qygsgqbgxxTrs.size(); j++) { Elements qygsgqbgxxTds = qygsgqbgxxTrs.get(j).select("td"); String stockholder = qygsgqbgxxTds.get(1).text(); String bgqOwnershipRatio = qygsgqbgxxTds.get(2).text(); String bghOwnershipRatio = qygsgqbgxxTds.get(3).text(); String bgDate = qygsgqbgxxTds.get(4).text(); String gsrq = qygsgqbgxxTds.get(5).text(); EntpubEEquchangeInfo qygsGqbgGqbgInfo = new EntpubEEquchangeInfo(); qygsGqbgGqbgInfo.setStockholder(stockholder); qygsGqbgGqbgInfo.setPreOwnershipRatio(bgqOwnershipRatio); qygsGqbgGqbgInfo.setPostOwnershipRatio(bghOwnershipRatio); qygsGqbgGqbgInfo.setDateTime(bgDate); qygsGqbgGqbgInfos.add(qygsGqbgGqbgInfo); } if (isDebug) { qygsGqbgInfo.setHtml(qygsgqbgxxDiv.toString()); } qygsGqbgInfo.setEquChangeInfos(qygsGqbgGqbgInfos); qygsInfo.setEquChangeInfo(qygsGqbgInfo); } //-----------------??-->??? end----------------------- //-----------------??-->?? start----------------------- //??-->?? 
EntpubAdmlicInfo qygsXzxkInfo = new EntpubAdmlicInfo(); List<EntpubAAdmlicInfo> qygsXzxkXzxkInfos = null; if (qygsxxDoc.getElementById("licenseRegDiv") != null) { qygsXzxkXzxkInfos = new ArrayList<EntpubAAdmlicInfo>(); Element qygsxzxkDivs = qygsxxDoc.getElementById("licenseRegDiv"); Elements qygsxzxkTrs = qygsxzxkDivs.select("tr"); for (int j = 2; j < qygsxzxkTrs.size(); j++) { Elements qygsxzxkTds = qygsxzxkTrs.get(j).select("td"); String xkwjNum = qygsxzxkTds.get(1).text(); String xkwjName = qygsxzxkTds.get(2).text(); String xzxk_startDate = qygsxzxkTds.get(3).text(); String xzxk_endDate = qygsxzxkTds.get(4).text(); String xkAuthority = qygsxzxkTds.get(5).text(); String xkContent = qygsxzxkTds.get(6).text(); String status = qygsxzxkTds.get(7).text(); String gsrq = qygsxzxkTds.get(8).text(); String detail = qygsxzxkTds.get(9).text(); EntpubAAdmlicInfo qygsXzxkXzxkInfo = new EntpubAAdmlicInfo(); qygsXzxkXzxkInfo.setLicenceNum(xkwjNum); qygsXzxkXzxkInfo.setLicenceName(xkwjName); qygsXzxkXzxkInfo.setStartDateTime(xzxk_startDate); qygsXzxkXzxkInfo.setEndDateTime(xzxk_endDate); qygsXzxkXzxkInfo.setDeciAuthority(xkAuthority); qygsXzxkXzxkInfo.setContent(xkContent); qygsXzxkXzxkInfo.setStatus(status); qygsXzxkXzxkInfo.setDetail(detail); qygsXzxkXzxkInfos.add(qygsXzxkXzxkInfo); } if (isDebug) { qygsXzxkInfo.setHtml(qygsxzxkDivs.toString()); } qygsXzxkInfo.setAdmlicInfos(qygsXzxkXzxkInfos); } qygsInfo.setAdmLicInfo(qygsXzxkInfo); //-----------------??-->?? end----------------------- //-----------------??-->?? 
start----------------------- EntpubIntellectualproregInfo qygsZscqczdjInfo = new EntpubIntellectualproregInfo(); List<EntpubIIntellectualproregInfo> qygsZscqczdjZscqczdjInfos = null; if (qygsxxDoc.getElementById("xzcfDiv") != null) { qygsZscqczdjZscqczdjInfos = new ArrayList<EntpubIIntellectualproregInfo>(); Element qygszscqdjxxDiv = qygsxxDoc.getElementById("xzcfDiv"); Elements qygszscqdjxxTrs = qygszscqdjxxDiv.select("tr"); for (int j = 2; j < qygszscqdjxxTrs.size(); j++) { Elements qygszscqdjxxTds = qygszscqdjxxTrs.get(j).select("td"); String regNum = qygszscqdjxxTds.get(1).text(); String zscq_name = qygszscqdjxxTds.get(2).text(); String zscq_type = qygszscqdjxxTds.get(3).text(); String czrName = qygszscqdjxxTds.get(4).text(); String zqrName = qygszscqdjxxTds.get(5).text(); String zqdjDeadline = qygszscqdjxxTds.get(6).text(); String status = qygszscqdjxxTds.get(7).text(); String changeSitu = qygszscqdjxxTds.get(8).text(); EntpubIIntellectualproregInfo qygsZscqczdjZscqczdjInfo = new EntpubIIntellectualproregInfo(); qygsZscqczdjZscqczdjInfo.setRegNum(regNum); qygsZscqczdjZscqczdjInfo.setName(zscq_name); qygsZscqczdjZscqczdjInfo.setType(zscq_type); qygsZscqczdjZscqczdjInfo.setMortgagorName(czrName); qygsZscqczdjZscqczdjInfo.setMortgageeName(zqrName); qygsZscqczdjZscqczdjInfo.setPledgeRegDeadline(zqdjDeadline); qygsZscqczdjZscqczdjInfo.setStatus(status); qygsZscqczdjZscqczdjInfo.setChangeSitu(changeSitu); qygsZscqczdjZscqczdjInfos.add(qygsZscqczdjZscqczdjInfo); } if (isDebug) { qygsZscqczdjInfo.setHtml(qygszscqdjxxDiv.toString()); } qygsZscqczdjInfo.setIntellectualProRegInfos(qygsZscqczdjZscqczdjInfos); } qygsInfo.setIntellectualProRegInfo(qygsZscqczdjInfo); //-----------------??-->?? end----------------------- //-----------------??-->? 
start----------------------- EntpubAdmpunishInfo qygsXzcfInfo = new EntpubAdmpunishInfo(); List<EntpubAAdmpunishInfo> qygsXzcfXzcfInfos = new ArrayList<EntpubAAdmpunishInfo>(); Element qygsxzcfxxDiv = qygsxxDoc.getElementById("xzcfDiv"); if (qygsxxDoc.getElementById("xzcfDiv") != null) { Elements qygsxzcfxxTrs = qygsxzcfxxDiv.select("tr"); for (int j = 2; j < qygsxzcfxxTrs.size(); j++) { Elements qygsxzcfxxTds = qygsxzcfxxTrs.get(j).select("td"); String xzcfjdsNum = qygsxzcfxxTds.get(1).text(); String xzcfContent = qygsxzcfxxTds.get(2).text(); String zcxzcfjdjgName = qygsxzcfxxTds.get(3).text(); String zcxzcfjdDate = qygsxzcfxxTds.get(4).text(); String wfxwType = qygsxzcfxxTds.get(5).text(); String note = qygsxzcfxxTds.get(6).text(); EntpubAAdmpunishInfo qygsXzcfXzcfInfo = new EntpubAAdmpunishInfo(); qygsXzcfXzcfInfo.setPunishRepNum(xzcfjdsNum); qygsXzcfXzcfInfo.setPunishContent(xzcfContent); qygsXzcfXzcfInfo.setDeciAuthority(zcxzcfjdjgName); qygsXzcfXzcfInfo.setDeciDateTime(zcxzcfjdDate); qygsXzcfXzcfInfo.setIllegalActType(wfxwType); qygsXzcfXzcfInfo.setNote(note); qygsXzcfXzcfInfos.add(qygsXzcfXzcfInfo); } if (isDebug) { qygsXzcfInfo.setHtml(qygsxzcfxxDiv.toString()); } qygsXzcfInfo.setAdmPunishInfos(qygsXzcfXzcfInfos); } qygsInfo.setAdmPunishInfo(qygsXzcfInfo); gsxtFeedJson.setEntPubInfo(qygsInfo); //-----------------??-->? end----------------------- // ? OthrdeptpubInfo qtbmgsInfo = new OthrdeptpubInfo(); String qtbmgsxxHtml = (String) resultHtmlMap.get("qtbmgsxx"); Document qtbmgsxxHtmlDoc = Jsoup.parse(qtbmgsxxHtml); //-----------------?-->?? 
start----------------------- OthrdeptpubAdmlicInfo qtbmgsXzxkInfo = new OthrdeptpubAdmlicInfo(); List<OthrdeptpubAAdmlicInfo> qtbmgsXzxkXzxkInfos = new ArrayList<OthrdeptpubAAdmlicInfo>(); Element qtbmxzxkxxDiv = qtbmgsxxHtmlDoc.getElementById("licenseRegDiv"); Elements qtbmxzxkxxTrs = qtbmxzxkxxDiv.select("tr"); for (Element qtbmxzxkxxTr : qtbmxzxkxxTrs) { Elements qtbmxzxkxxTds = qtbmxzxkxxTr.select("td"); String xkwjNum = qtbmxzxkxxTds.get(1).text(); String xkwjName = qtbmxzxkxxTds.get(2).text(); String xzxk_startDate = qtbmxzxkxxTds.get(3).text(); String xzxk_endDate = qtbmxzxkxxTds.get(4).text(); String xkAuthority = qtbmxzxkxxTds.get(5).text(); String xkContent = qtbmxzxkxxTds.get(6).text(); String status = qtbmxzxkxxTds.get(7).text(); String detail = qtbmxzxkxxTds.get(8).text(); OthrdeptpubAAdmlicInfo qtbmgsXzxkXzxkInfo = new OthrdeptpubAAdmlicInfo(); qtbmgsXzxkXzxkInfo.setLicenceNum(xkwjNum); qtbmgsXzxkXzxkInfo.setLicenceName(xkwjName); qtbmgsXzxkXzxkInfo.setStartDateTime(xzxk_startDate); qtbmgsXzxkXzxkInfo.setEndDateTime(xzxk_endDate); qtbmgsXzxkXzxkInfo.setDeciAuthority(xkAuthority); qtbmgsXzxkXzxkInfo.setContent(xkContent); qtbmgsXzxkXzxkInfo.setStatus(status); qtbmgsXzxkXzxkInfo.setDetail(detail); qtbmgsXzxkXzxkInfos.add(qtbmgsXzxkXzxkInfo); } if (isDebug) { qtbmgsXzxkInfo.setHtml(qtbmxzxkxxDiv.toString()); } qtbmgsXzxkInfo.setAdmLicInfos(qtbmgsXzxkXzxkInfos); qtbmgsInfo.setAdmLicInfo(qtbmgsXzxkInfo); //-----------------?-->?? end----------------------- //-----------------?-->? 
start----------------------- OthrdeptpubAdmpunishInfo qtbmgsXzcfInfo = new OthrdeptpubAdmpunishInfo(); List<OthrdeptpubAAdmpunishInfo> qtbmgsXzcfXzcfInfos = new ArrayList<OthrdeptpubAAdmpunishInfo>(); Element qtbmxzcfxxDiv = qtbmgsxxHtmlDoc.getElementById("xzcfDiv"); Elements qtbmxzcfxxTrs = qtbmxzcfxxDiv.select("tr"); for (int j = 2; j < qtbmxzcfxxTrs.size(); j++) { Elements qtbmxzcfxxTds = qtbmxzcfxxTrs.get(j).select("td"); String xzcfjdsNum = qtbmxzcfxxTds.get(1).text(); String wfxwType = qtbmxzcfxxTds.get(2).text(); String xzcfContent = qtbmxzcfxxTds.get(3).text(); String zcxzcfjdjgName = qtbmxzcfxxTds.get(4).text(); String zcxzcfjdDate = qtbmxzcfxxTds.get(5).text(); String detail = qtbmxzcfxxTds.get(6).text(); String note = qtbmxzcfxxTds.get(7).text(); OthrdeptpubAAdmpunishInfo qtbmgsXzcfXzcfInfo = new OthrdeptpubAAdmpunishInfo(); qtbmgsXzcfXzcfInfo.setPunishRepNum(xzcfjdsNum); qtbmgsXzcfXzcfInfo.setIllegalActType(wfxwType); qtbmgsXzcfXzcfInfo.setPunishContent(xzcfContent); qtbmgsXzcfXzcfInfo.setDeciAuthority(zcxzcfjdjgName); qtbmgsXzcfXzcfInfo.setDeciDateTime(zcxzcfjdDate); qtbmgsXzcfXzcfInfo.setDetail(detail); qtbmgsXzcfXzcfInfo.setNote(note); qtbmgsXzcfXzcfInfos.add(qtbmgsXzcfXzcfInfo); } if (isDebug) { qtbmgsXzcfInfo.setHtml(qtbmxzcfxxDiv.toString()); } qtbmgsXzcfInfo.setAdmPunishInfos(qtbmgsXzcfXzcfInfos); qtbmgsInfo.setAdmPunishInfo(qtbmgsXzcfInfo); gsxtFeedJson.setOthrDeptPubInfo(qtbmgsInfo); //-----------------?-->? end----------------------- // ???? 
//-----------------????-->???start----------------------- JudasspubInfo sfxzgsInfo = new JudasspubInfo(); String sfxzgqdjxxHtml = (String) resultHtmlMap.get("sfxzgsxx"); if (sfxzgqdjxxHtml != null) { Document sfxzgqdjxxDoc = Jsoup.parse(sfxzgqdjxxHtml); JudasspubEqufreezeInfo sfxzgsGqdjInfo = new JudasspubEqufreezeInfo(); List<JudasspubEEqufreezeInfo> sfxzgsGqdjGqdjInfos = new ArrayList<JudasspubEEqufreezeInfo>(); Element sfxzgqdjxxDiv = sfxzgqdjxxDoc.getElementById("EquityFreezeDiv"); Elements sfxzgqdjxxTrs = sfxzgqdjxxDiv.select("tr"); for (int j = 2; j < sfxzgqdjxxTrs.size(); j++) { Elements sfxzgqdjxxTds = sfxzgqdjxxTrs.get(j).select("td"); String bzxPerson = sfxzgqdjxxTds.get(1).text(); String gqAmount = sfxzgqdjxxTds.get(2).text(); String exeCourt = sfxzgqdjxxTds.get(3).text(); String xzgstzsNum = sfxzgqdjxxTds.get(4).text(); String status = sfxzgqdjxxTds.get(5).text(); String detail = sfxzgqdjxxTds.get(6).text(); JudasspubEEqufreezeInfo sfxzgsGqdjGqdjInfo = new JudasspubEEqufreezeInfo(); sfxzgsGqdjGqdjInfo.setExecutedPerson(bzxPerson); sfxzgsGqdjGqdjInfo.setEquAmount(gqAmount); sfxzgsGqdjGqdjInfo.setExeCourt(exeCourt); sfxzgsGqdjGqdjInfo.setAssistPubNoticeNum(xzgstzsNum); sfxzgsGqdjGqdjInfo.setStatus(status); sfxzgsGqdjGqdjInfo.setDetail(detail); sfxzgsGqdjGqdjInfos.add(sfxzgsGqdjGqdjInfo); } if (isDebug) { sfxzgsGqdjInfo.setHtml(sfxzgqdjxxDiv.toString()); } sfxzgsGqdjInfo.setEquFreezeInfos(sfxzgsGqdjGqdjInfos); sfxzgsInfo.setEquFreezeInfo(sfxzgsGqdjInfo); //-----------------????-->???end----------------------- //-----------------????-->??start----------------------- JudasspubStohrchangeInfo sfxzgsGdbgInfo = new JudasspubStohrchangeInfo(); List<JudasspubSStohrchangeInfo> sfxzgsGdbgGdbgInfos = new ArrayList<JudasspubSStohrchangeInfo>(); Element sfxzgdbgxxDiv = sfxzgqdjxxDoc.getElementById("xzcfDiv"); Elements sfxzgdbgxxTrs = sfxzgdbgxxDiv.select("tr"); for (int j = 2; j < sfxzgdbgxxTrs.size(); j++) { Elements sfxzgdbgxxTds = sfxzgdbgxxTrs.get(j).select("td"); 
String bzxPerson = sfxzgdbgxxTds.get(1).text(); String gqAmount = sfxzgdbgxxTds.get(2).text(); String srPerson = sfxzgdbgxxTds.get(3).text(); String exeCourt = sfxzgdbgxxTds.get(4).text(); String detail = sfxzgdbgxxTds.get(5).text(); JudasspubSStohrchangeInfo sfxzgsGdbgGdbgInfo = new JudasspubSStohrchangeInfo(); sfxzgsGdbgGdbgInfo.setExecutedPerson(bzxPerson); sfxzgsGdbgGdbgInfo.setEquAmount(gqAmount); sfxzgsGdbgGdbgInfo.setAssignee(srPerson); sfxzgsGdbgGdbgInfo.setExeCourt(exeCourt); sfxzgsGdbgGdbgInfo.setDetail(detail); sfxzgsGdbgGdbgInfos.add(sfxzgsGdbgGdbgInfo); } if (isDebug) { sfxzgsGdbgInfo.setHtml(sfxzgdbgxxDiv.toString()); } sfxzgsGdbgInfo.setStohrChangeInfos(sfxzgsGdbgGdbgInfos); sfxzgsInfo.setStohrChangeInfo(sfxzgsGdbgInfo); gsxtFeedJson.setJudAssPubInfo(sfxzgsInfo); } //-----------------????-->??end----------------------- return gsxtFeedJson; }
From source file:com.jimplush.goose.ContentExtractor.java
/** * we're going to start looking for where the clusters of paragraphs are. We'll score a cluster based on the number of stopwords * and the number of consecutive paragraphs together, which should form the cluster of text that this node is around * also store on how high up the paragraphs are, comments are usually at the bottom and should get a lower score * * @return// www . j ava 2 s . co m */ private Element calculateBestNodeBasedOnClustering(Document doc) { Element topNode = null; // grab all the paragraph elements on the page to start to inspect the likely hood of them being good peeps ArrayList<Element> nodesToCheck = getNodesToCheck(doc); double startingBoost = 1.0; int cnt = 0; int i = 0; // holds all the parents of the nodes we're checking Set<Element> parentNodes = new HashSet<Element>(); ArrayList<Element> nodesWithText = new ArrayList<Element>(); for (Element node : nodesToCheck) { String nodeText = node.text(); WordStats wordStats = StopWords.getStopWordCount(nodeText); boolean highLinkDensity = isHighLinkDensity(node); if (wordStats.getStopWordCount() > 2 && !highLinkDensity) { nodesWithText.add(node); } } int numberOfNodes = nodesWithText.size(); int negativeScoring = 0; // we shouldn't give more negatives than positives // we want to give the last 20% of nodes negative scores in case they're comments double bottomNodesForNegativeScore = (float) numberOfNodes * 0.25; if (logger.isDebugEnabled()) { logger.debug("About to inspect num of nodes with text: " + numberOfNodes); } for (Element node : nodesWithText) { // add parents and grandparents to scoring // only add boost to the middle paragraphs, top and bottom is usually jankz city // so basically what we're doing is giving boost scores to paragraphs that appear higher up in the dom // and giving lower, even negative scores to those who appear lower which could be commenty stuff float boostScore = 0; if (isOkToBoost(node)) { if (cnt >= 0) { boostScore = (float) ((1.0 / startingBoost) * 50); 
startingBoost++; } } // check for negative node values if (numberOfNodes > 15) { if ((numberOfNodes - i) <= bottomNodesForNegativeScore) { float booster = (float) bottomNodesForNegativeScore - (float) (numberOfNodes - i); boostScore = -(float) Math.pow(booster, (float) 2); // we don't want to score too highly on the negative side. float negscore = Math.abs(boostScore) + negativeScoring; if (negscore > 40) { boostScore = 5; } } } if (logger.isDebugEnabled()) { logger.debug("Location Boost Score: " + boostScore + " on interation: " + i + "' id='" + node.parent().id() + "' class='" + node.parent().attr("class")); } String nodeText = node.text(); WordStats wordStats = StopWords.getStopWordCount(nodeText); int upscore = (int) (wordStats.getStopWordCount() + boostScore); updateScore(node.parent(), upscore); updateScore(node.parent().parent(), upscore / 2); updateNodeCount(node.parent(), 1); updateNodeCount(node.parent().parent(), 1); if (!parentNodes.contains(node.parent())) { parentNodes.add(node.parent()); } if (!parentNodes.contains(node.parent().parent())) { parentNodes.add(node.parent().parent()); } cnt++; i++; } // now let's find the parent node who scored the highest int topNodeScore = 0; for (Element e : parentNodes) { if (logger.isDebugEnabled()) { logger.debug("ParentNode: score='" + e.attr("gravityScore") + "' nodeCount='" + e.attr("gravityNodes") + "' id='" + e.id() + "' class='" + e.attr("class") + "' "); } //int score = Integer.parseInt(e.attr("gravityScore")) * Integer.parseInt(e.attr("gravityNodes")); int score = getScore(e); if (score > topNodeScore) { topNode = e; topNodeScore = score; } if (topNode == null) { topNode = e; } } if (logger.isDebugEnabled()) { if (topNode == null) { logger.debug("ARTICLE NOT ABLE TO BE EXTRACTED!, WE HAZ FAILED YOU LORD VADAR"); } else { String logText; String targetText = ""; Element topPara = topNode.getElementsByTag("p").first(); if (topPara == null) { topNode.text(); } else { topPara.text(); } if (targetText.length() 
>= 51) { logText = targetText.substring(0, 50); } else { logText = targetText; } logger.debug("TOPNODE TEXT: " + logText.trim()); logger.debug("Our TOPNODE: score='" + topNode.attr("gravityScore") + "' nodeCount='" + topNode.attr("gravityNodes") + "' id='" + topNode.id() + "' class='" + topNode.attr("class") + "' "); } } return topNode; }
From source file:com.storm.function.GsxtFunction.java
private Map<String, Object> getHtmlInfoMapOfJilin(String area, String keyword, ChannelLogger LOGGER) throws Exception { Map<String, Object> resultHtmlMap = new LinkedHashMap<String, Object>(); String[] command = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/jilin.js", "--web-security=no", "--keyword=" + keyword }; String casperjsResult = CommandUtil.runCommand(command); Elements divDataItems = Jsoup.parse(casperjsResult).getElementsByClass("list"); Elements divNoDataItems = Jsoup.parse(casperjsResult).getElementsByClass("list-a"); if (divDataItems.isEmpty() && !divNoDataItems.isEmpty()) { // ? resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND); } else if (divDataItems.isEmpty() && divDataItems.isEmpty()) { // ?? // ????/*w ww . j a v a 2s.c o m*/ if (casperjsResult.contains("")) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR); } else { resultHtmlMap.put("statusCodeDef", StatusCodeDef.FAILURE); } } else if (!divDataItems.isEmpty() && divNoDataItems.isEmpty()) { // ? // ??????? 
Element nowCookies = Jsoup.parse(casperjsResult).getElementById("nextParams"); Elements tokenEts = Jsoup.parse(casperjsResult).getElementsByAttributeValue("name", "_csrf"); if (null == nowCookies || null == tokenEts || tokenEts.isEmpty()) { resultHtmlMap.put("statusCodeDef", StatusCodeDef.COOKIE_ERROR); return resultHtmlMap; } String nowCookiesJson = nowCookies.text().trim(); String nowCookiesStr = ((String) new GsonBuilder().create().fromJson(nowCookiesJson, Map.class) .get("Cookie")).trim(); String tokenStr = tokenEts.get(0).attr("content"); String HOST_OF_JILIN = "http://211.141.74.198:8081/aiccips/pub/"; String HOST_OF_XQ = "http://211.141.74.198:8081/"; String htmlAnchorHref = ""; for (Element divDataItem : divDataItems) { Element htmlAnchor = divDataItem.getElementsByTag("a").get(0); String htmlAnchorText = htmlAnchor.text(); if (htmlAnchorText.contains(keyword)) { htmlAnchorHref = HOST_OF_JILIN + htmlAnchor.attr("href"); break; } } if (StringUtils.isEmpty(htmlAnchorHref)) { htmlAnchorHref = "http://211.141.74.198:8081/aiccips/pub/" + divDataItems.get(0).getElementsByTag("a").get(0).attr("href"); } String commonUrl = htmlAnchorHref.split("gsgsdetail")[1]; String commonUrlZ = htmlAnchorHref.substring(htmlAnchorHref.lastIndexOf("/") + 1, htmlAnchorHref.length()); // ?->? String[] command11 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + htmlAnchorHref }; String casperjsResult11 = CommandUtil.runCommand(command11); resultHtmlMap.put("gsgsxx", casperjsResult11); Thread.sleep(1000); // ?->?->?? 
String baxxZyryxxUrl = HOST_OF_JILIN + "gsryxx/1151?encrpripid=" + commonUrlZ; String[] command121 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + baxxZyryxxUrl }; String casperjsResult121 = CommandUtil.runCommand(command121); resultHtmlMap.put("gsgsxx_baxx_zyryxx", casperjsResult121); // ?->?->? String baxxFzjgxxUrl = HOST_OF_JILIN + "gsfzjg/1151?encrpripid=" + commonUrlZ; String[] command123 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + baxxFzjgxxUrl }; String casperjsResult123 = CommandUtil.runCommand(command123); resultHtmlMap.put("gsgsxx_baxx_fzjgxx", casperjsResult123); // ?->?->? String dcdydjxxDcdydjxxUrl = HOST_OF_JILIN + "gsdcdy?encrpripid=" + commonUrlZ; String[] command131 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + dcdydjxxDcdydjxxUrl }; String casperjsResult131 = CommandUtil.runCommand(command131); resultHtmlMap.put("gsgsxx_dcdydjxx_dcdydjxx", casperjsResult131); // ?->??->?? String gqczdjxxGqczdjxxUrl = HOST_OF_JILIN + "gsgqcz?encrpripid=" + commonUrlZ; String[] command141 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + gqczdjxxGqczdjxxUrl }; String casperjsResult141 = CommandUtil.runCommand(command141); resultHtmlMap.put("gsgsxx_gqczdjxx_gqczdjxx", casperjsResult141); // ?->?->? 
String xzcfxxXzcfxxUrl = HOST_OF_JILIN + "gsxzcfxx?encrpripid=" + commonUrlZ; String[] command151 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + xzcfxxXzcfxxUrl }; String casperjsResult151 = CommandUtil.runCommand(command151); resultHtmlMap.put("gsgsxx_xzcfxx_xzcfxx", casperjsResult151); // ?->???->??? String jyycxxJyycxxUrl = HOST_OF_JILIN + "jyyc/1151?encrpripid=" + commonUrlZ; String[] command161 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + jyycxxJyycxxUrl }; String casperjsResult161 = CommandUtil.runCommand(command161); resultHtmlMap.put("gsgsxx_jyycxx_jyycxx", casperjsResult161); // ?->???->??? String yzwfxxYzwfxxUrl = HOST_OF_JILIN + "yzwfqy?encrpripid=" + commonUrlZ; String[] command171 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + yzwfxxYzwfxxUrl }; String casperjsResult171 = CommandUtil.runCommand(command171); resultHtmlMap.put("gsgsxx_yzwfxx_yzwfxx", casperjsResult171); // ?->?->? String ccjcxxCcjcxxUrl = HOST_OF_JILIN + "ccjcxx?encrpripid=" + commonUrlZ; String[] command181 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + htmlAnchorHref, "--tokenStr=" + tokenStr, "--url=" + ccjcxxCcjcxxUrl }; String casperjsResult181 = CommandUtil.runCommand(command181); resultHtmlMap.put("gsgsxx_ccjcxx_ccjcxx", casperjsResult181); // ?? 
String qygsUrl = HOST_OF_JILIN + "qygsdetail" + commonUrl; String[] command2 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + qygsUrl }; String casperjsResult2 = CommandUtil.runCommand(command2); resultHtmlMap.put("qygsxx_list", casperjsResult2); // ? ??->?-> Document qygsxxHtml = Jsoup.parseBodyFragment(casperjsResult2); Element qynbDiv = qygsxxHtml.getElementById("qiyenianbao"); if (null != qynbDiv) { Elements qynb_trs = qynbDiv.select("tbody").get(0).select("tr"); if (null != qynb_trs && qynb_trs.size() > 2) { List<Map<String, Object>> qygsxx_qynb_infos = new ArrayList<Map<String, Object>>(); for (int i = 2; i < qynb_trs.size(); i++) { Map<String, Object> qygsxx_qynb_info_map = new LinkedHashMap<String, Object>(); Element wdd = qynb_trs.get(i).select("td").get(1).select("a").get(0); String qygsxx_qynb_list_a_text = wdd.text(); String qygsxx_qynb_list_pubdate = qynb_trs.get(i).select("td").get(2).text(); qygsxx_qynb_info_map.put("qygsxx_qynb_list_a_text", qygsxx_qynb_list_a_text); qygsxx_qynb_info_map.put("qygsxx_qynb_list_pubdate", qygsxx_qynb_list_pubdate); String qynbxqUrl = HOST_OF_XQ + wdd.attr("href"); String[] command21 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + qynbxqUrl }; String casperjsResult21 = CommandUtil.runCommand(command21); qygsxx_qynb_info_map.put("qygsxx_qynb_info_page", casperjsResult21); qygsxx_qynb_infos.add(qygsxx_qynb_info_map); } resultHtmlMap.put("qygsxx_qynb_infos", qygsxx_qynb_infos); } } Thread.sleep(1000); // ??->??->?? 
String gdjczxxGdjczxxUrl = HOST_OF_JILIN + "qygsjsxxxzczxx?encrpripid=" + commonUrlZ; String[] command221 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + gdjczxxGdjczxxUrl }; String casperjsResult221 = CommandUtil.runCommand(command221); resultHtmlMap.put("qygsxx_gdjczxx_gdjczxx", casperjsResult221); // ??->??->?? String gdjczxxBgxxUrl = HOST_OF_JILIN + "qygsjsxxczxxbgsx?encrpripid=" + commonUrlZ; String[] command222 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + gdjczxxBgxxUrl }; String casperjsResult222 = CommandUtil.runCommand(command222); resultHtmlMap.put("qygsxx_gdjczxx_bgxx", casperjsResult222); // ??->???->??? String gqbgxxGqbgxxUrl = HOST_OF_JILIN + "qygsJsxxgqbg?encrpripid=" + commonUrlZ; String[] command231 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + gqbgxxGqbgxxUrl }; String casperjsResult231 = CommandUtil.runCommand(command231); resultHtmlMap.put("qygsxx_gqbgxx_gqbgxx", casperjsResult231); // ??->??->?? String xzxkxxXzxkxxUrl = HOST_OF_JILIN + "qygsjsxxxzxk?encrpripid=" + commonUrlZ; String[] command241 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + xzxkxxXzxkxxUrl }; String casperjsResult241 = CommandUtil.runCommand(command241); resultHtmlMap.put("qygsxx_xzxkxx_xzxkxx", casperjsResult241); // ??->??->?? 
String zscqczZscqczUrl = HOST_OF_JILIN + "/qygsjsxxzscqcz?encrpripid=" + commonUrlZ; String[] command251 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + zscqczZscqczUrl }; String casperjsResult251 = CommandUtil.runCommand(command251); resultHtmlMap.put("qygsxx_zscqcz_zscqcz", casperjsResult251); // ??->?->? String qygsxxXzcfxxUrl = HOST_OF_JILIN + "qygsjsxxxzcfxx?encrpripid=" + commonUrlZ; String[] command261 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/postJilinSimpleRequestPage.js", "--web-security=no", "--cookieStr=" + nowCookiesStr, "--refererStr=" + qygsUrl, "--tokenStr=" + tokenStr, "--url=" + qygsxxXzcfxxUrl }; String casperjsResult261 = CommandUtil.runCommand(command261); resultHtmlMap.put("qygsxx_zscqcz_zscqcz", casperjsResult261); // ? String qtbmUrl = HOST_OF_JILIN + "qtgsdetail" + commonUrl; String[] command3 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + qtbmUrl }; String casperjsResult3 = CommandUtil.runCommand(command3); resultHtmlMap.put("qtbmgsxx", casperjsResult3); // ???? String sfxzUrl = HOST_OF_JILIN + "sfgsdetail" + commonUrl; String[] command4 = { "casperjs", "/home/ubuntu/nfs-images/casperjscode/getSimpleRequestPage.js", "--web-security=no", "--url=" + sfxzUrl }; String casperjsResult4 = CommandUtil.runCommand(command4); resultHtmlMap.put("sfxzgsxx_list", casperjsResult4); resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS); } return resultHtmlMap; }
From source file:ca.appvelopers.mcgillmobile.model.retrofit.ScheduleConverter.java
@Override public List<Course> convert(ResponseBody value) throws IOException { List<Course> courses = new ArrayList<>(); //Parse the body into a Document Elements scheduleTable = Jsoup.parse(value.string()).getElementsByClass("datadisplaytable"); //Go through the schedule table for (int i = 0; i < scheduleTable.size(); i += 2) { //Get the current row in the schedule table Element row = scheduleTable.get(i); //Course title, code, and section String[] texts = row.getElementsByTag("caption").first().text().split(" - "); String title = texts[0].substring(0, texts[0].length() - 1); String code = texts[1];/*from ww w. jav a2 s . com*/ String section = texts[2]; //Parse the subject from the code String subject = ""; try { subject = code.substring(0, 4); } catch (StringIndexOutOfBoundsException e) { Timber.e(e, "Schedule Parser Error: Subject"); } String number = ""; try { number = code.substring(5, 8); } catch (StringIndexOutOfBoundsException e) { Timber.e(e, "Schedule Parser Error: Number"); } //CRN String crnString = row.getElementsByTag("tr").get(1).getElementsByTag("td").first().text(); int crn = -1; try { crn = Integer.parseInt(crnString); } catch (NumberFormatException e) { Timber.e(e, "Schedule Parser Error: CRN"); } //Credits String creditString = row.getElementsByTag("tr").get(5).getElementsByTag("td").first().text(); double credits = -1; try { credits = Double.parseDouble(creditString); } catch (NumberFormatException e) { Timber.e(e, "Schedule Parser Error: Credits"); } //Time, Days, Location, Type, Instructor if (i + 1 < scheduleTable.size() && scheduleTable.get(i + 1).attr("summary") .equals("This table lists the scheduled meeting times and assigned " + "instructors for this class..")) { //Get the rows with the schedule times Elements timeRows = scheduleTable.get(i + 1).getElementsByTag("tr"); for (int j = 1; j < timeRows.size(); j++) { //Get all of the cells of the current rows Elements cells = timeRows.get(j).getElementsByTag("td"); String[] times = {}; 
List<DayOfWeek> days = new ArrayList<>(); String location = ""; String dateRange = ""; String type = ""; String instructor = ""; try { times = cells.get(0).text().split(" - "); //Day Parsing String dayString = cells.get(1).text().replace('\u00A0', ' ').trim(); for (int k = 0; k < dayString.length(); k++) { days.add(DayUtils.getDay(dayString.charAt(k))); } location = cells.get(2).text(); dateRange = cells.get(3).text(); type = cells.get(4).text(); instructor = cells.get(5).text(); } catch (IndexOutOfBoundsException e) { Timber.e(e, "Schedule Parser Error: Course Info"); } //Time parsing LocalTime startTime, endTime; try { int startHour = Integer.parseInt(times[0].split(" ")[0].split(":")[0]); int startMinute = Integer.parseInt(times[0].split(" ")[0].split(":")[1]); int endHour = Integer.parseInt(times[1].split(" ")[0].split(":")[0]); int endMinute = Integer.parseInt(times[1].split(" ")[0].split(":")[1]); //If it's PM, then add 12 hours to the hours for 24 hours format //Make sure it isn't noon String startPM = times[0].split(" ")[1]; if (startPM.equals("PM") && startHour != 12) { startHour += 12; } String endPM = times[1].split(" ")[1]; if (endPM.equals("PM") && endHour != 12) { endHour += 12; } startTime = LocalTime.of(startHour, startMinute); endTime = LocalTime.of(endHour, endMinute); } catch (NumberFormatException e) { //Some classes don't have assigned times startTime = getDefaultStartTime(); endTime = getDefaultEndTime(); } //Date Range parsing LocalDate startDate, endDate; try { Pair<LocalDate, LocalDate> dates = parseDateRange(dateRange); startDate = dates.first; endDate = dates.second; } catch (IllegalArgumentException e) { Timber.e(e, "Schedule Parser Error: Date Range"); //Use today as the date if there's an error startDate = LocalDate.now(); endDate = LocalDate.now(); } //Add the course courses.add(new Course(subject, number, title, crn, section, startTime, endTime, days, type, location, instructor, credits, startDate, endDate)); } } else { //If there is 
no data to parse, reset i and continue i--; } } return courses; }
From source file:ca.zadrox.dota2esportticker.service.UpdateTeamsService.java
private void updateTopTeams() { LOGD(TAG, "starting update"); // actually, first, check for connectivity: if (!checkForConnectivity()) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_NO_CONNECTIVITY)); LOGD(TAG, "returning due to no connectivity"); return;/*from ww w. j a va 2 s. c o m*/ } // first, check last update time long lastUpdate = PrefUtils.lastTeamsUpdate(this); long currentTime = TimeUtils.getUTCTime(); // if last update is less than 1 hour old, boot user to cursorloader op. if (currentTime - lastUpdate < 60000 * 60) { LOGD(TAG, "returnning due to too soon"); LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_COMPLETED)); return; } // else // use local broadcast manager to show loading indicator LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_UPDATING)); final String BASE_URL = "http://www.gosugamers.net/dota2/rankings"; final String TEAM_LINK_BASE_URL = "http://www.gosugamers.net/dota2/teams/"; // we see what teams are in top 50. 
(httpreq -> gosugamers) try { String rawHtml = new OkHttpClient().newCall(new Request.Builder().url(BASE_URL).build()).execute() .body().string(); String processedHtml = rawHtml.substring(rawHtml.indexOf("<div id=\"col1\" class=\"rows\">"), rawHtml.indexOf("<div id=\"col2\" class=\"rows\">")); Elements teamRows = Jsoup.parse(processedHtml).getElementsByClass("ranking-link"); ExecutorService executorService = Executors.newFixedThreadPool(10); ContentValues[] teamRanks = new ContentValues[50]; HashMap<ContentValues, Future<String>> newTeamInfo = new HashMap<ContentValues, Future<String>>(); HashMap<ContentValues, Future<String>> updateTeamInfo = new HashMap<ContentValues, Future<String>>(); int i = 0; for (Element teamRow : teamRows) { ContentValues contentValues = new ContentValues(); String teamId = teamRow.attr("data-id"); contentValues.put(MatchContract.TeamEntry._ID, teamId); String untrimmedTeamName = teamRow.getElementsByTag("h4").first().text(); String teamUrl = TEAM_LINK_BASE_URL + teamId + "-" + untrimmedTeamName.replaceAll("[\\W]?[\\W][\\W]*", "-").toLowerCase(); contentValues.put(MatchContract.TeamEntry.COLUMN_TEAM_URL, teamUrl); String teamName = untrimmedTeamName.replaceAll(" ?\\.?\\-?-?Dot[aA][\\s]?2", ""); contentValues.put(MatchContract.TeamEntry.COLUMN_TEAM_NAME, teamName); if (teamUrl.charAt(teamUrl.length() - 1) == '-') { teamUrl = teamUrl.substring(0, teamUrl.length() - 2); } // then, we query db for id of the team ( Cursor cursor = getContentResolver().query( MatchContract.TeamEntry.buildTeamUri(Long.parseLong(teamId)), new String[] { MatchContract.TeamEntry.COLUMN_TEAM_NAME, MatchContract.TeamEntry.COLUMN_TEAM_URL }, null, null, null); // -> if present, and data remains unchanged, continue. // -> if present, but data is changed, add to update queue. 
if (cursor.moveToFirst()) { LOGD(TAG, "Have team already?"); if (!cursor.getString(0).contentEquals(teamName) || !cursor.getString(1).contentEquals(teamUrl)) { LOGD(TAG, "Team has updated values."); updateTeamInfo.put(contentValues, executorService.submit(new TeamGetter(teamUrl))); } } // -> if not present, add to update queue. else { LOGD(TAG, "Do team update"); newTeamInfo.put(contentValues, executorService.submit(new TeamGetter(teamUrl))); } // LOGD(TAG, "\n" + // "data-id: " + teamId + "\n" + // "team-name: " + teamName + "\n" + // "team-url: " + teamUrl); teamRanks[i] = new ContentValues(); teamRanks[i].put(MatchContract.TeamRankEntry._ID, i + 1); teamRanks[i].put(MatchContract.TeamRankEntry.COLUMN_TEAM_ID, teamId); cursor.close(); i++; } executorService.shutdown(); executorService.awaitTermination(20, TimeUnit.SECONDS); for (ContentValues contentValues : newTeamInfo.keySet()) { try { String teamLogo = newTeamInfo.get(contentValues).get(); contentValues.put(MatchContract.TeamEntry.COLUMN_TEAM_LOGO_URL, teamLogo); } catch (ExecutionException e) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_ERROR)); e.printStackTrace(); } } for (ContentValues contentValues : updateTeamInfo.keySet()) { try { String teamLogo = updateTeamInfo.get(contentValues).get(); contentValues.put(MatchContract.TeamEntry.COLUMN_TEAM_LOGO_URL, teamLogo); String teamId = contentValues.getAsString(MatchContract.TeamEntry._ID); contentValues.remove(MatchContract.TeamEntry._ID); int updatedRows = getContentResolver().update(MatchContract.TeamEntry.CONTENT_URI, contentValues, MatchContract.TeamEntry.TABLE_NAME + "." 
+ MatchContract.TeamEntry._ID + " = ?", new String[] { teamId }); LOGD(TAG, "updatedRows: " + updatedRows); } catch (ExecutionException e) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_ERROR)); e.printStackTrace(); } } getContentResolver().bulkInsert(MatchContract.TeamEntry.CONTENT_URI, newTeamInfo.keySet().toArray(new ContentValues[newTeamInfo.size()])); getContentResolver().bulkInsert(MatchContract.TeamRankEntry.CONTENT_URI, teamRanks); } catch (IOException e) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_ERROR)); e.printStackTrace(); } catch (InterruptedException e2) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_ERROR)); e2.printStackTrace(); } // String[] projection = new String[]{ // MatchContract.TeamEntry.TABLE_NAME + "." + MatchContract.TeamEntry._ID, // MatchContract.TeamEntry.COLUMN_TEAM_NAME, // MatchContract.TeamEntry.COLUMN_TEAM_URL, // MatchContract.TeamEntry.COLUMN_TEAM_LOGO_URL, // MatchContract.TeamEntry.COLUMN_TEAM_STARRED, // MatchContract.TeamRankEntry.TABLE_NAME + "." + MatchContract.TeamRankEntry._ID // }; // // String sortOrder = // MatchContract.TeamRankEntry.TABLE_NAME + "." + // MatchContract.TeamRankEntry._ID + " ASC"; // // Cursor c = getContentResolver().query( // MatchContract.TeamEntry.TOP_50_URI, // projection, // null, // null, // sortOrder // ); // // while (c.moveToNext()) { // String teamPrintOut = // "Rank: " + c.getInt(5) + "\n" + // "teamId: " + c.getInt(0) + " teamName: " + c.getString(1) + "\n" + // "teamUrl: " + c.getString(2) + "\n" + // "teamLogoUrl: " + c.getString(3) + "\n" + // "isFavourited: " + (c.getInt(4) == 0 ? "false" : "true"); // LOGD(TAG + "/UTT", teamPrintOut); // } // // c.close(); // use local broadcast manager to hide loading indicator // and signal that cursorloader for top50 can happen. PrefUtils.setLastTeamUpdate(this, currentTime); LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_COMPLETED)); }
From source file:ca.zadrox.dota2esportticker.service.UpdateTeamsService.java
private void updateSearchedTeams(String searchName) { LOGD(TAG, "starting search update"); // actually, first, check for connectivity: if (!checkForConnectivity()) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_NO_CONNECTIVITY)); LOGD(TAG, "returning due to no connectivity"); return;/*from ww w .j a v a2s.c om*/ } LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_UPDATING)); final String BASE_URL = "http://www.gosugamers.net/dota2/rankings" + "?tname=" + searchName.replace(' ', '+') + "&tunranked=0#team"; final String TEAM_LINK_BASE_URL = "http://www.gosugamers.net/dota2/teams/"; try { String rawHtml = new OkHttpClient().newCall(new Request.Builder().url(BASE_URL).build()).execute() .body().string(); String processedHtml = rawHtml.substring(rawHtml.indexOf("<div id=\"col1\" class=\"rows\">"), rawHtml.indexOf("<div id=\"col2\" class=\"rows\">")); Elements teamRows = Jsoup.parse(processedHtml).getElementsByClass("ranking-link"); ExecutorService executorService = Executors.newFixedThreadPool(10); HashMap<ContentValues, Future<String>> newTeamInfo = new HashMap<ContentValues, Future<String>>(); HashMap<ContentValues, Future<String>> updateTeamInfo = new HashMap<ContentValues, Future<String>>(); for (Element teamRow : teamRows) { ContentValues contentValues = new ContentValues(); String teamId = teamRow.attr("data-id"); contentValues.put(MatchContract.TeamEntry._ID, teamId); String untrimmedTeamName = teamRow.getElementsByTag("h4").first().text(); String teamUrl = TEAM_LINK_BASE_URL + teamId + "-" + untrimmedTeamName.replaceAll("[\\W]?[\\W][\\W]*", "-").toLowerCase(); contentValues.put(MatchContract.TeamEntry.COLUMN_TEAM_URL, teamUrl); String teamName = untrimmedTeamName.replaceAll(" ?\\.?\\-?-?Dot[aA][\\s]?2", ""); contentValues.put(MatchContract.TeamEntry.COLUMN_TEAM_NAME, teamName); if (teamUrl.charAt(teamUrl.length() - 1) == '-') { teamUrl = teamUrl.substring(0, teamUrl.length() - 2); } // then, we query db for id of 
the team ( Cursor cursor = getContentResolver().query( MatchContract.TeamEntry.buildTeamUri(Long.parseLong(teamId)), new String[] { MatchContract.TeamEntry.COLUMN_TEAM_NAME, MatchContract.TeamEntry.COLUMN_TEAM_URL }, null, null, null); // -> if present, and data remains unchanged, continue. // -> if present, but data is changed, add to update queue. if (cursor.moveToFirst()) { LOGD(TAG, "Team in DB, determining if values need updating"); if (!cursor.getString(0).contentEquals(teamName) || !cursor.getString(1).contentEquals(teamUrl)) { LOGD(TAG, "Team has updated values, double checking logo & writing to DB"); updateTeamInfo.put(contentValues, executorService.submit(new TeamGetter(teamUrl))); } } // -> if not present, add to update queue. else { LOGD(TAG, "Team not in DB. Grabbing logo & writing to DB"); newTeamInfo.put(contentValues, executorService.submit(new TeamGetter(teamUrl))); } // LOGD(TAG, "\n" + // "data-id: " + teamId + "\n" + // "team-name: " + teamName + "\n" + // "team-url: " + teamUrl); // cursor.close(); } executorService.shutdown(); executorService.awaitTermination(20, TimeUnit.SECONDS); for (ContentValues contentValues : newTeamInfo.keySet()) { try { String teamLogo = newTeamInfo.get(contentValues).get(); contentValues.put(MatchContract.TeamEntry.COLUMN_TEAM_LOGO_URL, teamLogo); } catch (ExecutionException e) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_ERROR)); e.printStackTrace(); } } for (ContentValues contentValues : updateTeamInfo.keySet()) { try { String teamLogo = newTeamInfo.get(contentValues).get(); contentValues.put(MatchContract.TeamEntry.COLUMN_TEAM_LOGO_URL, teamLogo); String teamId = contentValues.getAsString(MatchContract.TeamEntry._ID); contentValues.remove(MatchContract.TeamEntry._ID); int updatedRows = getContentResolver().update(MatchContract.TeamEntry.CONTENT_URI, contentValues, MatchContract.TeamEntry.TABLE_NAME + "." 
+ MatchContract.TeamEntry._ID + " = ?", new String[] { teamId }); LOGD(TAG, "updatedRows: " + updatedRows); } catch (ExecutionException e) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_ERROR)); e.printStackTrace(); } } getContentResolver().bulkInsert(MatchContract.TeamEntry.CONTENT_URI, newTeamInfo.keySet().toArray(new ContentValues[newTeamInfo.size()])); } catch (IOException e) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_ERROR)); e.printStackTrace(); } catch (InterruptedException e2) { LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_ERROR)); e2.printStackTrace(); } // String[] projection = new String[]{ // MatchContract.TeamEntry.TABLE_NAME + "." + MatchContract.TeamEntry._ID, // MatchContract.TeamEntry.COLUMN_TEAM_NAME, // MatchContract.TeamEntry.COLUMN_TEAM_URL, // MatchContract.TeamEntry.COLUMN_TEAM_LOGO_URL, // MatchContract.TeamEntry.COLUMN_TEAM_STARRED, // }; // // String sortOrder = // MatchContract.TeamEntry.COLUMN_TEAM_NAME + " ASC"; // // Cursor c = getContentResolver().query( // MatchContract.TeamEntry.CONTENT_URI, // projection, // MatchContract.TeamEntry.COLUMN_TEAM_NAME + " LIKE '%" + searchName + "%'", // null, // sortOrder // ); // // LOGD(TAG+"/UST", "Starting Printout: "); // int i = 0; // while (c.moveToNext()) { // String teamPrintOut = // "teamId: " + c.getInt(0) + " teamName: " + c.getString(1) + "\n" + // "teamUrl: " + c.getString(2) + "\n" + // "teamLogoUrl: " + c.getString(3) + "\n" + // "isFavourited: " + (c.getInt(4) == 0 ? "false" : "true"); // LOGD(TAG + "/UST", teamPrintOut); // i++; // } // LOGD(TAG+"/UST", "Stopping Printout. Count: " + i); // // c.close(); // use local broadcast manager to hide loading indicator // and signal that cursorloader for top50 can happen. LocalBroadcastManager.getInstance(this).sendBroadcast(new Intent(STATUS_COMPLETED)); }
From source file:cc.metapro.openct.grades.GradePresenter.java
/**
 * Queries the CET grade service on a new thread and hands the parsed result to the view on
 * the main thread.
 *
 * <p>The first six {@code td} cells of the first {@code table[class=cetTable]} in the response
 * are mapped, in order, to the full-name, school, CET-type, ticket-number, CET-time and
 * CET-grade keys. On any error a toast is shown and an empty map is delivered instead.
 *
 * @param queryMap lookup parameters keyed by the ticket-number and full-name string resources
 */
@Override
public void loadCETGrade(final Map<String, String> queryMap) {
    ObservableOnSubscribe<Map<String, String>> gradeSource = new ObservableOnSubscribe<Map<String, String>>() {
        @Override
        public void subscribe(ObservableEmitter<Map<String, String>> e) throws Exception {
            CETService cetService = ServiceCenter.createCETService();
            String responseBody = cetService
                    .queryCET(mContext.getString(R.string.url_chsi_referer),
                            queryMap.get(mContext.getString(R.string.key_ticket_num)),
                            queryMap.get(mContext.getString(R.string.key_full_name)), "t")
                    .execute().body();

            Elements cells = Jsoup.parse(responseBody).select("table[class=cetTable]").first()
                    .getElementsByTag("td");

            // Result keys in the same order as the table cells they are read from
            int[] cellKeys = { R.string.key_full_name, R.string.key_school, R.string.key_cet_type,
                    R.string.key_ticket_num, R.string.key_cet_time, R.string.key_cet_grade };

            Map<String, String> results = new HashMap<>(6);
            for (int cell = 0; cell < cellKeys.length; cell++) {
                results.put(mContext.getString(cellKeys[cell]), cells.get(cell).text());
            }
            e.onNext(results);
        }
    };

    Consumer<Map<String, String>> showGrade = new Consumer<Map<String, String>>() {
        @Override
        public void accept(Map<String, String> stringMap) throws Exception {
            mView.onLoadCETGrade(stringMap);
        }
    };

    Function<Throwable, Map<String, String>> reportFailure = new Function<Throwable, Map<String, String>>() {
        @Override
        public Map<String, String> apply(Throwable throwable) throws Exception {
            Toast.makeText(mContext, R.string.fetch_cet_fail, Toast.LENGTH_SHORT).show();
            return new HashMap<>();
        }
    };

    Observable.create(gradeSource)
            .subscribeOn(Schedulers.newThread())
            .observeOn(AndroidSchedulers.mainThread())
            .doOnNext(showGrade)
            .onErrorReturn(reportFailure)
            .subscribe();
}
From source file:com.near.chimerarevo.fragments.PostFragment.java
private void parseHTML(String html) { Document doc = Jsoup.parse(html); Elements el = doc.body().children(); for (Element e : el) { if (e.getElementsByTag("h1").size() > 0) parseTitles(e.getElementsByTag("h1"), 1); if (e.getElementsByTag("h2").size() > 0) parseTitles(e.getElementsByTag("h2"), 2); if (e.getElementsByTag("h3").size() > 0) parseTitles(e.getElementsByTag("h3"), 3); if (e.getElementsByTag("h4").size() > 0) parseTitles(e.getElementsByTag("h4"), 4); if (e.getElementsByTag("h5").size() > 0) parseTitles(e.getElementsByTag("h5"), 5); if (e.getElementsByTag("p").size() > 0) parseParagraphs(e.getElementsByTag("p")); if (e.getElementsByTag("img").size() > 0) parseNormalImages(e.getElementsByTag("img")); if (e.getElementsByTag("a").size() > 0) parseLinkedImages(e.getElementsByTag("a")); if (e.getElementsByTag("iframe").size() > 0) parseYoutubeVideos(e.getElementsByTag("iframe")); if (e.getElementsByTag("ul").size() > 0) parseBulletedLists(e.getElementsByTag("ul")); if (e.getElementsByTag("ol").size() > 0) parseOrderedLists(e.getElementsByTag("ol")); if (e.getElementsByTag("pre").size() > 0) parseCodeText(e.getElementsByTag("pre")); if (e.getElementsByTag("tr").size() > 0) parseTables(e.getElementsByTag("tr")); }// w w w. j av a 2 s.c o m ((PostContainerActivity) getActivity()).setIsLoading(false); }
From source file:com.near.chimerarevo.fragments.PostFragment.java
/**
 * Renders paragraph elements as text.
 *
 * <p>A paragraph is skipped when its inner HTML starts with an entity, an iframe or a
 * comment/declaration, or contains heading, list, pre or table-row markup. For the remaining
 * paragraphs, images are handed to the image handler and removed, links whose href starts
 * with '#' lose their href, and the leftover markup is added as text when it is not empty.
 *
 * @param ps paragraph elements to render
 */
private void parseParagraphs(Elements ps) {
    for (Element para : ps) {
        String markup = para.html();

        boolean handledElsewhere = markup.startsWith("&")
                || markup.startsWith("<iframe")
                || markup.startsWith("<!")
                || markup.contains("<h")
                || markup.contains("<ol")
                || markup.contains("<ul")
                || markup.contains("<pre")
                || markup.contains("<tr");
        if (handledElsewhere) {
            continue;
        }

        // Images are rendered separately, then stripped so they are not repeated in the text
        parseNormalImages(para.select("img"));
        para.select("img").remove();

        // In-page anchors are stripped of their href
        for (Element anchor : para.getElementsByTag("a")) {
            String target = anchor.attr("href");
            if (target.startsWith("#")) {
                anchor.removeAttr("href");
            }
        }

        String text = para.html().replace("<br />", "").replace("\n", "").trim();
        if (!text.isEmpty()) {
            addText(text, true, Typeface.DEFAULT);
        }
    }
}
From source file:com.near.chimerarevo.fragments.PostFragment.java
private void parseBulletedLists(Elements itms) { String bld = ""; for (Element itm : itms) { Elements str = itm.getElementsByTag("li"); for (Element itm2 : str) { if (itm2.children().size() >= 1) { Elements ch = itm2.getElementsByTag("a"); for (Element c : ch) { if (c.attr("href").contains("#")) c.removeAttr("href"); }//w w w.j a v a2s . c om } bld += ("\u2022 " + itm2.outerHtml() + "<br />"); } } addText(bld, true, Typeface.DEFAULT); }