Java tutorial
/* * Copyright 2015 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * All rights reserved. */ package info.smartkit.hairy_batman.query; import info.smartkit.hairy_batman.config.GlobalConsts; import info.smartkit.hairy_batman.config.GlobalVariables; import info.smartkit.hairy_batman.domain.WxComplexSubscriber; import info.smartkit.hairy_batman.plain.WxSogou; import info.smartkit.hairy_batman.plain.WxSogouSimple; import info.smartkit.hairy_batman.reports.FileReporter; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Iterator; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.map.JsonMappingException; import org.codehaus.jackson.map.ObjectMapper; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.dao.DataAccessException; import org.springframework.util.LinkedMultiValueMap; import org.springframework.util.MultiValueMap; /** * Abstract query class for html(based on Sogou search) reading and parsing then * querying. * * @author yangboz */ public class SogouSearchQuery { private static Logger LOG = LogManager.getLogger(SogouSearchQuery.class); protected WxComplexSubscriber wxFoo; MultiValueMap<String, String> parameters; public SogouSearchQuery() { } public SogouSearchQuery(WxComplexSubscriber wxFoo) { this.wxFoo = wxFoo; this.parameters = new LinkedMultiValueMap<String, String>(); } public void parseWxOpenId() { Document doc; try { // need http protocol // doc = Jsoup.connect(GlobalConsts.SOGOU_SEARCH_URL_BASE+ wxFoo.getSubscribeId()).get(); doc = Jsoup.connect("http://weixin.sogou.com/weixin?type=1&query=" + wxFoo.getSubscribeId() + "&fr=sgsearch&ie=utf8&_ast=1423915648&_asf=null&w=01019900&cid=null&sut=19381").get(); LOG.debug("openID html INFO:" + doc.html()); // get page title String title = doc.title(); LOG.debug("title : " + title); // get all "?:" value of html <span> //Elements openIdLink = doc.select(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_HTML_ELEMENTS).select(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_HTML_ELE_IDENTITY); Elements openIdLink = doc.getElementsByClass("wx-rb"); Element a = null; String openIdLinkHref = ""; if (openIdLink != null && openIdLink.size() > 0) { Iterator<Element> itea = openIdLink.iterator(); while (itea.hasNext()) { a = itea.next(); LOG.debug("openID html INFO:" + a.html()); if (a.getElementsByTag("em").html().indexOf(wxFoo.getSubscribeId()) != -1) { break; } } } if (a != null) { openIdLinkHref = a.attr("href"); } LOG.debug("openIdLinkHref:" + openIdLinkHref); // FIXME:???? if (this.wxFoo.getOpenId() == null && openIdLinkHref.length() > 0) { this.wxFoo.setOpenId(openIdLinkHref.split(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_KEYWORDS)[1]); LOG.info("saved wxOpenId value: " + this.wxFoo.getOpenId()); GlobalVariables.wxFooListWithOpenId.add(this.wxFoo); // File reporting new FileReporter(GlobalConsts.REPORT_FILE_OUTPUT_OPENID, GlobalVariables.wxFooListWithOpenId, FileReporter.REPORTER_TYPE.R_T_OPENID, FileReporter.REPORTER_FILE_TYPE.EXCEL).write(); // Then,OpenID JSON site parse if (this.wxFoo.getOpenId() != null) { // Save openId to DB. try { GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)", new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(), this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(), this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), this.wxFoo.getOpenId() }, new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR }); this.parseSogouJsonSite(this.wxFoo.getOpenId()); } catch (DataAccessException e) { e.printStackTrace(); } } else { LOG.warn("SogouSearchQuery getOpenId Failure! site info:" + wxFoo.getCode()); // TODO write those info to File or DB for collect which // agency not open weixin service // Save openId to DB. try { GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)", new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(), this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(), this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), "" }, new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR }); LOG.warn("Can not get subsriber info: " + this.wxFoo.getCode()); this.parseSogouJsonSite(this.wxFoo.getOpenId()); } catch (DataAccessException e) { e.printStackTrace(); } } } } catch (IOException e) { // e.printStackTrace(); LOG.error(e.toString()); } } public void parseWxUserId() { Document doc; try { // need http protocol doc = Jsoup.connect(GlobalConsts.SOGOU_SEARCH_URL_BASE + wxFoo.getSubscribeId()).get(); // get all "?:" value of html <span> Elements openIdSpans = doc.select(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_HTML_ELEMENTS); // for (Element openIdSpan : openIdSpans) { if (openIdSpan.hasText()) { if (openIdSpan.text().contains(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_KEYWORDS)) { // get the value from href attribute LOG.info("openId span text : " + openIdSpan.text()); // FIXME:???? if (this.wxFoo.getUserId() == null) { this.wxFoo.setOpenId( openIdSpan.text().split(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_KEYWORDS)[1]); LOG.info("saved wxUserId value: " + this.wxFoo.getUserId()); GlobalVariables.wxFooListWithUserId.add(this.wxFoo); } } } } } catch (IOException e) { // e.printStackTrace(); LOG.error(e.toString()); } } // @see: // http://weixin.sogou.com/gzhjs?cb=sogou.weixin.gzhcb&openid=oIWsFt_Ri_gqjARIY_shVuqjc3Zo public void parseSogouJsonSite(String openId) { ObjectMapper mapper = new ObjectMapper(); // can reuse, share globally WxSogou wxSogouJson = null; try { int i = 2; String url = GlobalConsts.SOGOU_SEARCH_URL_JSON + openId + "&page=1"; String content = this.getJsonContent(url); if (content != null && content.length() > 0) { wxSogouJson = mapper.readValue(this.getJsonContent(url), WxSogou.class); GlobalVariables.openIdWithArticleList.put(openId, wxSogouJson);// Store it. this.assembleWxfooListWithAritcle(wxSogouJson, openId); } else { LOG.error("Error get info from weixin.sogou.com. URL is :" + url); LOG.error(wxFoo.getCode() + ":" + content); } // Thread.sleep(6000); long totalPages = wxSogouJson.getTotalPages(); LOG.info(wxFoo.getCode() + " -- totalPages:" + totalPages); while (i < totalPages + 1) { Thread.sleep(2 * 1000); content = this.getJsonContent(GlobalConsts.SOGOU_SEARCH_URL_JSON + openId + "&page=" + i); wxSogouJson = mapper.readValue(content, WxSogou.class); this.assembleWxfooListWithAritcle(wxSogouJson, openId); GlobalVariables.openIdWithArticleList.put(openId, wxSogouJson);// Store // it. // Thread.sleep(6000); i++; } wxSogouJson = null; } catch (JsonParseException e) { e.printStackTrace(); } catch (JsonMappingException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } public String getJsonContent(String urlStr) { try {// ?HttpURLConnection URL url = new URL(urlStr); HttpURLConnection httpConn = (HttpURLConnection) url.openConnection(); // httpConn.setConnectTimeout(3000); httpConn.setDoInput(true); httpConn.setRequestMethod("GET"); // ?? int respCode = httpConn.getResponseCode(); if (respCode == 200) { String a = ConvertStream2Json(httpConn.getInputStream()); return a.substring(a.indexOf("(") + 1, a.lastIndexOf(")")); } } catch (MalformedURLException e) { LOG.error(e.toString()); } catch (IOException e) { LOG.error(e.toString()); } return ""; } private String ConvertStream2Json(InputStream inputStream) { String jsonStr = ""; // ByteArrayOutputStream? ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len = 0; // ?? try { while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) { out.write(buffer, 0, len); } // ?? jsonStr = new String(out.toByteArray(), "UTF-8"); } catch (IOException e) { e.printStackTrace(); } return jsonStr; } private void assembleWxfooListWithAritcle(WxSogou wxSogou, String openId) { LOG.info("wxSogou json result:" + wxSogou.toString()); // ArrayList<WxSogouSimple> titlesUrls = wxSogou.getTitlesUrls(); LOG.info("Artilec size:" + titlesUrls.size()); for (WxSogouSimple titleUrl : titlesUrls) { WxComplexSubscriber subscriber = new WxComplexSubscriber(); subscriber.setId(this.wxFoo.getId()); subscriber.setCode(this.wxFoo.getCode()); subscriber.setStore(this.wxFoo.getStore()); subscriber.setAgency(this.wxFoo.getAgency()); subscriber.setUnit(this.wxFoo.getUnit()); subscriber.setOnSubscribe(this.wxFoo.getOnSubscribe()); subscriber.setSubscribeId(this.wxFoo.getSubscribeId()); subscriber.setOpenId(openId); subscriber.setArticleTitle(titleUrl.getTitle()); subscriber.setArticleUrl(titleUrl.getUrl()); subscriber.setArticleTime(titleUrl.getDate()); GlobalVariables.wxFooListWithOpenIdArticle.add(subscriber); // Save values to DB(wxArticle). GlobalVariables.jdbcTempate.update(GlobalConsts.JDBC_QUERY_INSERT_OPENID_ARTICLE, titleUrl.getDate(), titleUrl.getTitle(), titleUrl.getUrl(), openId); } /* * LOG.info("GlobalVariables.wxFooListWithOpenIdArticle(size): " + * GlobalVariables.wxFooListWithOpenIdArticle.size() + ", raw: " + * GlobalVariables.wxFooListWithOpenIdArticle.toString()); // File * reporting... new * FileReporter(GlobalConsts.REPORT_FILE_OUTPUT_OPENID_ARITICLE, * GlobalVariables.wxFooListWithOpenIdArticle, * FileReporter.REPORTER_TYPE.R_T_OPENID_ARTICLE, * FileReporter.REPORTER_FILE_TYPE.EXCEL).write(); // KJSON API call. if * (GlobalVariables.wxFooListWithOpenIdArticle.size() >= 1) { new * KJsonApiQuery(GlobalVariables.wxFooListWithOpenIdArticle).query(); * LOG.debug("KJsonApiQuery processing..." + wxFoo.toString()); } */ } }