Java tutorial
/**
 *
 * APDPlat - Application Product Development Platform
 * Copyright (c) 2013, ??, yang-shangchuan@qq.com
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
package org.apdplat.qa.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.ansj.domain.Term;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apdplat.qa.datasource.DataSource;
import org.apdplat.qa.datasource.FileDataSource;
import org.apdplat.qa.model.Evidence;
import org.apdplat.qa.model.Question;
import org.apdplat.qa.parser.WordParser;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helper methods used by the question-answering code: duration
 * formatting, subset enumeration, per-term document counts, substring/regex
 * counting, map sorting, simple file/stream I/O and evidence retrieval.
 *
 * @author ??
 */
public class Tools {

    private static final Logger LOG = LoggerFactory.getLogger(Tools.class);

    /**
     * Largest list size {@link #getCom(List)} accepts; subsets are enumerated
     * with an {@code int} bit mask, so {@code 1 << size} must not overflow.
     */
    private static final int MAX_COMBINATION_SIZE = 30;

    /** Term -> number of evidences containing the term; rebuilt by {@link #initIDF(List)}. */
    private static Map<String, Integer> map = new HashMap<>();

    /**
     * Formats a duration as a comma-separated list of its non-zero day, hour,
     * minute, second and millisecond components, largest unit first.
     *
     * <p>NOTE(review): the unit suffixes in the string literals look like they
     * lost their non-ASCII characters to an encoding problem; they are kept
     * exactly as found. Confirm against the upstream source before changing.
     *
     * @param ms duration in milliseconds
     * @return the formatted components, or an empty string when no component is positive
     */
    public static String getTimeDes(long ms) {
        final int ss = 1000;
        final int mi = ss * 60;
        final int hh = mi * 60;
        final int dd = hh * 24;

        long day = ms / dd;
        long rest = ms % dd;
        long hour = rest / hh;
        rest %= hh;
        long minute = rest / mi;
        rest %= mi;
        long second = rest / ss;
        long milliSecond = rest % ss;

        StringBuilder str = new StringBuilder();
        if (day > 0) {
            str.append(day).append(",");
        }
        if (hour > 0) {
            str.append(hour).append("?,");
        }
        if (minute > 0) {
            str.append(minute).append(",");
        }
        if (second > 0) {
            str.append(second).append(",");
        }
        if (milliSecond > 0) {
            str.append(milliSecond).append(",");
        }
        // Drop the trailing separator.
        if (str.length() > 0) {
            str.deleteCharAt(str.length() - 1);
        }
        return str.toString();
    }

    /**
     * Enumerates every non-empty subset of {@code list}. Subsets are produced
     * in increasing bit-mask order, and elements inside a subset keep their
     * order from {@code list}.
     *
     * @param list the source elements
     * @param <T> element type
     * @return all {@code 2^n - 1} non-empty subsets; empty when {@code list} is empty
     * @throws IllegalArgumentException if {@code list} has more than
     *         {@value #MAX_COMBINATION_SIZE} elements (the int bit mask would
     *         overflow and silently yield wrong results)
     */
    public static <T> List<List<T>> getCom(List<T> list) {
        // Snapshot with random access; also avoids the unchecked T[] cast.
        List<T> data = new ArrayList<>(list);
        int size = data.size();
        if (size > MAX_COMBINATION_SIZE) {
            throw new IllegalArgumentException(
                    "Cannot enumerate subsets of " + size + " elements (max " + MAX_COMBINATION_SIZE + ")");
        }
        List<List<T>> result = new ArrayList<>();
        int max = 1 << size;
        for (int mask = 1; mask < max; mask++) {
            List<T> sub = new ArrayList<>();
            for (int j = 0; j < size; j++) {
                if ((mask & (1 << j)) != 0) {
                    sub.add(data.get(j));
                }
            }
            result.add(sub);
        }
        return result;
    }

    /**
     * Loads the questions from a material file and prints each one to standard
     * output as {@code question:expectedAnswer}.
     *
     * @param file the file handed to {@link FileDataSource}
     */
    public static void extractQuestions(String file) {
        DataSource dataSource = new FileDataSource(file);
        for (Question question : dataSource.getQuestions()) {
            System.out.println(question.getQuestion().trim() + ":" + question.getExpectAnswer());
        }
    }

    /**
     * Loads the questions from a material file and prints each one to standard
     * output prefixed with {@code pattern} and a space.
     *
     * @param file the file handed to {@link FileDataSource}
     * @param pattern the label printed in front of every question
     */
    public static void extractPatterns(String file, String pattern) {
        DataSource dataSource = new FileDataSource(file);
        for (Question question : dataSource.getQuestions()) {
            System.out.println(pattern + " " + question.getQuestion().trim());
        }
    }

    /**
     * Looks up the count recorded for {@code term} by the last
     * {@link #initIDF(List)} call.
     *
     * @param term the term to look up
     * @return the number of evidences that contained the term, or 0 if unknown
     */
    public static int getIDF(String term) {
        Integer idf = map.get(term);
        if (idf == null) {
            return 0;
        }
        LOG.info("idf {}:{}", term, idf);
        return idf;
    }

    /**
     * Rebuilds the term table: for every evidence of every question, the title
     * and snippet are segmented and each distinct term name is counted once
     * per evidence.
     *
     * @param questions the questions whose evidences are scanned
     * @return the table entries sorted by ascending count
     */
    public static List<Map.Entry<String, Integer>> initIDF(List<Question> questions) {
        map = new HashMap<>();
        for (Question question : questions) {
            for (Evidence evidence : question.getEvidences()) {
                // Distinct terms only, so a term counts once per evidence.
                Set<String> distinctTerms = new HashSet<>();
                for (Term term : WordParser.parse(evidence.getTitle() + evidence.getSnippet())) {
                    distinctTerms.add(term.getName());
                }
                for (String item : distinctTerms) {
                    Integer doc = map.get(item);
                    map.put(item, doc == null ? 1 : doc + 1);
                }
            }
        }
        List<Map.Entry<String, Integer>> list = Tools.sortByIntegerValue(map);
        for (Map.Entry<String, Integer> entry : list) {
            LOG.debug("{} {}", entry.getKey(), entry.getValue());
        }
        return list;
    }

    /**
     * Downloads the document at {@code url} line by line and passes the HTML
     * to {@code TextExtract.parse}. The stream is decoded with the platform
     * default charset.
     *
     * @param url the address to fetch
     * @return the extracted text, or {@code null} if fetching or parsing failed
     */
    public static String getHTMLContent(String url) {
        // try-with-resources: the original never closed the reader.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new URL(url).openStream()))) {
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append("\n");
            }
            return TextExtract.parse(html.toString());
        } catch (Exception e) {
            LOG.debug("?URL" + url, e);
        }
        return null;
    }

    /**
     * Segments {@code text} into terms.
     *
     * @param text the text to segment
     * @return a new list holding the terms returned by {@link WordParser#parse}
     */
    public static List<Term> getTerms(String text) {
        return new ArrayList<>(WordParser.parse(text));
    }

    /**
     * Counts the non-overlapping matches of a regular expression in a text.
     *
     * @param text the text to search
     * @param pattern the regular expression
     * @return the number of matches found
     */
    public static int countsForSkipBigram(String text, String pattern) {
        int count = 0;
        Matcher matcher = Pattern.compile(pattern).matcher(text);
        while (matcher.find()) {
            LOG.debug("??{}", matcher.group());
            count++;
        }
        return count;
    }

    /**
     * Counts the occurrences of a literal substring in a text. Overlapping
     * occurrences are counted, because the search resumes one character after
     * each hit.
     *
     * @param text the text to search
     * @param pattern the literal substring
     * @return the number of occurrences; 0 for an empty pattern
     */
    public static int countsForBigram(String text, String pattern) {
        // An empty pattern matches everywhere and would never terminate.
        if (pattern.isEmpty()) {
            return 0;
        }
        int count = 0;
        int index = text.indexOf(pattern);
        // ">= 0", not "> 0": a hit at offset 0 is a valid occurrence.
        while (index >= 0) {
            LOG.debug("?: {} ?{}", pattern, index);
            count++;
            index = text.indexOf(pattern, index + 1);
        }
        return count;
    }

    /**
     * Returns the entries of {@code map} sorted by ascending value.
     *
     * @param map the map to sort
     * @param <K> key type
     * @return a new list of the map's entries in ascending value order
     */
    public static <K> List<Map.Entry<K, Integer>> sortByIntegerValue(Map<K, Integer> map) {
        List<Map.Entry<K, Integer>> orderList = new ArrayList<>(map.entrySet());
        Collections.sort(orderList, new Comparator<Map.Entry<K, Integer>>() {
            @Override
            public int compare(Map.Entry<K, Integer> o1, Map.Entry<K, Integer> o2) {
                // compareTo instead of subtraction, which overflows for distant values.
                return o1.getValue().compareTo(o2.getValue());
            }
        });
        return orderList;
    }

    /**
     * Returns the entries of {@code map} sorted by ascending value.
     *
     * @param map the map to sort
     * @param <K> key type
     * @return a new list of the map's entries in ascending value order
     */
    public static <K> List<Map.Entry<K, Double>> sortByDoubleValue(Map<K, Double> map) {
        List<Map.Entry<K, Double>> orderList = new ArrayList<>(map.entrySet());
        Collections.sort(orderList, new Comparator<Map.Entry<K, Double>>() {
            @Override
            public int compare(Map.Entry<K, Double> o1, Map.Entry<K, Double> o2) {
                // Double.compare is a total order; a difference-based comparison
                // treats NaN as equal to everything and breaks the sort contract.
                return Double.compare(o1.getValue(), o2.getValue());
            }
        });
        return orderList;
    }

    /**
     * Writes {@code text} to {@code path} as UTF-8, creating missing parent
     * directories and replacing any existing file. Failures are logged, not
     * thrown.
     *
     * @param path the destination file
     * @param text the content to write
     */
    public static void createAndWriteFile(String path, String text) {
        File file = new File(path);
        // getParentFile() is null for a bare file name; nothing to create then.
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), "utf-8"))) {
            writer.write(text);
        } catch (Exception ex) {
            LOG.error("?", ex);
        }
    }

    /**
     * Returns supporting text for a question/answer pair. The text stored by
     * {@code MySQLUtils} is used when present; otherwise the Google AJAX web
     * search endpoint is queried with {@code question + answer}, the result
     * titles and snippets are concatenated, and the text is stored through
     * {@code MySQLUtils} before being returned.
     *
     * @param question the question text
     * @param answer the candidate answer text
     * @return the evidence text, or {@code null} if the search failed
     */
    public static String getRewindEvidenceText(String question, String answer) {
        // 1. Previously stored text wins.
        String rewindEvidenceText = MySQLUtils.getRewindEvidenceText(question, answer);
        if (rewindEvidenceText != null) {
            LOG.info("?{} {}", question, answer);
            return rewindEvidenceText;
        }

        // 2. Nothing stored: ask the search endpoint.
        String query;
        try {
            query = URLEncoder.encode(question + answer, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            LOG.error("url", e);
            return null;
        }
        query = "http://ajax.googleapis.com/ajax/services/search/web?start=0&rsz=large&v=1.0&q=" + query;

        GetMethod getMethod = null;
        try {
            HttpClient httpClient = new HttpClient();
            getMethod = new GetMethod(query);
            // Configure retries before the one and only execution; the original
            // sent the request twice and set the handler in between.
            getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                    new DefaultHttpMethodRetryHandler());
            // A proxied variant, executeMethod(HostConfiguration, method), was
            // disabled in the original and stays disabled.
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                LOG.error("Method failed: " + getMethod.getStatusLine());
                return null;
            }
            String response = new String(getMethod.getResponseBody(), "UTF-8");
            LOG.debug("??{}", response);

            JSONObject responseData = new JSONObject(response).getJSONObject("responseData");
            // Only logged, so it is not parsed: an oversized count must not abort the lookup.
            String totalResult = responseData.getJSONObject("cursor").getString("estimatedResultCount");
            LOG.info("? {}", totalResult);

            JSONArray results = responseData.getJSONArray("results");
            LOG.debug(" Results:");
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < results.length(); i++) {
                JSONObject result = results.getJSONObject(i);
                String title = result.getString("titleNoFormatting");
                LOG.debug(title);
                // Full-page fetching via getHTMLContent was disabled in the
                // original; the search snippet is used with its highlight
                // tags and ellipses removed.
                String content = result.get("content").toString()
                        .replace("<b>", "")
                        .replace("</b>", "")
                        .replace("...", "");
                LOG.debug(content);
                text.append(title).append(content);
            }
            LOG.info("{} {} MySQL?", question, answer);
            MySQLUtils.saveRewindEvidenceText(question, answer, text.toString());
            return text.toString();
        } catch (Exception e) {
            LOG.debug("?", e);
        } finally {
            if (getMethod != null) {
                // Return the connection to the connection manager.
                getMethod.releaseConnection();
            }
        }
        return null;
    }

    /**
     * Reads {@code in} until a read returns no bytes. The stream is not
     * closed. If an {@link IOException} occurs it is logged and the bytes read
     * so far are returned.
     *
     * @param in the stream to drain
     * @return the bytes that were read
     */
    public static byte[] readAll(InputStream in) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            byte[] buffer = new byte[1024];
            int n = in.read(buffer);
            while (n > 0) {
                out.write(buffer, 0, n);
                n = in.read(buffer);
            }
        } catch (IOException ex) {
            LOG.error("?", ex);
        }
        return out.toByteArray();
    }

    /**
     * Resolves where {@code cls} was loaded from: the directory that contains
     * the package root of its class file or, for a class inside a JAR, the
     * directory that contains the JAR.
     *
     * @param cls the class to locate; must not be {@code null} and must not
     *        belong to a {@code java.} or {@code javax.} package
     * @return the URL-decoded location path
     * @throws IllegalArgumentException if {@code cls} is {@code null} or a JDK class
     * @throws IllegalStateException if the class file cannot be found as a resource
     */
    public static String getAppPath(Class<?> cls) {
        if (cls == null) {
            throw new IllegalArgumentException("???");
        }
        // Simple file name of the class once the package prefix is stripped below.
        String clsName = cls.getName() + ".class";
        Package pack = cls.getPackage();
        // The unnamed package has an empty name; treat it like "no package" so the
        // prefix-stripping below does not eat the first character of the class name.
        String packName = pack == null ? "" : pack.getName();
        String path = "";
        if (!packName.isEmpty()) {
            if (packName.startsWith("java.") || packName.startsWith("javax.")) {
                throw new IllegalArgumentException("????");
            }
            clsName = clsName.substring(packName.length() + 1);
            // Package name -> resource directory, e.g. "a.b" -> "a/b/".
            path = packName.replace('.', '/') + "/";
        }

        String resource = path + clsName;
        ClassLoader loader = cls.getClassLoader();
        // A null loader means the bootstrap loader; fall back to the system lookup.
        URL url = loader != null ? loader.getResource(resource) : ClassLoader.getSystemResource(resource);
        if (url == null) {
            throw new IllegalStateException("Class file resource not found: " + resource);
        }

        String realPath = url.getPath();
        // Strip everything up to and including a "file:" prefix (present for JAR URLs).
        int pos = realPath.indexOf("file:");
        if (pos > -1) {
            realPath = realPath.substring(pos + 5);
        }
        // Cut off the class resource itself plus the separator in front of it.
        pos = realPath.indexOf(resource);
        if (pos < 1) {
            throw new IllegalStateException("Unexpected resource location: " + url);
        }
        realPath = realPath.substring(0, pos - 1);
        // "...some.jar!" means the class lives in a JAR: return the JAR's directory.
        if (realPath.endsWith("!")) {
            realPath = realPath.substring(0, realPath.lastIndexOf("/"));
        }
        // The URL path is percent-encoded; decode it last so the index
        // arithmetic above runs on the raw form.
        try {
            realPath = URLDecoder.decode(realPath, "utf-8");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return realPath;
    }

    /**
     * Reads distinct question lines from a UTF-8 classpath resource. Lines are
     * trimmed; lines shorter than three characters, lines starting with
     * {@code #} and lines with {@code #} as their second character are
     * skipped. Failures are logged and whatever was read so far is returned.
     *
     * <p>NOTE(review): the two {@code replace} literals appear to have lost
     * non-ASCII characters to an encoding problem; they are kept exactly as
     * found. Confirm against the upstream source.
     *
     * @param file the resource name, resolved relative to this class
     * @return the distinct question lines; never {@code null}
     */
    public static Set<String> getQuestions(String file) {
        // A set, so duplicate questions collapse.
        Set<String> result = new HashSet<>();
        try (InputStream in = Tools.class.getResourceAsStream(file)) {
            if (in == null) {
                LOG.error("Question resource not found: {}", file);
                return result;
            }
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, "utf-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim().replace("?", "").replace("", "");
                if (line.equals("") || line.startsWith("#") || line.indexOf("#") == 1 || line.length() < 3) {
                    continue;
                }
                result.add(line);
            }
        } catch (Exception e) {
            LOG.error("", e);
        }
        return result;
    }
}