org.apdplat.qa.util.Tools.java Source code

Java tutorial

Introduction

Here is the source code for org.apdplat.qa.util.Tools.java

Source

/**
 * 
 * APDPlat - Application Product Development Platform
 * Copyright (c) 2013, Yang Shangchuan, yang-shangchuan@qq.com
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 */

package org.apdplat.qa.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.ansj.domain.Term;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apdplat.qa.datasource.DataSource;
import org.apdplat.qa.datasource.FileDataSource;
import org.apdplat.qa.model.Evidence;
import org.apdplat.qa.model.Question;
import org.apdplat.qa.parser.WordParser;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helper methods shared by the question-answering code: duration
 * formatting, subset enumeration, document-frequency bookkeeping, substring and
 * regex counting, map sorting, small file/stream utilities, web-search based
 * evidence retrieval and class-path lookup.
 *
 * <p>NOTE(review): many string literals and comments in this file contain
 * {@code ?} characters that look like residue of a lossy charset conversion.
 * The literals are runtime output and are therefore left byte-for-byte as they
 * are; restore them from an intact copy of the source if one is available.
 *
 * @author Yang Shangchuan (presumed from the contact address in the file
 *         header; the original name was mis-encoded)
 */
public class Tools {

    private static final Logger LOG = LoggerFactory.getLogger(Tools.class);

    /**
     * Largest list size accepted by {@link #getCom(List)}; {@code 1 << size}
     * must remain a positive {@code int}.
     */
    private static final int MAX_COM_SIZE = 30;

    /**
     * Term name -> number of evidences whose title + snippet contained the
     * term. Replaced as a whole by {@link #initIDF(List)} and read by
     * {@link #getIDF(String)}; volatile so that a freshly built table is
     * visible to readers on other threads.
     */
    private static volatile Map<String, Integer> map = new HashMap<>();

    /**
     * Formats a duration given in milliseconds as a comma-separated list of
     * its non-zero day, hour, minute, second and millisecond components.
     *
     * <p>Zero components are omitted; a duration of zero (or a negative one)
     * yields an empty string.
     *
     * @param ms duration in milliseconds
     * @return the formatted components without a trailing comma
     */
    public static String getTimeDes(long ms) {
        final long msPerSecond = 1000L;
        final long msPerMinute = 60L * msPerSecond;
        final long msPerHour = 60L * msPerMinute;
        final long msPerDay = 24L * msPerHour;

        long day = ms / msPerDay;
        long hour = (ms % msPerDay) / msPerHour;
        long minute = (ms % msPerHour) / msPerMinute;
        long second = (ms % msPerMinute) / msPerSecond;
        long milliSecond = ms % msPerSecond;

        // NOTE(review): the separators below presumably carried unit labels that
        // were lost to mis-encoding (only the hour suffix still shows a '?').
        StringBuilder str = new StringBuilder();
        if (day > 0) {
            str.append(day).append(",");
        }
        if (hour > 0) {
            str.append(hour).append("?,");
        }
        if (minute > 0) {
            str.append(minute).append(",");
        }
        if (second > 0) {
            str.append(second).append(",");
        }
        if (milliSecond > 0) {
            str.append(milliSecond).append(",");
        }
        if (str.length() > 0) {
            // drop the trailing separator
            str.deleteCharAt(str.length() - 1);
        }
        return str.toString();
    }

    /**
     * Enumerates every non-empty sub-list (combination) of the given list.
     *
     * <p>Sub-lists are produced in bit-mask order: mask {@code i} (from 1 to
     * {@code 2^n - 1}) selects element {@code j} when bit {@code j} of
     * {@code i} is set, so element order inside each sub-list follows the
     * input order.
     *
     * @param list the elements to combine
     * @param <T>  element type
     * @return all {@code 2^n - 1} non-empty combinations; empty for an empty list
     * @throws IllegalArgumentException if the list has more than 30 elements,
     *                                  for which the bit mask would overflow an
     *                                  {@code int} (previously this silently
     *                                  produced wrong results)
     */
    public static <T> List<List<T>> getCom(List<T> list) {
        // Snapshot gives O(1) indexed access and avoids the unchecked T[] cast.
        List<T> data = new ArrayList<T>(list);
        int size = data.size();
        if (size > MAX_COM_SIZE) {
            throw new IllegalArgumentException(
                    "Cannot enumerate combinations of " + size + " elements (limit " + MAX_COM_SIZE + ")");
        }
        int max = 1 << size; // safe: size <= 30
        List<List<T>> result = new ArrayList<>();
        for (int mask = 1; mask < max; mask++) {
            List<T> sub = new ArrayList<>();
            for (int j = 0; j < size; j++) {
                if ((mask & (1 << j)) != 0) {
                    sub.add(data.get(j));
                }
            }
            result.add(sub);
        }
        return result;
    }

    /**
     * Loads the questions of a {@link FileDataSource} built on the given file
     * and prints each one to standard output as {@code question:expectedAnswer}.
     *
     * @param file path handed to {@link FileDataSource}
     */
    public static void extractQuestions(String file) {
        DataSource dataSource = new FileDataSource(file);
        for (Question question : dataSource.getQuestions()) {
            System.out.println(question.getQuestion().trim() + ":" + question.getExpectAnswer());
        }
    }

    /**
     * Loads the questions of a {@link FileDataSource} built on the given file
     * and prints each one to standard output prefixed by {@code pattern} and a
     * space.
     *
     * @param file    path handed to {@link FileDataSource}
     * @param pattern text printed in front of every question
     */
    public static void extractPatterns(String file, String pattern) {
        DataSource dataSource = new FileDataSource(file);
        for (Question question : dataSource.getQuestions()) {
            System.out.println(pattern + " " + question.getQuestion().trim());
        }
    }

    /**
     * Returns the count recorded for a term by the last {@link #initIDF(List)}
     * call.
     *
     * <p>Despite the name, the stored value is a plain document count (number
     * of evidences containing the term), not an inverse frequency.
     *
     * @param term term name to look up
     * @return the recorded count, or 0 when the term is unknown
     */
    public static int getIDF(String term) {
        Integer idf = map.get(term);
        if (idf == null) {
            return 0;
        }
        LOG.info("idf " + term + ":" + idf);
        return idf;
    }

    /**
     * Rebuilds the term table used by {@link #getIDF(String)} from the
     * evidences of the given questions.
     *
     * <p>For every evidence, title and snippet are concatenated and passed to
     * {@link WordParser#parse}; each distinct term name found is counted once
     * per evidence. The table is built locally and published only when
     * complete, so concurrent readers never observe a half-filled table.
     *
     * @param questions questions whose evidences are scanned
     * @return the table entries sorted by ascending count
     */
    public static List<Map.Entry<String, Integer>> initIDF(List<Question> questions) {
        Map<String, Integer> counts = new HashMap<>();
        for (Question question : questions) {
            List<Evidence> evidences = question.getEvidences();
            for (Evidence evidence : evidences) {
                // Distinct term names of this evidence: each counts once per evidence.
                Set<String> names = new HashSet<>();
                List<Term> terms = WordParser.parse(evidence.getTitle() + evidence.getSnippet());
                for (Term term : terms) {
                    names.add(term.getName());
                }
                for (String name : names) {
                    Integer seen = counts.get(name);
                    counts.put(name, seen == null ? 1 : seen + 1);
                }
            }
        }
        map = counts;
        List<Map.Entry<String, Integer>> list = Tools.sortByIntegerValue(counts);
        for (Map.Entry<String, Integer> entry : list) {
            LOG.debug(entry.getKey() + " " + entry.getValue());
        }
        return list;
    }

    /**
     * Downloads the resource at {@code url} line by line and returns the
     * result of {@link TextExtract#parse} applied to the downloaded text.
     *
     * <p>The stream is decoded with the platform default charset, as before.
     * NOTE(review): pages in another encoding will be garbled; confirm whether
     * an explicit charset should be used.
     *
     * @param url address of the page to download
     * @return the extracted content, or {@code null} if anything fails (the
     *         failure is logged at debug level)
     */
    public static String getHTMLContent(String url) {
        // try-with-resources: the reader (and the underlying connection stream)
        // used to be leaked on every call.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new URL(url).openStream()))) {
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append("\n");
            }
            return TextExtract.parse(html.toString());
        } catch (Exception e) {
            LOG.debug("?URL" + url, e);
        }
        return null;
    }

    /**
     * Returns the terms produced by {@link WordParser#parse} for the text,
     * copied into a new list.
     *
     * @param text text to parse
     * @return a fresh, independently modifiable list of the parsed terms
     */
    public static List<Term> getTerms(String text) {
        List<Term> terms = WordParser.parse(text);
        return new ArrayList<Term>(terms);
    }

    /**
     * Counts the non-overlapping matches of a regular expression in a text.
     *
     * @param text    text to search
     * @param pattern regular expression, compiled with {@link Pattern#compile(String)}
     * @return number of matches found by successive {@link Matcher#find()} calls
     */
    public static int countsForSkipBigram(String text, String pattern) {
        Matcher matcher = Pattern.compile(pattern).matcher(text);
        int count = 0;
        while (matcher.find()) {
            LOG.debug("??{}", matcher.group());
            count++;
        }
        return count;
    }

    /**
     * Counts the occurrences of a literal substring in a text, including
     * overlapping ones (the search resumes one character after each hit).
     *
     * <p>An occurrence at offset 0 is counted; earlier versions stopped at it
     * and reported 0 for any text that started with the pattern.
     *
     * @param text    text to search
     * @param pattern literal substring to look for
     * @return number of occurrences; 0 for an empty pattern
     */
    public static int countsForBigram(String text, String pattern) {
        if (pattern.isEmpty()) {
            // indexOf("") matches at every offset and never returns -1,
            // so the loop below would not terminate.
            return 0;
        }
        int count = 0;
        int index = text.indexOf(pattern);
        while (index >= 0) {
            LOG.debug("?: {} ?{}", pattern, index);
            count++;
            index = text.indexOf(pattern, index + 1);
        }
        return count;
    }

    /**
     * Returns the entries of a map sorted by ascending integer value.
     *
     * <p>The returned list holds the map's own entry objects. The sort is
     * stable, so entries with equal values keep the map's iteration order.
     *
     * @param map map whose entries are sorted
     * @param <K> key type
     * @return a new list of the entries in ascending value order
     */
    public static <K> List<Map.Entry<K, Integer>> sortByIntegerValue(Map<K, Integer> map) {
        List<Map.Entry<K, Integer>> orderList = new ArrayList<>(map.entrySet());
        Collections.sort(orderList, new Comparator<Map.Entry<K, Integer>>() {
            @Override
            public int compare(Map.Entry<K, Integer> o1, Map.Entry<K, Integer> o2) {
                // compareTo instead of subtraction: the difference of two ints can overflow.
                return o1.getValue().compareTo(o2.getValue());
            }
        });
        return orderList;
    }

    /**
     * Returns the entries of a map sorted by ascending double value.
     *
     * <p>Values are ordered with {@link Double#compareTo(Double)}, which gives
     * a total order even when NaN is present (NaN sorts last); the former
     * subtraction-based comparison treated NaN as equal to everything and
     * could violate the comparator contract.
     *
     * @param map map whose entries are sorted
     * @param <K> key type
     * @return a new list of the entries in ascending value order
     */
    public static <K> List<Map.Entry<K, Double>> sortByDoubleValue(Map<K, Double> map) {
        List<Map.Entry<K, Double>> orderList = new ArrayList<>(map.entrySet());
        Collections.sort(orderList, new Comparator<Map.Entry<K, Double>>() {
            @Override
            public int compare(Map.Entry<K, Double> o1, Map.Entry<K, Double> o2) {
                return o1.getValue().compareTo(o2.getValue());
            }
        });
        return orderList;
    }

    /**
     * Writes {@code text} to the file at {@code path} as UTF-8, creating
     * missing parent directories and replacing any existing content.
     *
     * <p>Failures are logged and not propagated.
     *
     * @param path target file; may be a bare file name without a directory part
     * @param text content to write
     */
    public static void createAndWriteFile(String path, String text) {
        try {
            File file = new File(path);
            // getParentFile() is null for a bare file name; that used to raise a
            // swallowed NullPointerException and nothing was written.
            File parent = file.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }
            // FileOutputStream creates the file itself, so no createNewFile() is needed.
            try (BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(file), "utf-8"))) {
                writer.write(text);
            }
        } catch (Exception ex) {
            LOG.error("?", ex);
        }
    }

    /**
     * Returns evidence text for a question/answer pair.
     *
     * <p>The text stored by {@link MySQLUtils} is returned when present.
     * Otherwise the concatenated question and answer are sent to the Google
     * AJAX web-search endpoint, the title and cleaned snippet of every result
     * are concatenated, the text is saved through {@link MySQLUtils} and
     * returned.
     *
     * <p>NOTE(review): this endpoint is believed to have been retired by
     * Google; confirm whether the web branch can still succeed.
     *
     * @param question question text
     * @param answer   candidate answer text
     * @return the evidence text, or {@code null} when the query cannot be
     *         encoded, the server does not answer 200, or any other failure
     *         occurs
     */
    public static String getRewindEvidenceText(String question, String answer) {
        // 1. previously stored text, if any
        String rewindEvidenceText = MySQLUtils.getRewindEvidenceText(question, answer);
        if (rewindEvidenceText != null) {
            LOG.info("?" + question + " " + answer);
            return rewindEvidenceText;
        }
        // 2. otherwise ask the web search
        String query;
        try {
            query = URLEncoder.encode(question + answer, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            LOG.error("url", e);
            return null;
        }
        query = "http://ajax.googleapis.com/ajax/services/search/web?start=0&rsz=large&v=1.0&q=" + query;

        GetMethod getMethod = null;
        try {
            HttpClient httpClient = new HttpClient();
            getMethod = new GetMethod(query);
            // Configure the retry handler BEFORE the single execution; the request
            // used to be sent twice, the first time without the handler.
            getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());

            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                LOG.error("Method failed: " + getMethod.getStatusLine());
                // An error body is not a search result; do not try to parse it.
                return null;
            }
            String response = new String(getMethod.getResponseBody(), "UTF-8");
            LOG.debug("??" + response);

            JSONObject responseData = new JSONObject(response).getJSONObject("responseData");
            // The estimated count is only logged, so it is neither required nor
            // parsed: a missing or larger-than-int value must not abort the lookup.
            JSONObject cursor = responseData.optJSONObject("cursor");
            if (cursor != null) {
                LOG.info("? " + cursor.optString("estimatedResultCount"));
            }

            JSONArray results = responseData.getJSONArray("results");
            StringBuilder text = new StringBuilder();
            LOG.debug(" Results:");
            for (int i = 0; i < results.length(); i++) {
                JSONObject result = results.getJSONObject(i);
                String title = result.getString("titleNoFormatting");
                LOG.debug(title);
                // Only the search snippet is used; fetching the full page through
                // getHTMLContent was already disabled in the original code.
                String content = stripSnippetMarkup(result.get("content").toString());
                LOG.debug(content);
                text.append(title).append(content);
            }
            LOG.info("" + question + " " + answer + " MySQL?");
            MySQLUtils.saveRewindEvidenceText(question, answer, text.toString());
            return text.toString();
        } catch (Exception e) {
            LOG.debug("?", e);
        } finally {
            if (getMethod != null) {
                // Return the connection to the connection manager (it was never released before).
                getMethod.releaseConnection();
            }
        }
        return null;
    }

    /** Removes the bold tags and ellipses that the search service puts into snippets. */
    private static String stripSnippetMarkup(String content) {
        return content.replace("<b>", "").replace("</b>", "").replace("...", "");
    }

    /**
     * Reads a stream until it reports no more data and returns the bytes read.
     *
     * <p>The stream is not closed. If reading fails, the error is logged and
     * the bytes read so far are returned.
     *
     * @param in stream to drain
     * @return the bytes read (possibly incomplete after an I/O error)
     */
    public static byte[] readAll(InputStream in) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            byte[] buffer = new byte[1024];
            int n;
            while ((n = in.read(buffer)) > 0) {
                out.write(buffer, 0, n);
            }
        } catch (IOException ex) {
            LOG.error("?", ex);
        }
        return out.toByteArray();
    }

    /**
     * Returns the location a class was loaded from: the class-path root
     * directory for classes loaded from a directory, or the directory that
     * contains the JAR for classes loaded from a JAR.
     *
     * <p>The location is derived from the URL of the class file resource: a
     * leading {@code file:} and the trailing {@code /package/path/Name.class}
     * are cut off, a trailing {@code jar!} element is replaced by its parent
     * directory, and the result is URL-decoded as UTF-8.
     *
     * <p>NOTE(review): {@link URLDecoder} also turns {@code +} into a space,
     * so paths containing a literal plus sign come back altered.
     *
     * @param cls class to locate
     * @return decoded file-system path without a trailing slash
     * @throws IllegalArgumentException if {@code cls} is {@code null}, belongs
     *                                  to a {@code java.}/{@code javax.}
     *                                  package, or its class file cannot be
     *                                  located
     */
    public static String getAppPath(Class<?> cls) {
        if (cls == null) {
            throw new IllegalArgumentException("???");
        }
        // Resource name pieces: "<pkg/as/path/>" + "<SimpleName>.class"
        String clsName = cls.getName() + ".class";
        String path = "";
        Package pack = cls.getPackage();
        // Some runtimes report the unnamed package as a Package with an empty
        // name; treating it like a named one would chop the first character off
        // the class file name.
        if (pack != null && !pack.getName().isEmpty()) {
            String packName = pack.getName();
            if (packName.startsWith("java.") || packName.startsWith("javax.")) {
                throw new IllegalArgumentException("????");
            }
            clsName = clsName.substring(packName.length() + 1);
            path = packName.replace('.', '/') + "/";
        }
        String resource = path + clsName;

        // A null loader means the bootstrap loader; fall back to the system lookup.
        ClassLoader loader = cls.getClassLoader();
        URL url = (loader != null) ? loader.getResource(resource) : ClassLoader.getSystemResource(resource);
        if (url == null) {
            throw new IllegalArgumentException("Cannot locate class file " + resource);
        }

        String realPath = url.getPath();
        // jar: URLs expose "file:/.../x.jar!/pkg/Name.class" as their path
        int pos = realPath.indexOf("file:");
        if (pos > -1) {
            realPath = realPath.substring(pos + 5);
        }
        pos = realPath.indexOf(resource);
        if (pos < 1) {
            throw new IllegalArgumentException("Unexpected class file location " + url);
        }
        // cut "/pkg/Name.class" including the slash in front of it
        realPath = realPath.substring(0, pos - 1);
        // loaded from a JAR: report the directory that holds the JAR
        if (realPath.endsWith("!")) {
            realPath = realPath.substring(0, realPath.lastIndexOf("/"));
        }
        // The URL path is percent-encoded (e.g. spaces, non-ASCII directory names).
        try {
            realPath = URLDecoder.decode(realPath, "utf-8");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return realPath;
    }

    /**
     * Reads a UTF-8 class-path resource and returns its distinct question
     * lines.
     *
     * <p>Each line is trimmed and stripped of {@code ?}. Lines are skipped
     * when they are empty, start with {@code #}, have {@code #} as their
     * second character, or are shorter than three characters. Failures are
     * logged and the lines collected so far are returned.
     *
     * @param file resource name resolved with {@code Tools.class.getResourceAsStream}
     * @return the distinct remaining lines (unordered); empty when the resource
     *         is missing
     */
    public static Set<String> getQuestions(String file) {
        // a Set drops duplicate questions
        Set<String> result = new HashSet<>();
        try (InputStream in = Tools.class.getResourceAsStream(file)) {
            if (in == null) {
                LOG.error("Resource not found: " + file);
                return result;
            }
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, "utf-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                // NOTE(review): the second replace has an empty target and is a
                // no-op as written; it presumably removed a non-ASCII question
                // mark before the literal was mis-encoded.
                line = line.trim().replace("?", "").replace("", "");
                if (line.isEmpty() || line.startsWith("#") || line.indexOf("#") == 1 || line.length() < 3) {
                    continue;
                }
                result.add(line);
            }
        } catch (Exception e) {
            LOG.error("", e);
        }
        return result;
    }
}