silvertrout.plugins.titlegiver.TitleGiver.java Source code

Java tutorial

Introduction

Here is the source code for silvertrout.plugins.titlegiver.TitleGiver.java

Source

/*   _______ __ __                    _______                    __
 *  |     __|__|  |.--.--.-----.----.|_     _|.----.-----.--.--.|  |_
 *  |__     |  |  ||  |  |  -__|   _|  |   |  |   _|  _  |  |  ||   _|
 *  |_______|__|__| \___/|_____|__|    |___|  |__| |_____|_____||____|
 *
 *  Copyright 2008 - Gustav Tiger, Henrik Steen and Gustav "Gussoh" Sohtell
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package silvertrout.plugins.titlegiver;

import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringEscapeUtils;

import silvertrout.Channel;
import silvertrout.User;
import silvertrout.commons.ConnectHelper;

/**
 * JBT-plugin to fetch a web page's title and print it to the channel
 *
 * Beta version with URL and URLConnection support. Tries to do some charset
 * conversion based on HTTP headers. TODO: Check meta tag for additional
 * information about charset.
 *
 * @author reggna
 * @author tigge
 * @version Beta 3.0
 */
public class TitleGiver extends silvertrout.Plugin {

    // Max content length (in bytes) to grab to check for header
    private static final int maxContentLength = 4096;

    @Override
    public void onPrivmsg(User user, Channel channel, String message) {
        List<String> s = getTitles(message);
        s.addAll(getSpotifyTitles(message));
        for (String t : s) {
            if (channel != null) {
                channel.sendPrivmsg(t);
            } else {
                getNetwork().getConnection().sendPrivmsg(user.getNickname(), t);
            }
        }
    }

    protected static String createProtocolPattern() {
        return "(http|https):\\/\\/";
    }

    protected static String createHostPattern() {
        // TODO support IP-addresses
        String letter = "\\p{L}";//"A-Za-z";
        String digit = "\\d";
        String let_dig = "[" + letter + digit + "]";
        String let_dig_hyp = "[" + letter + "\\-" + digit + "]";
        String ldh_str = let_dig_hyp + "+";
        String label = "[" + letter + "]" + "(?:(?:" + ldh_str + ")?" + let_dig + ")?";
        String subdomain = "((?:" + label + "\\.)*" + label + ")";
        return subdomain;
    }

    protected static String createPortPattern() {
        return "(?:\\:(\\d+))?";
    }

    protected static String createPathPattern() {
        String unreserved = "\\w\\-\\.~"; // '_' is part of \w
        String sub_delims = "!\\$&'\\(\\)\\*\\+,;=";
        String pct_encoded = "(?:%\\d\\d)";

        String pchar = "[" + unreserved + "|" + pct_encoded + "|" + sub_delims + "|:|@]";
        //(?:(?:\/pchar)|(?:\/(?:\s|$)))*
        return "((?:(?:\\/" + pchar + "+)|(?:\\/(?:\\s|$)))*)";//"( (?:\\/(?:[-\\w\\%\\?~\\.\\d!\\$&'\\(\\)\\*\\+,;\\=:@]+)|\\/\\s)* )?";
    }

    protected static String createURLPattern() {
        String protocol = createProtocolPattern();
        String host = createHostPattern();
        String port = createPortPattern();
        String path = createPathPattern();

        // It's the create methods responsibility to make sure that the regex groups are correct
        return "(?:(?:<|\\s|\\\"|^)" + protocol + host + port + path + "(?:\\s|$|\\\"|>*))";
    }

    /**
     *
     * @param message
     * @return
     */
    public static List<String> getTitles(String message) {
        System.out.println("Title: " + message);
        java.util.ArrayList<String> r = new java.util.ArrayList<String>();
        // TODO Parse IP hosts
        // "(http|https):\\/\\/(\\w+(\\.\\w+)*)(?:\\:(\\d+))?((?:\\/(?:[-\\w\\%\\?~\\.\\d!\\$&'\\(\\)\\*\\+,;\\=:@]+)|\\/\\s)*)?"
        // "(http|https):\\/\\/([\\w\\.-]+)(?:\\:(\\d+))?([\\/\\_\\+\\-\\w\\?\\%\\&\\(\\)\\.\\=\\\\\\,]*)?"
        //      "(http|https):\\/\\/([\\w]+(?:\\.[\\w]+)*)(?:\\:(\\d+))?((?:\\/(?:[-\\w\\%\\?~\\.\\d!\\$&'\\(\\)\\*\\+,;\\=:@]+)|\\/\\s)*)?"
        Pattern p = Pattern.compile(createURLPattern());
        //p.
        Matcher m = p.matcher(message);
        while (m.find()) {
            String title = getTitle(m.group(0), m.group(1), m.group(2), m.group(3), m.group(4));
            if (title != null && !title.equals("")) {
                r.add(title);
            }
        }
        return r;
    }

    public static Collection<String> getSpotifyTitles(String message) {
        return SpotifyGiver.parse(message);
    }

    /**
     *
     * @param url
     * @param connection
     * @param server
     * @param port
     * @param file
     * @return
     */
    public static String getTitle(String url, String connection, String server, String port, String file) {
        if (port != null) {
            return getTitle(url, connection, server, Integer.parseInt(port), file);
        } else {
            return getTitle(url, connection, server, 0, file);
        }
    }

    /**
     *
     * @param url
     * @param connection
     * @param server
     * @param port
     * @param file
     * @return
     */
    public static String getTitle(String url, String connection, String server, int port, String file) {

        /* Set current port */
        if (port == 0) {
            if (connection.equals("https")) {
                port = 443;
            } else {
                port = 80;
            }
        }

        // Find title
        String page = ConnectHelper.Connect(connection, server, file, port, maxContentLength * 10, null, null);
        if (page != null) {
            String titlePattern = "(?is)<title>([^<]+)";
            Pattern pt = Pattern.compile(titlePattern);
            Matcher mt = pt.matcher(page);

            if (mt.find()) {
                String title = mt.group(1);
                return StringEscapeUtils.unescapeHtml4(title.replaceAll("\\s+", " ").trim());
            } else if (server.equals("t.co")) {
                // TODO(reggna): Move this to the ConnectHelper, and do proper
                // META "redirects".  Be careful not to not create a redirect
                // loop like this though.
                Pattern pattern = Pattern.compile("<META http-equiv=\"refresh\"(?:[^URL]+)URL=([^\"]+)");
                Matcher matcher = pattern.matcher(page);
                if (matcher.find()) {
                    List<String> titles = getTitles(matcher.group(1));
                    if (titles.size() > 0) {
                        return titles.get(0);
                    }
                }
            }
            System.out.println("No title found");
        }
        System.out.println("Could be no content (if no title found)");
        System.out.println("Failed on fetching title for some reason");
        return null;

    }

    /* For offline test: */
    /*public static void main(String args[]) {
    List<String> s = getTitles(args[0]);
    for(String t: s) System.out.println("Title: " +t);
    }*/
}