mitm.common.util.URIUtils.java Source code

Java tutorial

Introduction

Here is the source code for mitm.common.util.URIUtils.java

Source

/*
 * Copyright (c) 2008-2011, Martijn Brinkers, Djigzo.
 * 
 * This file is part of Djigzo email encryption.
 *
 * Djigzo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License 
 * version 3, 19 November 2007 as published by the Free Software 
 * Foundation.
 *
 * Djigzo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public 
 * License along with Djigzo. If not, see <http://www.gnu.org/licenses/>
 *
 * Additional permission under GNU AGPL version 3 section 7
 * 
 * If you modify this Program, or any covered work, by linking or 
 * combining it with aspectjrt.jar, aspectjweaver.jar, tyrex-1.0.3.jar, 
 * freemarker.jar, dom4j.jar, mx4j-jmx.jar, mx4j-tools.jar, 
 * spice-classman-1.0.jar, spice-loggerstore-0.5.jar, spice-salt-0.8.jar, 
 * spice-xmlpolicy-1.0.jar, saaj-api-1.3.jar, saaj-impl-1.3.jar, 
 * wsdl4j-1.6.1.jar (or modified versions of these libraries), 
 * containing parts covered by the terms of Eclipse Public License, 
 * tyrex license, freemarker license, dom4j license, mx4j license,
 * Spice Software License, Common Development and Distribution License
 * (CDDL), Common Public License (CPL) the licensors of this Program grant 
 * you additional permission to convey the resulting work.
 */
package mitm.common.util;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.util.URIUtil;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * URI helper functions for validating URIs, converting strings to URIs, etc.
 * 
 * @author Martijn Brinkers
 *
 */
public class URIUtils {
    private static final Logger logger = LoggerFactory.getLogger(URIUtils.class);

    /**
     * Selects which URI components {@link #isValidURI(String, URIType)} requires or forbids.
     */
    public enum URIType {
        /** Scheme and host are required; query and fragment must be absent. */
        BASE,
        /** Scheme, authority and host must all be absent. */
        RELATIVE,
        /** Scheme and host are required; query and fragment are allowed. */
        FULL,
        /** Anything that {@link URI#URI(String)} is able to parse. */
        ALL
    }

    /**
     * Regular expression that matches URLs.
     * <p>
     * {@link #URL_PATTERN} is compiled from this value, which is why it is declared final:
     * reassigning it would leave the compiled pattern out of sync.
     */
    public static final String URL_REG_EXPR = "(?:(^|\\s))(([a-zA-Z+]){3,7}\\:\\/\\/|~/|/)?([\\w]+:\\w+@)?([a-zA-Z]{1}([\\w\\-]+\\.)+([\\w]{2,5}))(:[\\d]{1,5})?(/?[\\w-])*\\??"
            + "[^.!,?;\"\\'<>()\\[\\]\\{\\}\\s\\x7F-\\xFF]*([.!,?]+[^.!,?;\"'<>()\\[\\]\\{\\}\\s\\x7F-\\xFF]+)*";

    /**
     * This is a more strict, case-insensitive matcher that requires a leading {@code http://},
     * {@code https://} or {@code www.}. It matches www.example.com and http://abc.com but not
     * example.com.
     * <p>
     * {@link #HTTP_URL_PATTERN} is compiled from this value, which is why it is declared final.
     */
    public static final String HTTP_URL_REG_EXPR = "(?i)(?:(^|\\s))(((http|https)\\:\\/\\/)|(www\\.))([a-zA-Z]{1}([\\w\\-]+\\.)+([\\w]{2,5}))(:[\\d]{1,5})?(/?[\\w-])*\\??"
            + "[^@.!,?;\"\\'<>()\\[\\]\\{\\}\\s\\x7F-\\xFF]*([.!,?]+[^.!,?;\"'<>()\\[\\]\\{\\}\\s\\x7F-\\xFF]+)*";

    /**
     * Pattern for URL_REG_EXPR
     */
    public static final Pattern URL_PATTERN = Pattern.compile(URL_REG_EXPR);

    /**
     * Pattern for HTTP_URL_REG_EXPR
     */
    public static final Pattern HTTP_URL_PATTERN = Pattern.compile(HTTP_URL_REG_EXPR);

    /**
     * Tries to create a URI out of the string. It first tries to create a URI directly from the
     * trimmed identifier. If that fails and {@code encode} is true, the identifier is URI encoded
     * (path and query) and a URI is created from the encoded form.
     *
     * @param identifier the string to convert; surrounding whitespace is trimmed first
     * @param encode if true, a second attempt is made with the URI encoded identifier when the
     *               identifier itself is not a valid URI
     * @return the URI, or null if the identifier is null or empty after trimming
     * @throws URISyntaxException if the identifier is not a valid URI and {@code encode} is false,
     *         if encoding the identifier fails, or if the encoded identifier is still not a valid URI
     */
    public static URI toURI(String identifier, boolean encode) throws URISyntaxException {
        final String trimmed = StringUtils.trimToNull(identifier);

        if (trimmed == null) {
            return null;
        }

        try {
            return new URI(trimmed);
        } catch (URISyntaxException e) {
            if (!encode) {
                /*
                 * No fallback requested, so do not claim that an encoded version will be tried.
                 */
                logger.debug("Not a valid URI. Identifier: {}", trimmed);

                throw e;
            }

            logger.debug("Not a valid URI. Trying encoded version. Identifier: {}", trimmed);
        }

        /*
         * try to URI encode the string
         */
        String encoded;

        try {
            encoded = URIUtil.encodePathQuery(trimmed);
        } catch (URIException urie) {
            /*
             * Keep the encoding failure as the cause so it is not lost to the caller
             */
            throw new CausableURISyntaxException(trimmed, urie.getMessage(), urie);
        }

        return new URI(encoded);
    }

    /**
     * Checks whether the given identifier is a valid URI. If type is null, a FULL check will be done.
     * If identifier is null, false will be returned.
     * <p>
     * The identifier is parsed as is (it is not trimmed) and must be parseable by
     * {@link URI#URI(String)}; on top of that the type determines which parts must be present:
     * <ul>
     * <li>BASE: scheme and host required, no query and no fragment</li>
     * <li>RELATIVE: no scheme, no authority and no host</li>
     * <li>FULL: scheme and host required</li>
     * <li>ALL: no additional requirements</li>
     * </ul>
     *
     * @param identifier the string to check, may be null
     * @param type the kind of URI to accept, null is treated as FULL
     * @return true if the identifier is a valid URI of the requested type
     */
    public static boolean isValidURI(String identifier, URIType type) {
        if (identifier == null) {
            return false;
        }

        final URIType effectiveType = (type != null) ? type : URIType.FULL;

        final URI uri;

        try {
            uri = new URI(identifier);
        } catch (URISyntaxException ignored) {
            /*
             * Something that cannot be parsed is by definition not a valid URI
             */
            return false;
        }

        final boolean hasSchemeAndHost = StringUtils.isNotEmpty(uri.getScheme())
                && StringUtils.isNotEmpty(uri.getHost());

        switch (effectiveType) {
        case BASE:
            /*
             * Only accepts URI of the form [scheme:][//authority][path]
             */
            return hasSchemeAndHost && StringUtils.isEmpty(uri.getQuery())
                    && StringUtils.isEmpty(uri.getFragment());
        case RELATIVE:
            /*
             * Only accepts URI of the form [path][?query][#fragment]
             */
            return StringUtils.isEmpty(uri.getScheme()) && StringUtils.isEmpty(uri.getAuthority())
                    && StringUtils.isEmpty(uri.getHost());
        case FULL:
            /*
             * Only accepts URI of the form [scheme:][//authority][path][?query][#fragment]
             */
            return hasSchemeAndHost;
        default:
            /*
             * ALL: being parseable is enough
             */
            return true;
        }
    }
}