org.apache.oodt.cas.protocol.http.util.HttpUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.oodt.cas.protocol.http.util.HttpUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.oodt.cas.protocol.http.util;

//JDK imports
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

//APACHE imports
import org.apache.commons.lang.Validate;

//OODT imports
import org.apache.oodt.cas.metadata.util.MimeTypeUtils;
import org.apache.oodt.cas.protocol.http.HttpFile;

/**
 * Utility methods for HTTP Protocol related tasks: resolving link paths against a base
 * {@link URI}, opening connections, downloading a page as text and extracting the
 * anchor links it contains.
 *
 * <p>The class only exposes static methods and cannot be instantiated.
 *
 * @author bfoster
 */
public class HttpUtils {

    /** Shared MIME type resolver used by {@link #isDirectory(URL, String)}. */
    static final MimeTypeUtils MIME_TYPES = new MimeTypeUtils();

    // Pattern looking for <a href="(group-2)"/>(group-3)</a> . . . group-1 is for either " or '
    static final Pattern XHTML_LINK_PATTERN = Pattern
            .compile("<\\s*a\\s+href\\s*=\\s*(['\"])(.+?)\\1\\s*>(.+?)<\\s*/\\s*a\\s*>");
    // Pattern looking for self-closed anchors: <a href="(group-2)"/> . . . group-1 is for either " or '
    static final Pattern LAZY_LINK_PATTERN = Pattern.compile("<\\s*a\\s+href\\s*=\\s*(['\"])(.+?)\\1\\s*/\\s*>");

    private HttpUtils() {
    }

    /**
     * Resolves a path against given {@link URI} and creates the resolved {@link URI}.
     * (i.e. base = "http://localhost" ; path = "/path/to/file" ; resolved = "http://localhost/path/to/file")
     *
     * <ul>
     * <li>A path starting with {@code http://} or {@code https://} is already absolute and
     * is returned as its own {@link URI}.</li>
     * <li>A path starting with {@code /} replaces the whole path of the base; scheme and
     * authority (user-info, host and port) of the base are kept.</li>
     * <li>Any other path is appended to the textual form of the base, inserting a single
     * {@code /} separator when the base does not already end with one.</li>
     * </ul>
     *
     * @param base The base {@link URI} which the given path will be resolved against.
     * @param path The path to be resolved against the given {@link URI}
     * @return resolved {@link URI}.
     * @throws URISyntaxException if the resulting string is not a valid {@link URI}
     */
    public static URI resolveUri(URI base, String path) throws URISyntaxException {
        Validate.notNull(base, "base URI must not be NULL");
        Validate.notNull(path, "resolve path must not be NULL");
        if (path.startsWith("http://") || path.startsWith("https://")) {
            // Already absolute (secure links included): nothing to resolve.
            return new URI(path);
        }
        if (path.startsWith("/")) {
            // Use the full authority so a non-default port (e.g. localhost:8080) is not lost.
            String authority = base.getRawAuthority();
            if (authority == null) {
                authority = base.getHost();
            }
            return new URI(base.getScheme() + "://" + authority + path);
        }
        String baseText = base.toString();
        String separator = baseText.endsWith("/") ? "" : "/";
        return new URI(baseText + separator + path);
    }

    /**
     * Opens and connects an {@link HttpURLConnection} to the given {@link URL}.
     *
     * <p>{@link HttpURLConnection#getResponseMessage()} is invoked before returning and its
     * result discarded; presumably this forces the response (and any redirect) to be
     * processed so that the connection's URL is final -- see
     * {@link #checkForRedirection(URL, URL)}.
     *
     * @param url the URL to connect to; its connection is cast to {@link HttpURLConnection}
     * @return the connected {@link HttpURLConnection}
     * @throws IOException if the connection cannot be opened or the response cannot be read
     */
    public static HttpURLConnection connect(URL url) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.connect();
        conn.getResponseMessage();
        return conn;
    }

    /**
     * Tells whether two URLs differ, by comparing their string forms.
     *
     * @param beforeConnUrl the URL as it was before connecting
     * @param afterConnUrl the URL as reported after connecting
     * @return {@code true} if the string forms of the two URLs are not equal
     */
    public static boolean checkForRedirection(URL beforeConnUrl, URL afterConnUrl) {
        return !beforeConnUrl.toString().equals(afterConnUrl.toString());
    }

    /**
     * Reads the whole body of the given connection as text.
     *
     * <p>Lines are concatenated <em>without</em> any line separator, so markup that spans
     * several lines ends up on a single line for the link patterns of this class.
     * The underlying stream is always closed before this method returns.
     *
     * @param conn the connection whose input stream is read
     * @return the content of the response with all line terminators removed
     * @throws IOException if the input stream cannot be obtained or fails while being read
     */
    public static String readUrl(HttpURLConnection conn) throws IOException {
        Scanner scanner = new Scanner(conn.getInputStream());
        try {
            StringBuilder sb = new StringBuilder();
            // hasNext() (not hasNextLine()) is kept on purpose: reading stops once only
            // whitespace remains, exactly as before.
            while (scanner.hasNext()) {
                sb.append(scanner.nextLine());
            }

            // Scanner swallows read failures; surface them instead of returning a
            // silently truncated page.
            IOException readFailure = scanner.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
            return sb.toString();
        } finally {
            // Closing the scanner also closes the connection's input stream.
            scanner.close();
        }
    }

    /**
     * Downloads the page behind the given file's link and returns one {@link HttpFile} per
     * anchor found in it.
     *
     * <p>The page is fetched once and scanned twice: first for regular anchors
     * ({@code <a href="link">name</a>}), then for self-closed anchors
     * ({@code <a href="link"/>}). Results are returned in that order.
     *
     * @param file the parent file whose link is downloaded and scanned
     * @return the files linked from the page; empty if no anchor matched
     * @throws IOException if the page, or a linked URL probed by
     *         {@link #isDirectory(URL, String)}, cannot be read
     * @throws URISyntaxException if the parent link or a resolved link is not a valid URI
     */
    public static List<HttpFile> findLinks(HttpFile file) throws IOException, URISyntaxException {
        // Fetch once so both patterns see the same content and only one request is made.
        String page = readUrl(connect(file.getLink()));
        List<HttpFile> httpFiles = new ArrayList<HttpFile>();

        Matcher matcher = XHTML_LINK_PATTERN.matcher(page);
        while (matcher.find()) {
            String link = matcher.group(2).trim();
            String virtualPath = matcher.group(3).trim();
            httpFiles.add(toHttpFile(file, link, virtualPath));
        }

        matcher = LAZY_LINK_PATTERN.matcher(page);
        while (matcher.find()) {
            String link = matcher.group(2).trim();
            // Self-closed anchors have no text, so the link doubles as the virtual path.
            httpFiles.add(toHttpFile(file, link, link));
        }
        return httpFiles;
    }

    /** Resolves {@code link} against the parent's URL and wraps the result in an {@link HttpFile}. */
    private static HttpFile toHttpFile(HttpFile parent, String link, String virtualPath)
            throws IOException, URISyntaxException {
        URL url = resolveUri(parent.getLink().toURI(), link).toURL();
        return new HttpFile(parent, link, isDirectory(url, virtualPath), url);
    }

    /**
     * Decides whether the given URL should be treated as a directory: its resolved MIME
     * type is {@code text/html} and the virtual path does not end with {@code .html}.
     *
     * <p>The MIME type is resolved from the URL string and the magic header read from the
     * URL's content. The stream opened for that purpose is always closed.
     *
     * @param url the URL to probe
     * @param virtualPath the name associated with the URL (link text or link itself)
     * @return {@code true} if the URL is considered a directory
     * @throws IOException wrapping any failure raised while opening, reading or classifying
     *         the URL
     */
    public static boolean isDirectory(URL url, String virtualPath) throws IOException {
        InputStream content = null;
        try {
            content = url.openStream();
            String mime = MIME_TYPES.autoResolveContentType(url.toString(),
                    MimeTypeUtils.readMagicHeader(content));
            return (mime.equals("text/html") && !virtualPath.endsWith(".html"));
        } catch (Exception e) {
            throw new IOException("URL does not exist '" + url + "'", e);
        } finally {
            if (content != null) {
                try {
                    content.close();
                } catch (IOException ignored) {
                    // The header was already read (or the real failure is being thrown);
                    // a failure to close must not mask that outcome.
                }
            }
        }
    }
}