com.esri.gpt.framework.robots.BotsParserTest.java Source code

Java tutorial

Introduction

Here is the source code for com.esri.gpt.framework.robots.BotsParserTest.java

Source

/*
 * Copyright 2015 pete5162.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.esri.gpt.framework.robots;

import static com.esri.gpt.framework.robots.BotsUtils.requestAccess;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.junit.Test;
import static org.junit.Assert.*;
import org.junit.Before;

/**
 * Tests {@link BotsParser} by parsing the {@code robots.txt} fixture found on the
 * test classpath for several user agents and checking the access decision
 * reported for a fixed set of URLs.
 *
 * @author pete5162
 */
public class BotsParserTest {
    /** Classpath location of the robots.txt fixture shared by every test. */
    private static final String ROBOTS_RESOURCE = "com/esri/gpt/framework/robots/robots.txt";

    /** Text of the fixture, reloaded before each test. */
    private String robotsTxt;

    /**
     * Loads the robots.txt fixture into {@link #robotsTxt}.
     *
     * @throws IOException if the fixture cannot be read
     */
    @Before
    public void setUp() throws IOException {
        InputStream inputRobots = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream(ROBOTS_RESOURCE);
        // getResourceAsStream returns null for a missing resource; fail with a clear
        // message instead of a NullPointerException from inside the copy.
        assertNotNull("Test resource not found on classpath: " + ROBOTS_RESOURCE, inputRobots);

        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try {
            IOUtils.copy(inputRobots, buffer);
        } finally {
            // Release the stream even when the copy fails.
            IOUtils.closeQuietly(inputRobots);
        }

        robotsTxt = buffer.toString("UTF-8");
    }

    /**
     * Parses the fixture as user agent "GeoportalServer" with the longest-path
     * winning strategy and checks the expected access decisions.
     *
     * @throws IOException if the fixture cannot be parsed
     */
    @Test
    public void testGeoportalServerUserAgent() throws IOException {
        BotsParser parser = new BotsParser(true, true, "GeoportalServer");

        Bots bots = parser.readRobotsTxt(BotsMode.always, MatchingStrategy.SIMPLE_PATTERN_STRATEGY,
                WinningStrategy.LONGEST_PATH_STRATEGY, openRobotsTxt());
        assertNotNull(bots);

        assertDenied(bots, "http://www.fict.org/");
        assertDenied(bots, "http://www.fict.org/index.html");
        assertAllowed(bots, "http://www.fict.org/robots.txt");
        assertAllowed(bots, "http://www.fict.org/server.html");
        assertAllowed(bots, "http://www.fict.org/services/fast.html");
        assertAllowed(bots, "http://www.fict.org/services/slow.html");
        assertDenied(bots, "http://www.fict.org/orgo.gif");
        assertAllowed(bots, "http://www.fict.org/org/about.html");
        assertDenied(bots, "http://www.fict.org/org/plans.html");
        assertDenied(bots, "http://www.fict.org/%7Ejim/jim.html");
        assertAllowed(bots, "http://www.fict.org/%7Emak/mak.html");
    }

    /**
     * Parses the fixture as user agent "Unhipbot" with the first-match winning
     * strategy and checks the expected access decisions.
     *
     * @throws IOException if the fixture cannot be parsed
     */
    @Test
    public void testUnhipbotUserAgent() throws IOException {
        BotsParser parser = new BotsParser(true, true, "Unhipbot");

        Bots bots = parser.readRobotsTxt(BotsMode.always, MatchingStrategy.SIMPLE_PATTERN_STRATEGY,
                WinningStrategy.FIRST_MATCH_STRATEGY, openRobotsTxt());
        assertNotNull(bots);

        assertDenied(bots, "http://www.fict.org/");
        assertDenied(bots, "http://www.fict.org/index.html");
        assertAllowed(bots, "http://www.fict.org/robots.txt");
        assertDenied(bots, "http://www.fict.org/server.html");
        assertDenied(bots, "http://www.fict.org/services/fast.html");
        assertDenied(bots, "http://www.fict.org/services/slow.html");
        assertDenied(bots, "http://www.fict.org/orgo.gif");
        assertDenied(bots, "http://www.fict.org/org/about.html");
        assertDenied(bots, "http://www.fict.org/org/plans.html");
        assertDenied(bots, "http://www.fict.org/%7Ejim/jim.html");
        assertDenied(bots, "http://www.fict.org/%7Emak/mak.html");
    }

    /**
     * Parses the fixture as user agent "Webcrawler" with the first-match winning
     * strategy and checks the expected access decisions.
     *
     * @throws IOException if the fixture cannot be parsed
     */
    @Test
    public void testWebcrawlerUserAgent() throws IOException {
        BotsParser parser = new BotsParser(true, true, "Webcrawler");

        Bots bots = parser.readRobotsTxt(BotsMode.always, MatchingStrategy.SIMPLE_PATTERN_STRATEGY,
                WinningStrategy.FIRST_MATCH_STRATEGY, openRobotsTxt());
        assertNotNull(bots);

        assertAllowed(bots, "http://www.fict.org/");
        assertAllowed(bots, "http://www.fict.org/index.html");
        assertAllowed(bots, "http://www.fict.org/robots.txt");
        assertAllowed(bots, "http://www.fict.org/server.html");
        assertAllowed(bots, "http://www.fict.org/services/fast.html");
        assertAllowed(bots, "http://www.fict.org/services/slow.html");
        assertAllowed(bots, "http://www.fict.org/orgo.gif");
        assertAllowed(bots, "http://www.fict.org/org/about.html");
        assertAllowed(bots, "http://www.fict.org/org/plans.html");
        assertAllowed(bots, "http://www.fict.org/%7Ejim/jim.html");
        assertAllowed(bots, "http://www.fict.org/%7Emak/mak.html");
    }

    /**
     * Returns a fresh stream over the UTF-8 bytes of the loaded fixture.
     *
     * @return a new in-memory stream positioned at the start of the fixture
     * @throws IOException if the UTF-8 encoding is unavailable
     */
    private InputStream openRobotsTxt() throws IOException {
        return new ByteArrayInputStream(robotsTxt.getBytes("UTF-8"));
    }

    /**
     * Asserts that access to {@code url} is granted, naming the URL on failure.
     *
     * @param bots parsed robots.txt rules to query
     * @param url  absolute URL to check
     * @throws IOException declared defensively so any checked failure from the
     *                     access check propagates to the test
     */
    private static void assertAllowed(Bots bots, String url) throws IOException {
        assertTrue("Expected access to be granted: " + url, requestAccess(bots, url).hasAccess());
    }

    /**
     * Asserts that access to {@code url} is denied, naming the URL on failure.
     *
     * @param bots parsed robots.txt rules to query
     * @param url  absolute URL to check
     * @throws IOException declared defensively so any checked failure from the
     *                     access check propagates to the test
     */
    private static void assertDenied(Bots bots, String url) throws IOException {
        assertTrue("Expected access to be denied: " + url, !requestAccess(bots, url).hasAccess());
    }

}