bixo.fetcher.simulation.FakeRobotsFetcher.java Source code

Java tutorial

Introduction

Here is the source code for bixo.fetcher.simulation.FakeRobotsFetcher.java

Source

/*
 * Copyright 2013-2015 Scale Unlimited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package bixo.fetcher.simulation;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Random;

import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import bixo.config.FetcherPolicy;
import bixo.config.UserAgent;
import bixo.datum.ContentBytes;
import bixo.datum.FetchedDatum;
import bixo.datum.HttpHeaders;
import bixo.datum.ScoredUrlDatum;
import bixo.exceptions.BaseFetchException;
import bixo.exceptions.HttpFetchException;
import bixo.exceptions.UrlFetchException;
import bixo.fetcher.BaseFetcher;

@SuppressWarnings("serial")
public class FakeRobotsFetcher extends BaseFetcher {
    private static Logger LOGGER = LoggerFactory.getLogger(FakeRobotsFetcher.class);
    private static final UserAgent ROBOTS_TEST_AGENT = new UserAgent("test", "test@domain.com",
            "http://test.domain.com");

    private boolean _randomFetching;
    private Random _rand;

    public FakeRobotsFetcher(int maxThreads) {
        this(maxThreads, ROBOTS_TEST_AGENT, true);
    }

    public FakeRobotsFetcher(int maxThreads, UserAgent userAgent) {
        this(maxThreads, userAgent, true);
    }

    public FakeRobotsFetcher(int maxThreads, UserAgent userAgent, boolean randomFetching) {
        super(maxThreads, new FetcherPolicy(), userAgent);
        _randomFetching = randomFetching;
        _rand = new Random();
    }

    @Override
    public void abort() {
        // Do nothing
    }

    @Override
    public FetchedDatum get(ScoredUrlDatum scoredUrl) throws BaseFetchException {
        String url = scoredUrl.getUrl();
        LOGGER.trace("Fake fetching " + url);

        URL theUrl;
        try {
            theUrl = new URL(url);
        } catch (MalformedURLException e) {
            throw new UrlFetchException(url, e.getMessage());
        }

        int statusCode = HttpStatus.SC_OK;
        int contentSize = 10000;
        int bytesPerSecond = 100000;

        if (_randomFetching) {
            contentSize = Math.max(0, (int) (_rand.nextGaussian() * 5000.0) + 10000) + 100;
            bytesPerSecond = Math.max(0, (int) (_rand.nextGaussian() * 25000.0) + 50000) + 1000;
        } else {
            String query = theUrl.getQuery();
            if (query != null) {
                String[] params = query.split("&");
                for (String param : params) {
                    String[] keyValue = param.split("=");
                    if (keyValue[0].equals("status")) {
                        statusCode = Integer.parseInt(keyValue[1]);
                    } else if (keyValue[0].equals("size")) {
                        contentSize = Integer.parseInt(keyValue[1]);
                    } else if (keyValue[0].equals("speed")) {
                        bytesPerSecond = Integer.parseInt(keyValue[1]);
                    } else {
                        LOGGER.warn("Unknown fake URL parameter: " + keyValue[0]);
                    }
                }
            }
        }

        if (statusCode != HttpStatus.SC_OK) {
            throw new HttpFetchException(url, "Exception requested from FakeHttpFetcher", statusCode, null);
        }

        // Now we want to delay for as long as it would take to fill in the data.
        float duration = (float) contentSize / (float) bytesPerSecond;
        LOGGER.trace(String.format("Fake fetching %d bytes at %d bps (%fs) from %s", contentSize, bytesPerSecond,
                duration, url));
        try {
            Thread.sleep((long) (duration * 1000.0));
        } catch (InterruptedException e) {
            // Break out of our delay, but preserve interrupt state.
            Thread.currentThread().interrupt();
        }

        HttpHeaders headers = new HttpHeaders();
        headers.add("x-responserate", "" + bytesPerSecond);
        FetchedDatum result = new FetchedDatum(url, url, System.currentTimeMillis(), headers,
                new ContentBytes(new byte[contentSize]), "text/html", bytesPerSecond);

        result.setPayload(scoredUrl.getPayload());
        return result;
    }

}