com.qwazr.crawler.web.manager.CurrentURIImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.qwazr.crawler.web.manager.CurrentURIImpl.java

Source

/**
 * Copyright 2014-2016 Emmanuel Keller / QWAZR
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/
package com.qwazr.crawler.web.manager;

import com.qwazr.crawler.web.CurrentURI;
import com.qwazr.crawler.web.driver.BrowserDriver;
import com.qwazr.utils.LinkUtils;
import org.apache.commons.lang3.StringUtils;

import java.net.URI;
import java.util.Collection;

/**
 * Mutable holder for the state recorded about a single URI: the URI it
 * started from, the URI it ended on, its depth, a set of status flags, an
 * optional error message and the link collections attached to it.
 * <p>
 * Every mutable field is {@code volatile}, so a value written by one thread is
 * visible to readers on other threads. Individual setters are single writes;
 * no atomicity is provided across several fields.
 */
class CurrentURIImpl implements CurrentURI {

    private final Integer depth;

    private final URI initialURI;
    private volatile URI finalURI;
    private volatile URI baseURI;

    private volatile boolean isIgnored = false;
    private volatile boolean isCrawled = false;
    private volatile boolean isRedirected = false;
    private volatile Boolean isInInclusion = null;
    private volatile Boolean isInExclusion = null;
    private volatile boolean isStartDomain = false;
    private volatile boolean isStartSubDomain = false;

    // volatile like every other mutable field of this class
    private volatile String error = null;

    private volatile Collection<URI> sameLevelLinks = null;
    private volatile Collection<URI> nextLevelLinks = null;
    private volatile Collection<URI> filteredLinks = null;

    /**
     * @param uri   the URI this instance starts from
     * @param depth the depth value reported by {@link #getDepth()}
     */
    CurrentURIImpl(URI uri, Integer depth) {
        this.initialURI = uri;
        this.depth = depth;
    }

    /**
     * @return the URI given to the constructor
     */
    @Override
    public URI getInitialURI() {
        return initialURI;
    }

    /**
     * Alias of {@link #getURI()}.
     */
    @Override
    public URI getUri() {
        return getURI();
    }

    /**
     * @return the final URI when one has been set, otherwise the initial URI
     */
    @Override
    public URI getURI() {
        // Read the volatile field once so the value tested is the value returned.
        final URI current = finalURI;
        return current != null ? current : initialURI;
    }

    /**
     * @return the depth given to the constructor
     */
    @Override
    public Integer getDepth() {
        return depth;
    }

    /**
     * Sets the URI used in preference to {@link #getURI()} when resolving
     * hrefs in {@link #hrefToURICollection(Collection, Collection)}.
     */
    void setBaseURI(URI uri) {
        baseURI = uri;
    }

    /**
     * Records the final URI. When {@code uri} is not null the redirected flag
     * is set to whether it differs from the initial URI; a null argument
     * leaves the redirected flag untouched.
     */
    void setFinalURI(URI uri) {
        finalURI = uri;
        // Work on the parameter rather than re-reading the volatile field,
        // which another thread may have changed in the meantime.
        if (uri != null) {
            isRedirected = !uri.equals(initialURI);
        }
    }

    void setInInclusion(Boolean isInInclusion) {
        this.isInInclusion = isInInclusion;
    }

    /**
     * @return the inclusion flag as last set, or null if it was never set
     */
    @Override
    public Boolean isInInclusion() {
        return isInInclusion;
    }

    public void setInExclusion(Boolean isInExclusion) {
        this.isInExclusion = isInExclusion;
    }

    /**
     * @return the exclusion flag as last set, or null if it was never set
     */
    @Override
    public Boolean isInExclusion() {
        return isInExclusion;
    }

    @Override
    public void setIgnored(boolean ignored) {
        isIgnored = ignored;
    }

    @Override
    public boolean isIgnored() {
        return isIgnored;
    }

    /**
     * Marks this URI as crawled. The flag cannot be cleared through this class.
     */
    void setCrawled() {
        isCrawled = true;
    }

    @Override
    public boolean isCrawled() {
        return isCrawled;
    }

    /**
     * @return true when a non-null final URI different from the initial URI
     * was last recorded by {@link #setFinalURI(URI)}
     */
    @Override
    public boolean isRedirected() {
        return isRedirected;
    }

    /**
     * Records an error message derived from the given exception, or clears
     * the message when {@code e} is null.
     * <p>
     * The message comes from {@code driver.getErrorMessage(e)} when a driver
     * is supplied, otherwise from {@code e.getMessage()}. If that is empty,
     * {@code e.toString()} is used, then the exception class name.
     *
     * @param driver the driver asked for the message, may be null
     * @param e      the exception to describe, or null to clear the error
     */
    void setError(BrowserDriver driver, Exception e) {
        if (e == null) {
            error = null;
            return;
        }
        // Build the message locally and publish it with a single write so
        // readers never observe an empty or intermediate value.
        String message = driver == null ? e.getMessage() : driver.getErrorMessage(e);
        if (StringUtils.isEmpty(message)) {
            message = e.toString();
        }
        if (StringUtils.isEmpty(message)) {
            message = e.getClass().getName();
        }
        error = message;
    }

    /**
     * @return the last recorded error message, or null if there is none
     */
    @Override
    public String getError() {
        return error;
    }

    @Override
    public void setSameLevelLinks(Collection<URI> links) {
        this.sameLevelLinks = links;
    }

    @Override
    public Collection<URI> getSameLevelLinks() {
        return sameLevelLinks;
    }

    @Override
    public void setLinks(Collection<URI> links) {
        this.nextLevelLinks = links;
    }

    @Override
    public Collection<URI> getLinks() {
        return nextLevelLinks;
    }

    @Override
    public void setFilteredLinks(Collection<URI> filteredLinks) {
        this.filteredLinks = filteredLinks;
    }

    @Override
    public Collection<URI> getFilteredLinks() {
        return filteredLinks;
    }

    void setStartDomain(boolean isStartDomain) {
        this.isStartDomain = isStartDomain;
    }

    @Override
    public boolean isStartDomain() {
        return isStartDomain;
    }

    void setStartSubDomain(boolean isStartSubDomain) {
        this.isStartSubDomain = isStartSubDomain;
    }

    @Override
    public boolean isStartSubDomain() {
        return isStartSubDomain;
    }

    /**
     * Resolves each href against the base URI (or, when no base URI is set,
     * against {@link #getURI()}) and appends every non-null result to
     * {@code uriCollection}. Spaces in an href are replaced by {@code %20}
     * before resolution.
     *
     * @param hrefCollection the hrefs to resolve; nothing happens when null
     * @param uriCollection  receives the resolved URIs; it is not null-checked
     */
    @Override
    public void hrefToURICollection(Collection<String> hrefCollection, Collection<URI> uriCollection) {
        if (hrefCollection == null) {
            return;
        }
        // Single read of the volatile field: the value tested is the value used.
        final URI base = baseURI;
        final URI uri = base != null ? base : getURI();
        for (String href : hrefCollection) {
            final String escapedHref = StringUtils.replace(href, " ", "%20");
            final URI resolvedURI = LinkUtils.resolveQuietly(uri, escapedHref);
            if (resolvedURI != null) {
                uriCollection.add(resolvedURI);
            }
        }
    }

}