com.norconex.collector.http.crawler.CrawlURL.java Source code

Java tutorial

Introduction

Here is the source code for com.norconex.collector.http.crawler.CrawlURL.java

Source

/* Copyright 2010-2013 Norconex Inc.
 * 
 * This file is part of Norconex HTTP Collector.
 * 
 * Norconex HTTP Collector is free software: you can redistribute it and/or 
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Norconex HTTP Collector is distributed in the hope that it will be useful, 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Norconex HTTP Collector. If not, 
 * see <http://www.gnu.org/licenses/>.
 */
package com.norconex.collector.http.crawler;

import java.io.Serializable;

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;

public class CrawlURL implements Serializable {

    private static final long serialVersionUID = -2219206220476107409L;

    private int depth;
    private String url;
    private String urlRoot;
    private Long sitemapLastMod;
    private String sitemapChangeFreq;
    private Float sitemapPriority;

    private CrawlStatus status;
    private String headChecksum;
    private String docChecksum;

    public CrawlURL(String url, int depth) {
        super();
        setUrl(url);
        setDepth(depth);
    }

    public int getDepth() {
        return depth;
    }

    public String getUrl() {
        return url;
    }

    /**
     * Gets the sitemap last modified date in milliseconds (EPOCH date).
     * @return date as long
     */
    public Long getSitemapLastMod() {
        return sitemapLastMod;
    }

    /**
     * Sets the sitemap last modified date in milliseconds (EPOCH date).
     * @param sitemapLastMod date as long
     */
    public void setSitemapLastMod(Long sitemapLastMod) {
        this.sitemapLastMod = sitemapLastMod;
    }

    public String getSitemapChangeFreq() {
        return sitemapChangeFreq;
    }

    public void setSitemapChangeFreq(String sitemapChangeFreq) {
        this.sitemapChangeFreq = sitemapChangeFreq;
    }

    public Float getSitemapPriority() {
        return sitemapPriority;
    }

    public void setSitemapPriority(Float sitemapPriority) {
        this.sitemapPriority = sitemapPriority;
    }

    public final void setDepth(int depth) {
        this.depth = depth;
    }

    public final void setUrl(String url) {
        this.url = url;
        if (url != null) {
            this.urlRoot = url.replaceFirst("(.*?://.*?)(/.*)", "$1");
        } else {
            this.urlRoot = null;
        }
    }

    public String getUrlRoot() {
        return urlRoot;
    }

    public CrawlStatus getStatus() {
        return status;
    }

    public void setStatus(CrawlStatus status) {
        this.status = status;
    }

    public String getHeadChecksum() {
        return headChecksum;
    }

    public void setHeadChecksum(String headChecksum) {
        this.headChecksum = headChecksum;
    }

    public String getDocChecksum() {
        return docChecksum;
    }

    public void setDocChecksum(String docChecksum) {
        this.docChecksum = docChecksum;
    }

    public CrawlURL safeClone() {
        CrawlURL c = new CrawlURL(url, depth);
        c.setDocChecksum(docChecksum);
        c.setHeadChecksum(headChecksum);
        c.setSitemapChangeFreq(sitemapChangeFreq);
        c.setSitemapLastMod(sitemapLastMod);
        c.setSitemapPriority(sitemapPriority);
        c.setStatus(status);
        return c;
    }

    @Override
    public boolean equals(final Object other) {
        if (!(other instanceof CrawlURL)) {
            return false;
        }
        CrawlURL castOther = (CrawlURL) other;
        return new EqualsBuilder().append(depth, castOther.depth).append(url, castOther.url)
                .append(urlRoot, castOther.urlRoot).append(sitemapLastMod, castOther.sitemapLastMod)
                .append(sitemapChangeFreq, castOther.sitemapChangeFreq)
                .append(sitemapPriority, castOther.sitemapPriority).append(status, castOther.status)
                .append(headChecksum, castOther.headChecksum).append(docChecksum, castOther.docChecksum).isEquals();
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder().append(depth).append(url).append(urlRoot).append(sitemapLastMod)
                .append(sitemapChangeFreq).append(sitemapPriority).append(status).append(headChecksum)
                .append(docChecksum).toHashCode();
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SIMPLE_STYLE).append("depth", depth).append("url", url)
                .append("urlRoot", urlRoot).append("sitemapLastMod", sitemapLastMod)
                .append("sitemapChangeFreq", sitemapChangeFreq).append("sitemapPriority", sitemapPriority)
                .append("status", status).append("headChecksum", headChecksum).append("docChecksum", docChecksum)
                .toString();
    }

}