Java tutorial
/* Copyright 2010-2014 Norconex Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.norconex.collector.http.data;

import java.lang.reflect.InvocationTargetException;
import java.util.regex.Pattern;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;

import com.norconex.collector.core.CollectorException;
import com.norconex.collector.core.data.BaseCrawlData;
import com.norconex.collector.core.data.ICrawlData;

/**
 * A URL being crawled holding relevant crawl information.
 * @author Pascal Essiembre
 */
public class HttpCrawlData extends BaseCrawlData {

    private static final long serialVersionUID = -2219206220476107409L;

    /**
     * Captures, as group 1, everything up to (but excluding) the first
     * '/', '?' or '#' found after the "://" separator. Those three
     * characters are the ones that can end the authority part of a URL, so
     * a query string or fragment directly following the host is not
     * mistaken for part of the root.
     */
    private static final Pattern URL_ROOT_PATTERN =
            Pattern.compile("(.*?://.*?)([/?#].*)");

    private int depth;
    private String urlRoot;
    private Long sitemapLastMod;
    private String sitemapChangeFreq;
    private Float sitemapPriority;
    private String originalReference;
    private String referrerLinkText;
    private String referrerReference;
    private String referrerLinkTag;
    private String referrerLinkTitle;

    /**
     * Constructor.
     */
    public HttpCrawlData() {
        super();
    }

    /**
     * Constructor copying the bean properties of an existing crawl data
     * object onto this new instance (using
     * {@link BeanUtils#copyProperties(Object, Object)}).
     * A <code>null</code> argument is ignored, leaving this instance with
     * its default values.
     * @param crawlData crawl data to copy properties from (may be
     *        <code>null</code>)
     * @throws CollectorException if a property could not be copied
     */
    public HttpCrawlData(ICrawlData crawlData) {
        super();
        if (crawlData != null) {
            try {
                BeanUtils.copyProperties(this, crawlData);
            } catch (IllegalAccessException | InvocationTargetException e) {
                throw new CollectorException(e);
            }
        }
    }

    /**
     * Constructor.
     * @param url URL being crawled
     * @param depth URL depth
     */
    public HttpCrawlData(String url, int depth) {
        super();
        setReference(url);
        setDepth(depth);
    }

    /**
     * Gets the original reference.
     * @return original reference
     */
    public String getOriginalReference() {
        return originalReference;
    }
    /**
     * Sets the original reference.
     * @param originalReference original reference
     */
    public void setOriginalReference(String originalReference) {
        this.originalReference = originalReference;
    }

    /**
     * Gets the URL depth.
     * @return URL depth
     */
    public int getDepth() {
        return depth;
    }

    /**
     * Gets the sitemap last modified date in milliseconds (EPOCH date).
     * @return date as long
     */
    public Long getSitemapLastMod() {
        return sitemapLastMod;
    }
    /**
     * Sets the sitemap last modified date in milliseconds (EPOCH date).
     * @param sitemapLastMod date as long
     */
    public void setSitemapLastMod(Long sitemapLastMod) {
        this.sitemapLastMod = sitemapLastMod;
    }

    /**
     * Gets the sitemap change frequency.
     * @return sitemap change frequency
     */
    public String getSitemapChangeFreq() {
        return sitemapChangeFreq;
    }
    /**
     * Sets the sitemap change frequency.
     * @param sitemapChangeFreq sitemap change frequency
     */
    public void setSitemapChangeFreq(String sitemapChangeFreq) {
        this.sitemapChangeFreq = sitemapChangeFreq;
    }

    /**
     * Gets the sitemap priority.
     * @return sitemap priority
     */
    public Float getSitemapPriority() {
        return sitemapPriority;
    }
    /**
     * Sets the sitemap priority.
     * @param sitemapPriority sitemap priority
     */
    public void setSitemapPriority(Float sitemapPriority) {
        this.sitemapPriority = sitemapPriority;
    }

    /**
     * Sets the URL depth.
     * @param depth URL depth
     */
    public final void setDepth(int depth) {
        this.depth = depth;
    }

    /**
     * Gets the referrer link text.
     * @return referrer link text
     */
    public String getReferrerLinkText() {
        return referrerLinkText;
    }
    /**
     * Sets the referrer link text.
     * @param referrerLinkText referrer link text
     */
    public void setReferrerLinkText(String referrerLinkText) {
        this.referrerLinkText = referrerLinkText;
    }

    /**
     * Gets the referrer reference.
     * @return referrer reference
     */
    public String getReferrerReference() {
        return referrerReference;
    }
    /**
     * Sets the referrer reference.
     * @param referrerReference referrer reference
     */
    public void setReferrerReference(String referrerReference) {
        this.referrerReference = referrerReference;
    }

    /**
     * Gets the referrer link tag.
     * @return referrer link tag
     */
    public String getReferrerLinkTag() {
        return referrerLinkTag;
    }
    /**
     * Sets the referrer link tag.
     * @param referrerLinkTag referrer link tag
     */
    public void setReferrerLinkTag(String referrerLinkTag) {
        this.referrerLinkTag = referrerLinkTag;
    }

    /**
     * Gets the referrer link title.
     * @return referrer link title
     */
    public String getReferrerLinkTitle() {
        return referrerLinkTitle;
    }
    /**
     * Sets the referrer link title.
     * @param referrerLinkTitle referrer link title
     */
    public void setReferrerLinkTitle(String referrerLinkTitle) {
        this.referrerLinkTitle = referrerLinkTitle;
    }

    /**
     * Sets the reference (URL) and derives the URL root from it.
     * The root is the part of the URL preceding the first '/', '?' or '#'
     * that follows "://". When the URL holds no such delimiter, or no
     * "://" at all, the whole URL is kept as the root. A <code>null</code>
     * URL clears the root.
     * @param url the URL
     */
    @Override
    public final void setReference(String url) {
        super.setReference(url);
        if (url != null) {
            // Stop at '?' and '#' as well as '/', so that a URL such as
            // "http://host.com?next=/home" yields "http://host.com" rather
            // than "http://host.com?next=".
            this.urlRoot = URL_ROOT_PATTERN.matcher(url).replaceFirst("$1");
        } else {
            this.urlRoot = null;
        }
    }

    /**
     * Gets the URL root (protocol + domain, e.g. http://www.host.com).
     * @return URL root
     */
    public String getUrlRoot() {
        return urlRoot;
    }

    @Override
    public boolean equals(final Object other) {
        if (!(other instanceof HttpCrawlData)) {
            return false;
        }
        HttpCrawlData castOther = (HttpCrawlData) other;
        return new EqualsBuilder()
                .appendSuper(super.equals(other))
                .append(depth, castOther.depth)
                .append(urlRoot, castOther.urlRoot)
                .append(sitemapLastMod, castOther.sitemapLastMod)
                .append(sitemapChangeFreq, castOther.sitemapChangeFreq)
                .append(sitemapPriority, castOther.sitemapPriority)
                .append(originalReference, castOther.originalReference)
                .append(referrerLinkText, castOther.referrerLinkText)
                .append(referrerReference, castOther.referrerReference)
                .append(referrerLinkTag, castOther.referrerLinkTag)
                .append(referrerLinkTitle, castOther.referrerLinkTitle)
                .isEquals();
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder()
                .appendSuper(super.hashCode())
                .append(depth)
                .append(urlRoot)
                .append(sitemapLastMod)
                .append(sitemapChangeFreq)
                .append(sitemapPriority)
                .append(originalReference)
                .append(referrerLinkText)
                .append(referrerReference)
                .append(referrerLinkTag)
                .append(referrerLinkTitle)
                .toHashCode();
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
                .appendSuper(super.toString())
                .append("depth", depth)
                .append("urlRoot", urlRoot)
                .append("sitemapLastMod", sitemapLastMod)
                .append("sitemapChangeFreq", sitemapChangeFreq)
                .append("sitemapPriority", sitemapPriority)
                .append("originalReference", originalReference)
                .append("referrerLinkText", referrerLinkText)
                .append("referrerReference", referrerReference)
                .append("referrerLinkTag", referrerLinkTag)
                .append("referrerLinkTitle", referrerLinkTitle)
                .toString();
    }
}