You can download WebCollector-2.09.jar on this page.
GPL2.0
The WebCollector-2.09.jar file contains the following files and classes:
META-INF/MANIFEST.MF META-INF/maven/cn.edu.hfut.dmic.webcollector/WebCollector/pom.properties META-INF/maven/cn.edu.hfut.dmic.webcollector/WebCollector/pom.xml cn.edu.hfut.dmic.webcollector.crawler.BreadthCrawler.class cn.edu.hfut.dmic.webcollector.crawler.Crawler.class cn.edu.hfut.dmic.webcollector.crawler.DeepCrawler.class cn.edu.hfut.dmic.webcollector.crawler.MultiExtractorCrawler.class cn.edu.hfut.dmic.webcollector.extract.Extractor.class cn.edu.hfut.dmic.webcollector.extract.ExtractorFactory.class cn.edu.hfut.dmic.webcollector.extract.ExtractorParams.class cn.edu.hfut.dmic.webcollector.extract.ExtractorTest.class cn.edu.hfut.dmic.webcollector.extract.Extractors.class cn.edu.hfut.dmic.webcollector.extract.RegexExtractorFactory.class cn.edu.hfut.dmic.webcollector.fetcher.DbUpdater.class cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.class cn.edu.hfut.dmic.webcollector.fetcher.MapVisitorFactory.class cn.edu.hfut.dmic.webcollector.fetcher.SegmentWriter.class cn.edu.hfut.dmic.webcollector.fetcher.Visitor.class cn.edu.hfut.dmic.webcollector.fetcher.VisitorFactory.class cn.edu.hfut.dmic.webcollector.generator.Generator.class cn.edu.hfut.dmic.webcollector.generator.Injector.class cn.edu.hfut.dmic.webcollector.generator.StandardGenerator.class cn.edu.hfut.dmic.webcollector.model.CrawlDatum.class cn.edu.hfut.dmic.webcollector.model.Links.class cn.edu.hfut.dmic.webcollector.model.Page.class cn.edu.hfut.dmic.webcollector.net.HttpRequest.class cn.edu.hfut.dmic.webcollector.net.HttpRequester.class cn.edu.hfut.dmic.webcollector.net.HttpRequesterImpl.class cn.edu.hfut.dmic.webcollector.net.HttpResponse.class cn.edu.hfut.dmic.webcollector.net.ProxyGenerator.class cn.edu.hfut.dmic.webcollector.net.Proxys.class cn.edu.hfut.dmic.webcollector.net.RandomProxyGenerator.class cn.edu.hfut.dmic.webcollector.net.RequestConfig.class cn.edu.hfut.dmic.webcollector.net.SingleProxyGenerator.class cn.edu.hfut.dmic.webcollector.util.BerkeleyDBUtils.class 
cn.edu.hfut.dmic.webcollector.util.CharsetDetector.class cn.edu.hfut.dmic.webcollector.util.Config.class cn.edu.hfut.dmic.webcollector.util.FileSystemOutput.class cn.edu.hfut.dmic.webcollector.util.FileUtils.class cn.edu.hfut.dmic.webcollector.util.JsoupUtils.class cn.edu.hfut.dmic.webcollector.util.RegexRule.class log4j.properties
Contents of the WebCollector-2.09.pom file:
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <name>WebCollector</name> <groupId>cn.edu.hfut.dmic.webcollector</groupId> <artifactId>WebCollector</artifactId> <version>2.09</version> <description>A java crawler for information collection</description> <url>https://github.com/CrawlScript/WebCollector</url> <packaging>jar</packaging> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <maven.compiler.source>1.6</maven.compiler.source> <maven.compiler.target>1.6</maven.compiler.target> </properties> <licenses> <license> <name>GPL2.0</name> <url>http://www.gnu.org/licenses/gpl-2.0.html</url> <distribution>repo</distribution> </license> </licenses> <scm> <url>https://github.com/CrawlScript/WebCollector</url> <connection>scm:git:https://github.com/CrawlScript/WebCollector.git</connection> </scm> <distributionManagement> <snapshotRepository> <id>ossrh</id> <url>https://oss.sonatype.org/content/repositories/snapshots</url> </snapshotRepository> </distributionManagement> <build> <plugins> <plugin> <groupId>org.sonatype.plugins</groupId> <artifactId>nexus-staging-maven-plugin</artifactId> <version>1.6.3</version> <extensions>true</extensions> <configuration> <serverId>ossrh</serverId> <nexusUrl>https://oss.sonatype.org/</nexusUrl> <autoReleaseAfterClose>true</autoReleaseAfterClose> </configuration> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> <version>2.7.2</version> <configuration> <forkMode>once</forkMode> <argLine>-Dfile.encoding=UTF-8</argLine> <systemProperties> <property> <name>net.sourceforge.cobertura.datafile</name> <value>target/cobertura/cobertura.ser</value> </property> </systemProperties> <skipTests>true</skipTests> </configuration> </plugin> 
<plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-dependency-plugin</artifactId> <version>2.8</version> <executions> <execution> <id>copy-dependencies</id> <phase>package</phase> <goals> <goal>copy-dependencies</goal> </goals> <configuration> <outputDirectory>${project.build.directory}/</outputDirectory> <overWriteReleases>false</overWriteReleases> <overWriteSnapshots>true</overWriteSnapshots> <excludeTransitive>false</excludeTransitive> </configuration> </execution> </executions> </plugin> <!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> <version>2.5</version> <configuration> <archive> <manifest> <addClasspath>true</addClasspath> <mainClass>cn.edu.hfut.dmic.webcollector.ui.BreadthCrawlerUI</mainClass> </manifest> </archive> </configuration> </plugin> --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-javadoc-plugin</artifactId> <version>2.6.1</version> <configuration> </configuration> <executions> <execution> <id>attach-javadoc</id> <phase>verify</phase> <goals> <goal>jar</goal> </goals> </execution> </executions> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-source-plugin</artifactId> <version>2.1.1</version> <configuration> </configuration> <executions> <execution> <id>attach-sources</id> <phase>verify</phase> <goals> <goal>jar</goal> </goals> </execution> </executions> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-gpg-plugin</artifactId> <version>1.5</version> <executions> <execution> <id>sign-artifacts</id> <phase>verify</phase> <goals> <goal>sign</goal> </goals> </execution> </executions> </plugin> <!-- <plugin> <artifactId>maven-antrun-plugin</artifactId> <executions> <execution> <phase>package</phase> <configuration> <tasks> <copy todir="${project.build.directory}/bin"> <fileset dir="${project.build.directory}"> <include name="*.jar" /> <include name="*.sh" /> <include name="*.bat" /> 
</fileset> </copy> <zip basedir="${project.build.directory}/bin" destfile="../webcollector-${project.version}-bin.zip" /> </tasks> </configuration> <goals> <goal>run</goal> </goals> </execution> </executions> </plugin> --> </plugins> </build> <dependencies> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.7.3</version> </dependency> <dependency> <groupId>com.googlecode.juniversalchardet</groupId> <artifactId>juniversalchardet</artifactId> <version>1.0.3</version> </dependency> <dependency> <groupId>log4j</groupId> <artifactId>log4j</artifactId> <version>1.2.17</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.11</version> </dependency> <dependency> <groupId>org.json</groupId> <artifactId>json</artifactId> <version>20140107</version> </dependency> <dependency> <groupId>com.sleepycat</groupId> <artifactId>je</artifactId> <version>5.0.73</version> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> <version>1.7.9</version> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>1.7.9</version> </dependency> </dependencies> <developers> <developer> <id>hujun</id> <name>Hu Jun</name> <email>hujunxianligong@gmail.com</email> <roles> <role>Lead Developer</role> </roles> <timezone>+8</timezone> </developer> </developers> </project>
<dependency> <groupId>cn.edu.hfut.dmic.webcollector</groupId> <artifactId>WebCollector</artifactId> <version>2.09</version> </dependency>
If you think the following WebCollector-2.09.jar, downloaded from the Maven Central repository, is inappropriate (for example, because it contains malicious code or tools, or violates copyright), please contact us by email. Thanks.
Download WebCollector-2.09.jar file