You can download crawler4j-4.2.jar from this page.
License: The Apache Software License, Version 2.0
The crawler4j-4.2.jar file contains the following entries.
META-INF/MANIFEST.MF META-INF/maven/edu.uci.ics/crawler4j/pom.properties META-INF/maven/edu.uci.ics/crawler4j/pom.xml edu.uci.ics.crawler4j.crawler.Configurable.class edu.uci.ics.crawler4j.crawler.CrawlConfig.class edu.uci.ics.crawler4j.crawler.CrawlController.class edu.uci.ics.crawler4j.crawler.Page.class edu.uci.ics.crawler4j.crawler.WebCrawler.class edu.uci.ics.crawler4j.crawler.authentication.AuthInfo.class edu.uci.ics.crawler4j.crawler.authentication.BasicAuthInfo.class edu.uci.ics.crawler4j.crawler.authentication.FormAuthInfo.class edu.uci.ics.crawler4j.crawler.authentication.NtAuthInfo.class edu.uci.ics.crawler4j.crawler.exceptions.ContentFetchException.class edu.uci.ics.crawler4j.crawler.exceptions.PageBiggerThanMaxSizeException.class edu.uci.ics.crawler4j.crawler.exceptions.ParseException.class edu.uci.ics.crawler4j.crawler.exceptions.RedirectException.class edu.uci.ics.crawler4j.fetcher.IdleConnectionMonitorThread.class edu.uci.ics.crawler4j.fetcher.PageFetchResult.class edu.uci.ics.crawler4j.fetcher.PageFetcher.class edu.uci.ics.crawler4j.frontier.Counters.class edu.uci.ics.crawler4j.frontier.DocIDServer.class edu.uci.ics.crawler4j.frontier.Frontier.class edu.uci.ics.crawler4j.frontier.InProcessPagesDB.class edu.uci.ics.crawler4j.frontier.WebURLTupleBinding.class edu.uci.ics.crawler4j.frontier.WorkQueues.class edu.uci.ics.crawler4j.parser.BinaryParseData.class edu.uci.ics.crawler4j.parser.ExtractedUrlAnchorPair.class edu.uci.ics.crawler4j.parser.HtmlContentHandler.class edu.uci.ics.crawler4j.parser.HtmlParseData.class edu.uci.ics.crawler4j.parser.NotAllowedContentException.class edu.uci.ics.crawler4j.parser.ParseData.class edu.uci.ics.crawler4j.parser.Parser.class edu.uci.ics.crawler4j.parser.TextParseData.class edu.uci.ics.crawler4j.robotstxt.HostDirectives.class edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig.class edu.uci.ics.crawler4j.robotstxt.RobotstxtParser.class edu.uci.ics.crawler4j.robotstxt.RobotstxtServer.class 
edu.uci.ics.crawler4j.robotstxt.RuleSet.class edu.uci.ics.crawler4j.url.TLDList.class edu.uci.ics.crawler4j.url.URLCanonicalizer.class edu.uci.ics.crawler4j.url.UrlResolver.class edu.uci.ics.crawler4j.url.WebURL.class edu.uci.ics.crawler4j.util.IO.class edu.uci.ics.crawler4j.util.Net.class edu.uci.ics.crawler4j.util.Util.class tld-names.txt
Content of the crawler4j-4.2.pom file:
<!-- Maven project descriptor for edu.uci.ics:crawler4j, version 4.2. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Artifact coordinates and descriptive metadata -->
  <groupId>edu.uci.ics</groupId>
  <artifactId>crawler4j</artifactId>
  <packaging>jar</packaging>
  <name>crawler4j</name>
  <version>4.2</version>
  <description>Open Source Web Crawler for Java</description>
  <url>https://github.com/yasserg/crawler4j</url>

  <licenses>
    <license>
      <name>The Apache Software License, Version 2.0</name>
      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
      <distribution>repo</distribution>
    </license>
  </licenses>

  <!-- Source control location (same Git URL for read and developer access) -->
  <scm>
    <url>https://github.com/yasserg/crawler4j</url>
    <connection>scm:git:git@github.com:yasserg/crawler4j.git</connection>
    <developerConnection>scm:git:git@github.com:yasserg/crawler4j.git</developerConnection>
  </scm>

  <!-- Settings are inherited from the Sonatype OSS parent POM -->
  <parent>
    <groupId>org.sonatype.oss</groupId>
    <artifactId>oss-parent</artifactId>
    <version>7</version>
  </parent>

  <build>
    <plugins>
      <!-- compile with Java 1.7 source and target levels -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.2</version>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>

      <!-- build the main jar, leaving out files matching **/*.properties -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>2.5</version>
        <configuration>
          <excludes>
            <exclude>**/*.properties</exclude>
          </excludes>
        </configuration>
      </plugin>

      <!-- generate a source jar -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
        <version>2.4</version>
        <executions>
          <execution>
            <id>attach-sources</id>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <!-- generate a javadoc jar -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-javadoc-plugin</artifactId>
        <version>2.10.1</version>
        <executions>
          <execution>
            <id>attach-javadocs</id>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <!-- assemble a jar-with-dependencies ("fat jar") in the package phase;
           no groupId is given here, so Maven's default plugin groupId applies -->
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.5.3</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-fat-jar</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
            <configuration>
              <finalName>crawler4j-${project.version}</finalName>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <!-- Compile time Dependencies -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.10</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>uk.org.lidalia</groupId>
      <artifactId>lidalia-slf4j-ext</artifactId>
      <version>1.0.0</version>
    </dependency>
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>1.1.2</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.4</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>com.sleepycat</groupId>
      <artifactId>je</artifactId>
      <version>5.0.73</version>
    </dependency>
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-parsers</artifactId>
      <version>1.5</version>
    </dependency>

    <!-- Test Dependencies -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <!-- Additional artifact repository declared by this project -->
  <repositories>
    <repository>
      <id>oracleReleases</id>
      <name>Oracle Released Java Packages</name>
      <url>http://download.oracle.com/maven</url>
      <layout>default</layout>
    </repository>
  </repositories>
</project>
<!-- Maven dependency declaration for crawler4j 4.2 -->
<dependency>
  <groupId>edu.uci.ics</groupId>
  <artifactId>crawler4j</artifactId>
  <version>4.2</version>
</dependency>
If you believe that the crawler4j-4.2.jar below, downloaded from the Maven Central repository, is inappropriate (for example, because it contains malicious code or tools, or violates copyright), please contact the site administrator by email. Thanks.
Download crawler4j-4.2.jar file