Download leech Free Java Code
Description
Crawling capabilities for Apache Tika. Crawl content out of, e.g., file systems, http(s) sources (web crawling), imap(s) servers, or your own arbitrary data sources. Leech offers additional Tika parsers providing these crawling capabilities.
Source Files
The download file leech-master.zip has the following entries.
.gitignore
README.md
codeSnippets.md
extending.md
gpl_v3.txt
how2build.txt
how2start.md
mailinglist.md
people.md
pom.xml
src/main/assembly/distributable.xml
src/main/java/de/dfki/km/leech/Leech.java
src/main/java/de/dfki/km/leech/config/CrawlerContext.java
src/main/java/de/dfki/km/leech/config/DirectoryCrawlerContext.java
src/main/java/de/dfki/km/leech/config/HtmlCrawlerContext.java
src/main/java/de/dfki/km/leech/config/ImapCrawlerContext.java
src/main/java/de/dfki/km/leech/config/LeechDefaultConfig.java
src/main/java/de/dfki/km/leech/detect/DatasourceMediaTypes.java
src/main/java/de/dfki/km/leech/detect/DirectoryDatasourceDetector.java
src/main/java/de/dfki/km/leech/detect/ImapDatasourceDetector.java
src/main/java/de/dfki/km/leech/detect/LeechDefaultDetector.java
src/main/java/de/dfki/km/leech/io/FileURLStreamProvider.java
src/main/java/de/dfki/km/leech/io/HttpURLStreamProvider.java
src/main/java/de/dfki/km/leech/io/ImapURLStreamProvider.java
src/main/java/de/dfki/km/leech/io/ShiftInitInputStream.java
src/main/java/de/dfki/km/leech/io/URLStreamProvider.java
src/main/java/de/dfki/km/leech/lucene/Buzzwords.java
src/main/java/de/dfki/km/leech/lucene/DateParser.java
src/main/java/de/dfki/km/leech/lucene/DateUtils.java
src/main/java/de/dfki/km/leech/lucene/FieldConfig.java
src/main/java/de/dfki/km/leech/lucene/FieldFactory.java
src/main/java/de/dfki/km/leech/lucene/LeechSimpleAnalyzer.java
src/main/java/de/dfki/km/leech/lucene/LetterOrDigitLowerCaseTokenizer.java
src/main/java/de/dfki/km/leech/lucene/LuceneAnalyzerFactory.java
src/main/java/de/dfki/km/leech/lucene/PageCountEstimator.java
src/main/java/de/dfki/km/leech/lucene/Term2FrequencyEntry.java
src/main/java/de/dfki/km/leech/lucene/ToLuceneContentHandler.java
src/main/java/de/dfki/km/leech/lucene/TopFrequentTermsTermVectorMapper.java
src/main/java/de/dfki/km/leech/metadata/LeechMetadata.java
src/main/java/de/dfki/km/leech/parser/CrawlerParser.java
src/main/java/de/dfki/km/leech/parser/DirectoryCrawlerParser.java
src/main/java/de/dfki/km/leech/parser/HtmlCrawlerParser.java
src/main/java/de/dfki/km/leech/parser/ImapCrawlerParser.java
src/main/java/de/dfki/km/leech/parser/SambaCrawlerParser.java
src/main/java/de/dfki/km/leech/parser/filter/RegExpPattern.java
src/main/java/de/dfki/km/leech/parser/filter/SubstringPattern.java
src/main/java/de/dfki/km/leech/parser/filter/URLFilter.java
src/main/java/de/dfki/km/leech/parser/filter/URLFilterPattern.java
src/main/java/de/dfki/km/leech/parser/filter/URLFilteringParser.java
src/main/java/de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory.java
src/main/java/de/dfki/km/leech/parser/incremental/IncrementalCrawlingParser.java
src/main/java/de/dfki/km/leech/parser/rss/FeedParser2.java
src/main/java/de/dfki/km/leech/parser/wikipedia/WikipediaDumpParser.java
src/main/java/de/dfki/km/leech/sax/CrawlReportContentHandler.java
src/main/java/de/dfki/km/leech/sax/DataSinkContentHandler.java
src/main/java/de/dfki/km/leech/sax/DataSinkContentHandlerAdapter.java
src/main/java/de/dfki/km/leech/sax/PrintlnContentHandler.java
src/main/java/de/dfki/km/leech/util/ExceptionUtils.java
src/main/java/de/dfki/km/leech/util/FileUtils.java
src/main/java/de/dfki/km/leech/util/IndexPostprocessor.java
src/main/java/de/dfki/km/leech/util/InquisitionMapEntry.java
src/main/java/de/dfki/km/leech/util/LeechException.java
src/main/java/de/dfki/km/leech/util/Levenshtein.java
src/main/java/de/dfki/km/leech/util/LuceneIndexCreator.java
src/main/java/de/dfki/km/leech/util/MultiValueBalancedTreeMap.java
src/main/java/de/dfki/km/leech/util/MultiValueHashMap.java
src/main/java/de/dfki/km/leech/util/MultiValueMap.java
src/main/java/de/dfki/km/leech/util/MultiValueTreeMap.java
src/main/java/de/dfki/km/leech/util/OSUtils.java
src/main/java/de/dfki/km/leech/util/StopWatch.java
src/main/java/de/dfki/km/leech/util/StringUtils.java
src/main/java/de/dfki/km/leech/util/TikaUtils.java
src/main/java/de/dfki/km/leech/util/UrlUtil.java
src/main/java/de/dfki/km/leech/util/ValueHolder.java
src/main/java/de/dfki/km/leech/util/certificates/CertificateIgnoringSocketFactory.java
src/main/java/de/dfki/km/leech/util/certificates/CertificateStore.java
src/main/java/de/dfki/km/leech/util/certificates/Decision.java
src/main/java/de/dfki/km/leech/util/certificates/PersistentCertificateStore.java
src/main/java/de/dfki/km/leech/util/certificates/RootCertificateStore.java
src/main/java/de/dfki/km/leech/util/certificates/SessionCertificateStore.java
src/main/java/de/dfki/km/leech/util/certificates/StandardTrustManager.java
src/main/java/de/dfki/km/leech/util/certificates/TrustDecider.java
src/main/resources/META-INF/services/de.dfki.km.leech.io.URLStreamProvider
src/main/resources/META-INF/services/org.apache.tika.parser.Parser
src/main/resources/org/apache/tika/mime/custom-mimetypes.xml
src/test/.gitignore
supporters.md
Download
Click the following link to download leech-master.zip.
leech-master.zip