com.frostwire.search.eztv.EztvSearchPerformer.java Source code

Introduction

Here is the source code for com.frostwire.search.eztv.EztvSearchPerformer.java
Source

/*
 * Created by Angel Leon (@gubatron), Alden Torres (aldenml)
 * Copyright (c) 2011-2016, FrostWire(R). All rights reserved.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.frostwire.search.eztv;

import com.frostwire.search.CrawlableSearchResult;
import com.frostwire.search.SearchMatcher;
import com.frostwire.search.torrent.TorrentRegexSearchPerformer;
import org.apache.commons.lang3.StringUtils;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Regex-based search performer for EZTV.
 *
 * <p>The search page is fetched with a form POST and scanned with
 * {@link #SEARCH_RESULTS_REGEX} to collect episode detail links; each detail
 * page is then parsed with {@link #TORRENT_DETAILS_PAGE_REGEX}.
 *
 * @author gubatron
 * @author aldenml
 */
public class EztvSearchPerformer extends TorrentRegexSearchPerformer<EztvSearchResult> {

    private static final int MAX_RESULTS = 20;

    /** Keywords at least this long are preferred when validating a fetched page. */
    private static final int MIN_PROBE_KEYWORD_LENGTH = 3;

    /** A page is accepted only when the probe keyword occurs more than this many times. */
    private static final int KEYWORD_OCCURRENCES_THRESHOLD = 9;

    /** Matches the relative {@code /ep/...} link of each entry on the search results page (group 1). */
    public static final String SEARCH_RESULTS_REGEX = "(?is)<a href=\"(/ep/.*?)\"";

    // This is a good example of optional regex groups when a page might have different possible formats to parse.
    // Named groups: displaynamefallback, magneturl, torrenturl, seeds, displayname (optional),
    // displayname2 (optional), infohash (optional), filesize, creationtime.
    public static final String TORRENT_DETAILS_PAGE_REGEX = "(?is)<td class=\"section_post_header\" colspan=\"2\"><h1><span style.*?>(?<displaynamefallback>.*?)</span></h1></td>.*?"
            + "Download Links.*?" + ".*<a href=\"(?<magneturl>magnet:\\?.*?)\" class=\"magnet\".*?" +
            //"(<a href=\"(?<magneturl>magnet:\\?.*?)\" title=\"Magnet Link\".*?)?"+
            ".*<a href=\"(?<torrenturl>http(s)?.*?\\.torrent)\" class=\"download_.\".*?"
            + "Seeds: <span.*?>(?<seeds>.*?)</span><br.*?" + "(Torrent Info.*?title=\"(?<displayname>.*?)\".*?)?"
            + "(<b>Torrent File:</b>\\s+(?<displayname2>.*?)<br.*?)?"
            + "(<b>Torrent Hash:</b>\\s+(?<infohash>.*?)<br.*?)?" + "<b>Filesize:</b>\\s+(?<filesize>.*?)<br.*?"
            + "<b>Released:</b>\\s+(?<creationtime>.*?)<br";

    /**
     * Creates a performer for the given EZTV domain.
     *
     * <p>The numeric arguments forwarded to the superclass ({@code 1},
     * {@code 2 * MAX_RESULTS}, {@code MAX_RESULTS}) are presumably page count,
     * crawl limit and regex result limit -- TODO confirm against
     * {@code TorrentRegexSearchPerformer}.
     *
     * @param domainName host name used to build the search URL
     * @param token      search token passed through to the superclass
     * @param keywords   user search keywords
     * @param timeout    timeout passed through to the superclass
     */
    public EztvSearchPerformer(String domainName, long token, String keywords, int timeout) {
        super(domainName, token, keywords, timeout, 1, 2 * MAX_RESULTS, MAX_RESULTS, SEARCH_RESULTS_REGEX,
                TORRENT_DETAILS_PAGE_REGEX);
    }

    /**
     * Builds a temporary, crawlable result from a {@link #SEARCH_RESULTS_REGEX} match.
     *
     * @param matcher matcher positioned on a search-page hit; group 1 is the item link
     * @return a temporary result wrapping the domain name and the matched item id
     */
    @Override
    public CrawlableSearchResult fromMatcher(SearchMatcher matcher) {
        String itemId = matcher.group(1);
        return new EztvTempSearchResult(getDomainName(), itemId);
    }

    /**
     * Fetches the search page by POSTing the search form to {@code url}.
     *
     * @param url search URL to post to
     * @return the page body, or {@code null} when nothing was returned or the
     *         page fails {@link #isValidHtml(String)}
     */
    @Override
    protected String fetchSearchPage(String url) {
        Map<String, String> formData = new HashMap<>();
        formData.put("SearchString1", getEncodedKeywords());
        formData.put("SearchString", "");
        formData.put("search", "Search");
        String page = post(url, formData);

        // The explicit null check is kept so an overriding isValidHtml never receives null from here.
        if (page == null || !isValidHtml(page)) {
            return null;
        }
        return page;
    }

    /**
     * Builds the search URL. The {@code page} argument is not used: the same
     * URL is returned for every page number.
     *
     * @param page            requested page number (ignored)
     * @param encodedKeywords already-encoded keywords appended to the path
     * @return {@code https://<domain>/search/<encodedKeywords>}
     */
    @Override
    protected String getUrl(int page, String encodedKeywords) {
        return "https://" + getDomainName() + "/search/" + encodedKeywords;
    }

    /**
     * Builds the final search result from a {@link #TORRENT_DETAILS_PAGE_REGEX} match.
     *
     * @param sr      the crawlable result whose details page was fetched
     * @param matcher matcher holding the named groups of the details page
     * @return the parsed result for the details URL of {@code sr}
     */
    @Override
    protected EztvSearchResult fromHtmlMatcher(CrawlableSearchResult sr, SearchMatcher matcher) {
        return new EztvSearchResult(sr.getDetailsUrl(), matcher);
    }

    /**
     * Locates the {@code id="searchsearch_submit"} marker in the page.
     *
     * @param html page content
     * @return index of the marker, or {@code 0} when the marker is absent
     */
    @Override
    protected int htmlPrefixOffset(String html) {
        // indexOf yields -1 when the marker is missing; clamp that to the start of the page.
        return Math.max(html.indexOf("id=\"searchsearch_submit\""), 0);
    }

    /**
     * Cheap sanity check used to discard pages that do not look like real
     * search results: the page is accepted only if a probe keyword (the first
     * keyword of at least {@value #MIN_PROBE_KEYWORD_LENGTH} characters, else
     * the first keyword) occurs more than {@value #KEYWORD_OCCURRENCES_THRESHOLD}
     * times, compared case-insensitively.
     *
     * @param html page content, may be {@code null}
     * @return {@code false} for {@code null} pages, pages containing the text
     *         "Cloudflare", searches without any usable keyword, or pages
     *         with too few keyword occurrences; {@code true} otherwise
     */
    protected boolean isValidHtml(String html) {
        // NOTE(review): "Cloudflare" presumably marks a challenge/interstitial page -- confirm.
        if (html == null || html.contains("Cloudflare")) {
            return false;
        }

        String probe = selectProbeKeyword(getKeywords().split(" "));
        if (probe == null) {
            // Whitespace-only keywords split into an empty array; there is nothing to look for.
            return false;
        }

        int count = StringUtils.countMatches(html.toLowerCase(Locale.US), probe.toLowerCase(Locale.US));
        return count > KEYWORD_OCCURRENCES_THRESHOLD;
    }

    /**
     * Picks the keyword used to validate a page: the first one with at least
     * {@value #MIN_PROBE_KEYWORD_LENGTH} characters, falling back to the first keyword.
     *
     * @param keywords keywords split on single spaces; may be empty
     * @return the selected keyword, or {@code null} when {@code keywords} is empty
     */
    private static String selectProbeKeyword(String[] keywords) {
        for (String keyword : keywords) {
            if (keyword.length() >= MIN_PROBE_KEYWORD_LENGTH) {
                return keyword;
            }
        }
        return keywords.length > 0 ? keywords[0] : null;
    }
}