net.yacy.cora.federate.opensearch.SRURSSConnector.java Source code

Java tutorial

Introduction

Here is the source code for net.yacy.cora.federate.opensearch.SRURSSConnector.java

Source

/**
 *  AccumulateSRURSS
 *  Copyright 2010 by Michael Peter Christen
 *  First released 06.01.2011 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.cora.federate.opensearch;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.LinkedHashMap;
import java.util.concurrent.BlockingQueue;

import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.feed.RSSFeed;
import net.yacy.cora.document.feed.RSSMessage;
import net.yacy.cora.document.feed.RSSReader;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;

import org.apache.http.entity.mime.content.ContentBody;

/**
 * Static helpers for querying a search service that accepts SRU-style request
 * parameters and answers with an RSS feed.
 */
public class SRURSSConnector {

    /** Upper bound on the number of records requested in one HTTP round trip. */
    private final static int recordsPerSession = 250;

    /**
     * Starts a background thread that pages through the search results of
     * {@code urlBase} and puts every received {@link RSSMessage} into {@code queue}.
     * When the worker finishes it puts {@link RSSMessage#POISON} into the queue so a
     * consumer can detect the end of the stream.
     * <p>
     * The worker stops when the time budget is used up, when the requested number of
     * records has been received, when a page is empty, or when loading a page fails
     * with an {@link IOException} (the exception is not reported to the caller).
     * If the worker is interrupted while handing over a message it stops fetching,
     * makes a non-blocking attempt to deliver the poison message and restores its
     * interrupt flag.
     *
     * @param queue target queue for the result messages and the terminating poison message
     * @param urlBase the search service url, passed unchanged to {@link #loadSRURSS}
     * @param query the query as string
     * @param timeoutInit time budget in milliseconds; it is only checked between page
     *        requests and is not applied to the individual HTTP request
     * @param maximumRecordsInit maximum number of records to request in total
     * @param verify cache strategy forwarded to {@link #loadSRURSS}
     * @param global if true also search results from other peers are requested
     * @param agent client identification used for the HTTP requests
     * @return the already started worker thread
     */
    public static Thread searchSRURSS(final BlockingQueue<RSSMessage> queue, final String urlBase,
            final String query, final long timeoutInit, final int maximumRecordsInit, final CacheStrategy verify,
            final boolean global, final ClientIdentification.Agent agent) {
        final Thread job = new Thread("searchSRURSS:" + urlBase) {
            @Override
            public void run() {
                int startRecord = 0;
                int maximumRecords = maximumRecordsInit;
                long timeout = timeoutInit;
                boolean interrupted = false;
                mainloop: while (timeout > 0 && maximumRecords > 0) {
                    final long st = System.currentTimeMillis();
                    // never ask for more records than the caller still wants
                    final int pageSize = Math.min(recordsPerSession, maximumRecords);
                    RSSFeed feed;
                    try {
                        feed = loadSRURSS(urlBase, query, startRecord, pageSize, verify, global, agent);
                    } catch (final IOException e1) {
                        // a failing peer simply ends the result stream
                        break mainloop;
                    }
                    if (feed == null || feed.isEmpty()) {
                        break mainloop;
                    }
                    maximumRecords -= feed.size();
                    while (!feed.isEmpty()) {
                        final RSSMessage message = feed.pollMessage();
                        if (message == null) {
                            break;
                        }
                        try {
                            queue.put(message);
                        } catch (final InterruptedException e) {
                            // stop the whole search, not only the current page
                            interrupted = true;
                            break mainloop;
                        }
                    }
                    startRecord += pageSize;
                    timeout -= System.currentTimeMillis() - st;
                }
                if (!interrupted) {
                    try {
                        queue.put(RSSMessage.POISON);
                    } catch (final InterruptedException e) {
                        interrupted = true;
                    }
                }
                if (interrupted) {
                    // put() would fail again while the interrupt is pending, so try a
                    // non-blocking hand-over; the result is ignored because nothing more
                    // can be done if the queue is full
                    queue.offer(RSSMessage.POISON);
                    Thread.currentThread().interrupt();
                }
            }
        };
        job.start();
        return job;
    }

    /**
     * Sends one query to a search interface via HTTP POST and parses the response as RSS.
     *
     * @param rssSearchServiceURL the target url base (everything before the ? that follows
     *        the SRU request syntax properties).
     *        NOTE(review): earlier documentation stated that null selects the local peer,
     *        but this method passes the value directly to the MultiProtocolURL
     *        constructor - confirm before relying on null.
     * @param query the query as string
     * @param startRecord number of first record
     * @param maximumRecords maximum number of records
     * @param cacheStrategy sent by name as the "verify" request parameter; if null, "false" is sent
     * @param global if true the "resource" parameter is "global", otherwise "local"
     * @param agent client identification used to construct the HTTP client
     * @return the parsed feed, never null
     * @throws IOException if the url is malformed, the request fails, or the response
     *         cannot be parsed into a feed; the original exception is attached as cause
     */
    public static RSSFeed loadSRURSS(final String rssSearchServiceURL, final String query, final int startRecord,
            final int maximumRecords, final CacheStrategy cacheStrategy, final boolean global,
            final ClientIdentification.Agent agent) throws IOException {
        final MultiProtocolURL uri;
        try {
            uri = new MultiProtocolURL(rssSearchServiceURL);
        } catch (final MalformedURLException e) {
            throw new IOException(
                    "cora.Search failed asking peer '" + rssSearchServiceURL + "': bad url, " + e.getMessage(), e);
        }

        // send request
        try {
            final LinkedHashMap<String, ContentBody> parts = new LinkedHashMap<String, ContentBody>();
            parts.put("query", UTF8.StringBody(query));
            parts.put("startRecord", UTF8.StringBody(Integer.toString(startRecord)));
            parts.put("maximumRecords", UTF8.StringBody(Integer.toString(maximumRecords)));
            parts.put("verify",
                    cacheStrategy == null ? UTF8.StringBody("false") : UTF8.StringBody(cacheStrategy.toName()));
            parts.put("resource", UTF8.StringBody(global ? "global" : "local"));
            parts.put("nav", UTF8.StringBody("none"));
            final HTTPClient httpClient = new HTTPClient(agent);
            // reuse the url parsed above instead of parsing the same string a second time
            final byte[] result = httpClient.POSTbytes(uri, uri.getHost(), parts, false, false);

            final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
            if (reader == null) {
                throw new IOException("cora.Search failed asking peer '" + uri.getHost()
                        + "': probably bad response from remote peer (1), reader == null");
            }
            final RSSFeed feed = reader.getFeed();
            if (feed == null) {
                // case where the rss reader does not understand the content
                throw new IOException("cora.Search failed asking peer '" + uri.getHost()
                        + "': probably bad response from remote peer (2)");
            }
            return feed;
        } catch (final IOException e) {
            // add the peer to the message but keep the original exception as cause
            throw new IOException("cora.Search error asking peer '" + uri.getHost() + "':" + e.toString(), e);
        }
    }

}