com.lyncode.xoai.serviceprovider.iterators.RecordIterator.java Source code

Java tutorial

Introduction

Here is the source code for com.lyncode.xoai.serviceprovider.iterators.RecordIterator.java

Source

/**
 * Copyright 2012 Lyncode
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * @author Development @ Lyncode <development@lyncode.com>
 * @version 2.2.9
 */

package com.lyncode.xoai.serviceprovider.iterators;

import com.lyncode.xoai.serviceprovider.HarvesterManager;
import com.lyncode.xoai.serviceprovider.configuration.Configuration;
import com.lyncode.xoai.serviceprovider.data.Record;
import com.lyncode.xoai.serviceprovider.exceptions.*;
import com.lyncode.xoai.serviceprovider.util.URLEncoder;
import com.lyncode.xoai.serviceprovider.util.XMLUtils;
import com.lyncode.xoai.serviceprovider.verbs.ListRecords.ExtraParameters;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.Queue;

/**
 * @author Development @ Lyncode <development@lyncode.com>
 * @version 2.2.9
 */
public class RecordIterator {
    private static Logger log = LogManager.getLogger(RecordIterator.class);

    private Configuration config;
    private String baseUrl;
    private String metadataPrefix;
    private String proxyIp;
    private int proxyPort;
    private ExtraParameters extra;

    public RecordIterator(Configuration configuration, String baseUrl, String metadataPrefix, String proxyIp,
            int proxyPort, ExtraParameters extra) {
        super();
        this.config = configuration;
        this.baseUrl = baseUrl;
        this.metadataPrefix = metadataPrefix;
        this.proxyIp = proxyIp;
        this.proxyPort = proxyPort;
        this.extra = extra;
    }

    private String resumption = null;
    private Queue<Record> _queue = null;

    private String makeUrl() {
        if (resumption != null && !resumption.trim().equals("")) {
            try {
                int wait = this.config.getResumptionInterval();
                log.debug("Waiting " + wait + " miliseconds");
                Thread.sleep(wait);
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
            return (baseUrl + "?verb=ListRecords" + URLEncoder.SEPARATOR + "resumptionToken="
                    + URLEncoder.encode(resumption));
        } else {
            if (extra == null || extra.equals(""))
                return (baseUrl + "?verb=ListRecords" + URLEncoder.SEPARATOR + "metadataPrefix=" + metadataPrefix);
            else
                return (baseUrl + "?verb=ListRecords" + URLEncoder.SEPARATOR + "metadataPrefix=" + metadataPrefix
                        + URLEncoder.SEPARATOR + extra.toUrl());
        }
    }

    private void harvest() throws NoRecordsMatchException, BadResumptionTokenException,
            CannotDisseminateFormatException, NoSetHierarchyException, InternalHarvestException {
        HttpClient httpclient = new DefaultHttpClient();
        String url = makeUrl();
        log.info("Harvesting: " + url);
        HttpGet httpget = new HttpGet(url);
        httpget.addHeader("User-Agent", HarvesterManager.USERAGENT);
        httpget.addHeader("From", HarvesterManager.FROM);

        HttpResponse response = null;

        if (this.proxyIp != null && this.proxyPort > -1) {
            HttpHost proxy = new HttpHost(this.proxyIp, this.proxyPort);
            httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
        }

        try {
            response = httpclient.execute(httpget);
            StatusLine status = response.getStatusLine();

            log.debug(response.getStatusLine());

            if (status.getStatusCode() == 503) // 503 Status (must wait)
            {
                org.apache.http.Header[] headers = response.getAllHeaders();
                for (org.apache.http.Header h : headers) {
                    if (h.getName().equals("Retry-After")) {
                        String retry_time = h.getValue();
                        try {
                            Thread.sleep(Integer.parseInt(retry_time) * 1000);
                        } catch (NumberFormatException e) {
                            log.warn("Cannot parse " + retry_time + " to Integer", e);
                        } catch (InterruptedException e) {
                            log.debug(e.getMessage(), e);
                        }
                        httpclient.getConnectionManager().shutdown();
                        httpclient = new DefaultHttpClient();
                        response = httpclient.execute(httpget);
                    }
                }
            }

            HttpEntity entity = response.getEntity();
            InputStream instream = entity.getContent();

            Document doc = XMLUtils.parseDocument(instream);

            XMLUtils.checkListRecords(doc);

            NodeList listRecords = doc.getElementsByTagName("record");
            for (int i = 0; i < listRecords.getLength(); i++)
                _queue.add(XMLUtils.getRecord(listRecords.item(i)));

            resumption = XMLUtils.getText(doc.getElementsByTagName("resumptionToken"));
            log.debug("RESUMPTION: " + resumption);
        } catch (IOException e) {
            throw new InternalHarvestException(e);
        } catch (ParserConfigurationException e) {
            throw new InternalHarvestException(e);
        } catch (SAXException e) {
            throw new InternalHarvestException(e);
        }

    }

    public boolean hasNext() throws NoRecordsMatchException, BadResumptionTokenException,
            CannotDisseminateFormatException, NoSetHierarchyException, InternalHarvestException {
        if (_queue == null || (_queue.size() == 0 && resumption != null && !resumption.trim().equals(""))) {
            if (_queue == null)
                _queue = new LinkedList<Record>();
            this.harvest();
        }

        return (_queue.size() > 0);
    }

    public Record next() {
        return _queue.poll();
    }

}