org.ariadne.oai.utils.HarvesterUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.ariadne.oai.utils.HarvesterUtils.java

Source

/*******************************************************************************
 * Copyright (c) 2008 Ariadne Foundation.
 * 
 * This file is part of Ariadne Harvester.
 * 
 * Ariadne Harvester is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Ariadne Harvester is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with Ariadne Harvester.  If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/

package org.ariadne.oai.utils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Calendar;
import java.util.Collection;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.TimeZone;
import java.util.TreeSet;
import java.util.Vector;

import org.apache.commons.httpclient.HttpConnection;
import org.apache.log4j.Logger;
import org.ariadne.config.PropertiesManager;
import org.ariadne.oai.OAIHarvester;
import org.ariadne.oai.Record;
import org.ariadne.util.JDomUtils;
import org.ariadne.util.OaiUtils;
import org.ietf.mimedir.MimeDir;
import org.ietf.mimedir.impl.MimeDirImpl;
import org.ietf.mimedir.util.MimeDirUtil;
import org.ietf.mimedir.vcard.impl.VCardImpl;
import org.jdom.CDATA;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.xpath.XPath;
import org.w3c.util.DateParser;

import uiuc.oai.OAIException;
import uiuc.oai.OAIRepository;

public class HarvesterUtils {

    // public static String getMetadataPrefix(OAIRepository oaiRepository)
    // throws OAIException{
    // OAIMetadataFormatList formats;
    // Vector allprefixes = new Vector();

    // formats = oaiRepository.listMetadataFormats();
    // while(formats.moreItems()){
    // allprefixes.add(formats.getCurrentItem().getMetadataPrefix());
    // formats.moveNext();
    // }
    // String priority = PropertiesManager.getInstance().getProperty("Harvest.Priority");
    // StringTokenizer st = new StringTokenizer(priority, ";");
    // while (st.hasMoreTokens()) {
    // String prefix = st.nextToken();
    // if(allprefixes.contains(prefix)){
    // return prefix;
    // }
    // }
    // return "";
    // }

    public static XPath mmIdOaiCatalog;
    public static XPath gIdOaiCatalog;

    static {
        try {
            mmIdOaiCatalog = XPath
                    .newInstance("//lom:lom/lom:metaMetadata/lom:identifier/lom:catalog/text()=\"oai\"");
            mmIdOaiCatalog.addNamespace(OaiUtils.LOMLOMNS);

            gIdOaiCatalog = XPath.newInstance("//lom:lom/lom:general/lom:identifier/lom:catalog/text()=\"oai\"");
            gIdOaiCatalog.addNamespace(OaiUtils.LOMLOMNS);
        } catch (JDOMException e) {
            //NOOP
        }
    }

    /**
     * Inserts an additional {@code identifier} element with catalog "oai" as the
     * first child of the record's lom/metaMetadata element.
     * <p>
     * The new entry is {@code "oai:" + reposIdentifier + ":" + <entry text of the
     * first existing metaMetadata identifier>}. Nothing is changed when the record
     * has no metaMetadata element, no metaMetadata identifier, an identifier
     * without an {@code entry} child, or when the "oai" catalog XPath check
     * ({@link #mmIdOaiCatalog}) already evaluates to true.
     *
     * @param record          record whose metadata is modified in place
     * @param reposIdentifier repository identifier placed in the middle of the
     *                        generated entry
     * @throws JDOMException  if an XPath lookup fails
     */
    public static void addGlobalMetadataIdentifier(Record record, String reposIdentifier)
            throws IllegalStateException, JDOMException {

        Element metametadata = JDomUtils.getXpathNode("//lom:lom/lom:metaMetadata", OaiUtils.LOMLOMNS,
                record.getMetadata());
        if (metametadata == null) {
            return;
        }

        Element mmIdentifier = metametadata.getChild("identifier", OaiUtils.LOMNS);
        if (mmIdentifier == null) {
            return;
        }

        // An "oai" catalog identifier is already present: do not add a second one.
        if ((Boolean) mmIdOaiCatalog.selectSingleNode(record.getMetadata())) {
            return;
        }

        // getChildText yields null when the identifier has no entry child; the
        // original code passed that straight to String.concat and crashed.
        String loIdent = mmIdentifier.getChildText("entry", mmIdentifier.getNamespace());
        if (loIdent == null) {
            return;
        }

        String ident = "oai:" + reposIdentifier + ":" + loIdent;

        Element newIdentifier = new Element("identifier", OaiUtils.LOMNS);
        metametadata.addContent(0, newIdentifier);

        Element catalog = new Element("catalog", OaiUtils.LOMNS);
        catalog.setText("oai");
        newIdentifier.addContent(catalog);

        Element entry = new Element("entry", OaiUtils.LOMNS);
        entry.setText(ident);
        newIdentifier.addContent(entry);
    }

    /**
     * Inserts an additional {@code identifier} element with catalog "oai" as the
     * first child of the record's lom/general element.
     * <p>
     * The new entry is {@code "oai:" + reposIdentifier + ":" + <entry text of the
     * first existing general identifier>}. Nothing is changed when the record has
     * no general element, no general identifier, an identifier without an
     * {@code entry} child, or when the "oai" catalog XPath check
     * ({@link #gIdOaiCatalog}) already evaluates to true.
     *
     * @param record          record whose metadata is modified in place
     * @param reposIdentifier repository identifier placed in the middle of the
     *                        generated entry
     * @throws JDOMException  if an XPath lookup fails
     */
    public static void addGlobalLOIdentifier(Record record, String reposIdentifier)
            throws IllegalStateException, JDOMException {

        Element general = JDomUtils.getXpathNode("//lom:lom/lom:general", OaiUtils.LOMLOMNS, record.getMetadata());
        if (general == null) {
            return;
        }

        Element generalIdentifier = general.getChild("identifier", OaiUtils.LOMNS);
        if (generalIdentifier == null) {
            return;
        }

        // An "oai" catalog identifier is already present: do not add a second one.
        if ((Boolean) gIdOaiCatalog.selectSingleNode(record.getMetadata())) {
            return;
        }

        // getChildText yields null when the identifier has no entry child; the
        // original code passed that straight to String.concat and crashed.
        String loIdent = generalIdentifier.getChildText("entry", generalIdentifier.getNamespace());
        if (loIdent == null) {
            return;
        }

        String ident = "oai:" + reposIdentifier + ":" + loIdent;

        Element newIdentifier = new Element("identifier", OaiUtils.LOMNS);
        general.addContent(0, newIdentifier);

        Element catalog = new Element("catalog", OaiUtils.LOMNS);
        catalog.setText("oai");
        newIdentifier.addContent(catalog);

        Element entry = new Element("entry", OaiUtils.LOMNS);
        entry.setText(ident);
        newIdentifier.addContent(entry);
    }

    /**
     * Appends a {@code contribute} element to the record's lom/metaMetadata
     * element, if that element exists.
     * <p>
     * The contribution consists of an {@code entity} holding a version 3.0 vCard
     * (FN, N and ORG all set to {@code name}) wrapped in CDATA, a {@code role}
     * whose value and source come from the {@code Harvest.metadataProvider.value}
     * and {@code Harvest.metadataProvider.source} properties, and a {@code date}
     * with the current time formatted by {@code DateParser.getIsoDate} on a GMT
     * calendar.
     *
     * @param record record whose metadata is modified in place
     * @param name   name used for the FN, N and ORG vCard lines
     * @throws JDOMException if the XPath lookup fails
     */
    public static void addReposContributor(Record record, String name) throws JDOMException {

        // vCard content lines; FN, N and ORG deliberately all carry the same name.
        final MimeDir.ContentLine fullNameLine = new MimeDirImpl.ContentLine(null, "FN", null,
                new MimeDirImpl.TextValueType(new String[] { name }));
        final MimeDir.ContentLine nameLine = new MimeDirImpl.ContentLine(null, "N", null,
                new MimeDirImpl.TextValueType(new String[] { name }));
        final MimeDir.ContentLine orgLine = new MimeDirImpl.ContentLine(null, "ORG", null,
                new MimeDirImpl.TextValueType(new String[] { name }));
        final MimeDir.ContentLine versionLine = new MimeDirImpl.ContentLine(null, "VERSION", null,
                new MimeDirImpl.TextValueType(new String[] { "3.0" }));

        Element metaMetadata = JDomUtils.getXpathNode("//lom:lom/lom:metaMetadata", OaiUtils.LOMLOMNS,
                record.getMetadata());
        if (metaMetadata == null) {
            // Nothing to attach the contribution to.
            return;
        }

        // contribute
        Element contributeElement = new Element("contribute", OaiUtils.LOMNS);
        metaMetadata.addContent(contributeElement);

        // contribute.entity: the vCard serialized into a CDATA section
        Element entityElement = new Element("entity", OaiUtils.LOMNS);
        VCardImpl card = new VCardImpl(
                new MimeDir.ContentLine[] { fullNameLine, nameLine, orgLine, versionLine });
        entityElement.addContent(new CDATA(MimeDirUtil.toString(card)));
        contributeElement.addContent(entityElement);

        // contribute.role: value and source are taken from the configuration
        Element roleElement = new Element("role", OaiUtils.LOMNS);
        contributeElement.addContent(roleElement);

        Element roleValue = new Element("value", OaiUtils.LOMNS);
        roleValue.setText(PropertiesManager.getInstance().getProperty("Harvest.metadataProvider.value"));
        roleElement.addContent(roleValue);

        Element roleSource = new Element("source", OaiUtils.LOMNS);
        roleSource.setText(PropertiesManager.getInstance().getProperty("Harvest.metadataProvider.source"));
        roleElement.addContent(roleSource);

        // contribute.date: current time on a GMT calendar
        Element dateElement = new Element("date", OaiUtils.LOMNS);
        contributeElement.addContent(dateElement);

        Element dateTimeElement = new Element("dateTime", OaiUtils.LOMNS);
        Calendar now = Calendar.getInstance();
        now.setTimeZone(TimeZone.getTimeZone("GMT"));
        dateTimeElement.setText(DateParser.getIsoDate(now));
        dateElement.addContent(dateTimeElement);
    }

    //   public static String getReposIdentName(String reposName) {
    //      return reposName.replaceAll(" ", "_").replaceAll(":", "_c_").replaceAll("\n", "").replaceAll("\\(", "_opar_").replaceAll("\\)", "_clpar_");
    //   }
    //
    //   public static String getReposName(String reposIdentName) {
    //      return reposIdentName.replaceAll("_c_", ":").replaceAll("_opar_", "\\(").replaceAll("_clpar_", "\\)").replaceAll("_", " ");
    //   }

    //   public static Vector<ReposProperties> getRegistryReposProperties(){
    //      Vector<ReposProperties> list = getReposProperties();
    //      Vector<ReposProperties> repositories = new Vector<ReposProperties>();
    //      for (ReposProperties repo : list) {
    //         if(repo.getRegistryTarget().equalsIgnoreCase("true")) {
    //            repositories.add(repo);
    //         }
    //      }
    //      return repositories;
    //   }

    /**
     * Returns the internal repository ids listed in the semicolon-separated
     * {@code AllTargets.list} property.
     *
     * @return a new vector of ids in property order; empty when the property is
     *         not set or contains no tokens
     */
    public static Vector<String> getReposList() {
        Vector<String> repositories = new Vector<String>();
        String targets = PropertiesManager.getInstance().getProperty("AllTargets.list");
        if (targets == null) {
            // StringTokenizer rejects null; a missing property simply means "no targets".
            return repositories;
        }
        StringTokenizer st = new StringTokenizer(targets, ";");
        while (st.hasMoreTokens()) {
            repositories.add(st.nextToken());
        }
        return repositories;
    }

    /**
     * Loads the properties of every repository returned by
     * {@link #getReposList()}.
     *
     * @return one {@code ReposProperties} per listed repository, in list order
     */
    public static Vector<ReposProperties> getReposProperties() {
        Vector<String> reposIds = HarvesterUtils.getReposList();
        Vector<ReposProperties> result = new Vector<ReposProperties>();
        for (String reposId : reposIds) {
            result.add(getReposProperties(reposId));
        }
        return result;
    }

    /**
     * Saves every entry of {@code props} through the {@code PropertiesManager},
     * using {@code startString + "." + key} as the property name.
     *
     * @param startString prefix put in front of each key
     * @param props       key/value pairs to save
     */
    public static void saveDetails(String startString, HashMap<String, String> props) {
        for (String key : props.keySet()) {
            String propertyName = startString + "." + key;
            PropertiesManager.getInstance().saveProperty(propertyName, props.get(key));
        }
    }

    /**
     * Saves every property held by {@code reposProps} through the
     * {@code PropertiesManager}, using {@code startString + "." + key} as the
     * property name.
     *
     * @param startString prefix put in front of each key
     * @param reposProps  repository properties to save
     */
    public static void saveDetails(String startString, ReposProperties reposProps) {
        for (String key : reposProps.getProperties().keySet()) {
            String propertyName = startString + "." + key;
            PropertiesManager.getInstance().saveProperty(propertyName, reposProps.getProperties().get(key));
        }
    }

    /**
     * Reads all properties stored under {@code reposIdInternal} and passes them
     * through {@link #checkReposProperties(String, HashMap)}.
     *
     * @param reposIdInternal internal repository id used as the property prefix
     * @return the repository's properties
     */
    public static ReposProperties getReposProperties(String reposIdInternal) {
        return checkReposProperties(reposIdInternal, getPropertiesStartingWith(reposIdInternal));
    }

    /**
     * Builds the harvest session settings from the configuration.
     * <p>
     * Reads {@code Harvest.validation.scheme} (validation URI),
     * {@code Harvest.validation} (validate flag) and {@code log.repositoryLogs}
     * (repository logs flag). A flag is true only when its property equals
     * "true", ignoring case; a missing property yields false.
     *
     * @return a freshly populated {@code HarvestSessionProperties}
     */
    public static HarvestSessionProperties getSessionProperties() {
        PropertiesManager config = PropertiesManager.getInstance();
        HarvestSessionProperties sessionProps = new HarvestSessionProperties();
        sessionProps.setValidationUri(config.getProperty("Harvest.validation.scheme"));
        // Boolean.parseBoolean has the same semantics as the deprecated
        // new Boolean(String) constructor, without allocating a wrapper.
        sessionProps.setValidate(Boolean.parseBoolean(config.getProperty("Harvest.validation")));
        sessionProps.setRepositoryLogs(Boolean.parseBoolean(config.getProperty("log.repositoryLogs")));
        return sessionProps;
    }

    /**
     * Collects the properties whose names start with {@code startString + "."}
     * and returns them keyed by the remainder of the name.
     * <p>
     * The prefix is stripped literally. The previous implementation used
     * {@code String.replaceFirst}, which interprets the prefix as a regular
     * expression: the dot matched any character, and an id containing regex
     * metacharacters could strip the wrong text or throw.
     *
     * @param startString property name prefix, without the trailing dot
     * @return a new map from shortened key to property value; a key that does
     *         not start with the prefix is kept unchanged
     */
    public static HashMap<String, String> getPropertiesStartingWith(String startString) {
        HashMap<String, String> props = new HashMap<String, String>();
        String prefix = startString + ".";
        Hashtable table = PropertiesManager.getInstance().getPropertyStartingWith(prefix);
        for (Object rawKey : table.keySet()) {
            String key = (String) rawKey;
            String shortKey = key.startsWith(prefix) ? key.substring(prefix.length()) : key;
            props.put(shortKey, (String) table.get(key));
        }
        return props;
    }

    /**
     * Completes {@code props} with the defaults from
     * {@code ReposProperties.getDefaultProperties()} and wraps the result.
     * <p>
     * Each default key missing from {@code props} is added to the map and also
     * saved under {@code reposIdInternal + "." + key}. The passed map is modified
     * in place and becomes the property map of the returned object.
     *
     * @param reposIdInternal internal repository id
     * @param props           properties read for that repository
     * @return a {@code ReposProperties} carrying the id and the completed map
     */
    protected static ReposProperties checkReposProperties(String reposIdInternal, HashMap<String, String> props) {

        ReposProperties result = new ReposProperties();

        for (Object rawKey : ReposProperties.getDefaultProperties().keySet()) {
            String key = (String) rawKey;
            if (props.containsKey(key)) {
                continue;
            }
            // Missing: fill in the default and persist it for this repository.
            String fallback = ReposProperties.getDefaultProperties().get(key);
            props.put(key, fallback);
            PropertiesManager.getInstance().saveProperty(reposIdInternal + "." + key, fallback);
        }

        result.setRepositoryIdentifierInteral(reposIdInternal);
        result.setProperties(props);
        return result;
    }

    /**
     * Creates an {@code OAIRepository} for every configured target, using the
     * target's {@code <id>.baseURL} property.
     * <p>
     * A target whose repository cannot be set up is skipped; the failure is
     * logged as a warning instead of being dropped silently.
     *
     * @return the repositories that could be created, in target-list order
     * @throws OAIException declared for compatibility; setup failures are handled
     *                      per target inside this method
     */
    public static Vector<OAIRepository> getRepositories() throws OAIException {
        Vector<OAIRepository> repositories = new Vector<OAIRepository>();
        for (String reposId : getReposList()) {
            try {
                OAIRepository repository = new OAIRepository();
                repository.setBaseURL(PropertiesManager.getInstance().getProperty(reposId + ".baseURL"));
                repositories.add(repository);
            } catch (OAIException e) {
                // Best effort: one broken target must not prevent the others from loading.
                Logger.getLogger(OAIHarvester.class.getName())
                        .warn("Skipping repository " + reposId + " : " + e.getMessage(), e);
            }
        }
        return repositories;
    }

    /**
     * Resets the {@code latestHarvestedDatestamp} property of one repository, or
     * of every configured repository when {@code repos} is "ALL", to the earliest
     * datestamp reported by the repository.
     * <p>
     * A repository that cannot be queried keeps its current value; the failure is
     * logged as a warning.
     *
     * @param repos internal repository id, or "ALL" for every configured target
     */
    public static void resetHarvestingDate(String repos) {
        if (repos.equals("ALL")) {
            // Go through the helper rather than recursing, so a target that
            // happens to be called "ALL" cannot cause endless recursion.
            for (String reposId : HarvesterUtils.getReposList()) {
                resetSingleHarvestingDate(reposId);
            }
        } else {
            resetSingleHarvestingDate(repos);
        }
    }

    /** Resets the latest harvested datestamp of exactly one repository. */
    private static void resetSingleHarvestingDate(String reposId) {
        OAIRepository repository = new OAIRepository();
        try {
            repository.setBaseURL(PropertiesManager.getInstance().getProperty(reposId + ".baseURL"));
            String date = repository.getEarliestDatestamp();
            PropertiesManager.getInstance().saveProperty(reposId + ".latestHarvestedDatestamp", date);
        } catch (OAIException e) {
            // Best effort, as before, but no longer silent.
            Logger.getLogger(OAIHarvester.class.getName())
                    .warn("Could not reset the harvesting date of " + reposId + " : " + e.getMessage(), e);
        }
    }

    /**
     * Saves the {@code <repo>.active} property for each pair in a
     * semicolon-separated list of the form {@code repo1;value1;repo2;value2;...}.
     * <p>
     * A trailing repository id without a value is ignored; previously it caused a
     * {@code NoSuchElementException} after the preceding pairs had already been
     * saved.
     *
     * @param list alternating repository ids and active values, separated by ';'
     */
    public static void setHarvestingRepositories(String list) {
        StringTokenizer tokens = new StringTokenizer(list, ";");
        while (tokens.hasMoreTokens()) {
            String repo = tokens.nextToken();
            if (!tokens.hasMoreTokens()) {
                // Unpaired trailing id: nothing to save for it.
                break;
            }
            String harvest = tokens.nextToken();
            PropertiesManager.getInstance().saveProperty(repo + ".active", harvest);
        }
    }

    /**
     * Removes a repository from the configuration: asks the
     * {@code PropertiesManager} to remove {@code repository + "."} from the
     * properties file, then saves {@code AllTargets.list} again without that
     * repository.
     *
     * @param repository internal id of the repository to remove
     */
    public static void removeRepository(String repository) {
        PropertiesManager.getInstance().removeKeyFromPropertiesFile(repository + ".");

        // Rebuild the semicolon-separated target list from the remaining ids.
        StringBuilder remaining = new StringBuilder();
        for (String reposId : HarvesterUtils.getReposList()) {
            if (reposId.equals(repository)) {
                continue;
            }
            if (remaining.length() > 0) {
                remaining.append(";");
            }
            remaining.append(reposId);
        }

        PropertiesManager.getInstance().saveProperty("AllTargets.list", remaining.toString());
    }

    /**
     * Finds the first configured repository whose registry identifier catalog and
     * entry both equal the given values.
     *
     * @param registryIdCatalog registry identifier catalog to match
     * @param registryIdEntry   registry identifier entry to match
     * @return the matching repository properties, or {@code null} if none match
     */
    public static ReposProperties getRegistryTarget(String registryIdCatalog, String registryIdEntry) {
        for (ReposProperties candidate : getReposProperties()) {
            boolean sameCatalog = candidate.getRegistryIdentifierCatalog().equals(registryIdCatalog);
            if (sameCatalog && candidate.getRegistryIdentifierEntry().equals(registryIdEntry)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Registers a new OAI repository as a harvesting target.
     * <p>
     * Steps:
     * <ol>
     * <li>Derive a fallback repository id from the host part of the base URL, and
     * an internal id from it by replacing dots with underscores. If that internal
     * id is already used as a property prefix, {@code _u_<n>} is appended with an
     * increasing {@code n} until it is unused.</li>
     * <li>Issue an HTTP request for {@code <baseURL>?verb=Identify} and require
     * status 200.</li>
     * <li>Set the base URL on a new {@code OAIRepository} and hand over to
     * {@code addTarget} to store the target.</li>
     * </ol>
     *
     * @param repoProps properties of the repository to add; the base URL is required
     * @return the properties that were stored for the new target
     * @throws Exception if the URL cannot be reached, does not answer with status
     *                   200, or the repository cannot be initialised; the
     *                   underlying exception is attached as the cause
     */
    public static ReposProperties addRepository(ReposProperties repoProps) throws Exception {

        OAIRepository repository = new OAIRepository();
        // Host name without scheme, path or port.
        String fallBackReposId = repoProps.getBaseURL().replaceFirst("http.?://", "").split("/", 2)[0].split(":",
                2)[0];
        String internalId = fallBackReposId.replaceAll("\\.", "_");

        // Collect the ids (text before the first dot) of all properties that
        // start with the candidate id, then make the candidate unique.
        Hashtable ids = PropertiesManager.getInstance().getPropertyStartingWith(internalId);
        TreeSet<String> allIds = new TreeSet<String>();
        for (Object o : ids.keySet()) {
            allIds.add(((String) o).split("\\.")[0]);
        }
        int i = 1;
        while (allIds.contains(internalId)) {
            internalId = internalId.split("_u_")[0] + "_u_" + i++;
        }

        String urlString = repoProps.getBaseURL() + "?verb=Identify";
        HttpURLConnection http = null;
        try {
            URL u = new URL(urlString);
            http = (HttpURLConnection) u.openConnection();
            int status = http.getResponseCode();
            if (status != 200) {
                throw new IOException(Integer.toString(status));
            }
            repository.setBaseURL(repoProps.getBaseURL());
            // Call kept from the original code, where its result was unused.
            // NOTE(review): presumably this surfaces a base URL problem early - confirm.
            repository.getBaseURL();
        } catch (IOException ex) {
            throw new Exception("The given Url isn't a valid OAI Repository (Could not connect to Url \""
                    + urlString + "\". ErrorMsg was : " + ex.getMessage() + ") ", ex);
        } catch (OAIException e) {
            throw new Exception(
                    "The given Url \"" + urlString + "\" isn't a valid OAI Repository (" + e.getMessage() + ").", e);
        } catch (IllegalStateException e) {
            String foundUrl;
            try {
                foundUrl = repository.getBaseURL();
            } catch (OAIException e1) {
                throw new Exception(
                        "The baseUrl of the OAI Repository is not correct (Error : " + e1.getMessage() + ")", e1);
            }
            throw new Exception("The baseUrl of the OAI Repository is not correct (found url in Identify verb : "
                    + foundUrl + ")", e);
        } catch (Exception e) {
            throw new Exception(
                    "The following error occured : (" + e.getClass().getName() + ") " + e.getMessage() + ".", e);
        } finally {
            // Release the probe connection; it was never closed before.
            if (http != null) {
                http.disconnect();
            }
        }

        return addTarget(repoProps, repository, fallBackReposId, internalId);
    }

    /**
     * Stores a repository as a harvesting target and returns the stored
     * properties.
     * <p>
     * The internal id is appended to {@code AllTargets.list} unless already
     * listed. The repository identifier and name are chosen in this order: the
     * value given in {@code repoProps} if non-blank, otherwise the value reported
     * by {@code repository}, otherwise a fallback ({@code fallBackReposId} for the
     * identifier, the chosen identifier for the name). The earliest datestamp and
     * the granularity are copied from the repository when available. All
     * resulting properties are saved under {@code internalId}.
     *
     * @param repoProps       properties supplied by the caller
     * @param repository      repository used to query identifier, name, earliest
     *                        datestamp and granularity
     * @param fallBackReposId identifier used when neither source provides one
     * @param internalId      property prefix under which the target is saved
     * @return the properties that were saved
     */
    private static ReposProperties addTarget(ReposProperties repoProps, OAIRepository repository,
            String fallBackReposId, String internalId) {
        String targets = PropertiesManager.getInstance().getProperty("AllTargets.list");
        if (targets == null) {
            // No target list yet: start from an empty one instead of failing.
            targets = "";
        }
        boolean targetExists = false;
        StringTokenizer st = new StringTokenizer(targets, ";");
        while (st.hasMoreTokens()) {
            if (st.nextToken().equals(internalId)) {
                targetExists = true;
                break;
            }
        }
        if (!targetExists) {
            if (!targets.equals("")) {
                targets += ";";
            }
            targets += internalId;
            PropertiesManager.getInstance().saveProperty("AllTargets.list", targets);
        }
        ReposProperties reposPropsReturn = new ReposProperties();

        // --- repository identifier ---
        String repositoryIdent = null;
        try {
            repositoryIdent = repository.getRepositoryIdentifier();
        } catch (OAIException ignored) {
            // Best effort: fall through to the supplied or fallback identifier.
        }
        String suppliedIdent = repoProps.getRepositoryIdentifier();
        if (suppliedIdent != null && !suppliedIdent.trim().equals("")) {
            repositoryIdent = suppliedIdent;
        }
        // Null-safe: the repository may report no identifier at all.
        if (repositoryIdent == null || repositoryIdent.trim().equals("")) {
            repositoryIdent = fallBackReposId;
            Logger.getLogger(OAIHarvester.class.getName()).warn("Repository " + repoProps.getBaseURL()
                    + " has not specified a repositoryIdentifier. Using fallback option " + fallBackReposId);
        }
        repositoryIdent = repositoryIdent.trim();
        reposPropsReturn.setRepositoryIdentifier(repositoryIdent);

        // --- repository name ---
        String repositoryName = null;
        try {
            repositoryName = repository.getRepositoryName();
        } catch (OAIException ignored) {
            // Best effort: fall through to the supplied or fallback name.
        }
        String suppliedName = repoProps.getRepositoryName();
        if (suppliedName != null && !suppliedName.trim().equals("")) {
            repositoryName = suppliedName;
        }
        if (repositoryName != null) {
            // Names are stored on a single line.
            repositoryName = repositoryName.trim().replaceAll("\n", " ");
        }
        if (repositoryName == null || repositoryName.equals("")) {
            repositoryName = repositoryIdent;
            Logger.getLogger(OAIHarvester.class.getName()).warn("Repository " + repoProps.getBaseURL()
                    + " has not specified a repositoryName. Using fallback option " + repositoryIdent);
        }
        reposPropsReturn.setRepositoryName(repositoryName.trim());

        reposPropsReturn.setBaseURL(repoProps.getBaseURL());
        reposPropsReturn.setProviderName(repoProps.getProviderName());
        try {
            reposPropsReturn.setLatestHarvestedDatestamp(repository.getEarliestDatestamp());
        } catch (OAIException ignored) {
            // Best effort: the datestamp stays unset when the repository does not report one.
        }
        reposPropsReturn.setActive("Yes");
        reposPropsReturn.setMetadataPrefix(repoProps.getMetadataPrefix());
        reposPropsReturn.setMetadataFormat(repoProps.getMetadataFormat());
        reposPropsReturn.setHarvestingSet(repoProps.getHarvestingSet());
        reposPropsReturn.setAutoReset(repoProps.getAutoReset());
        reposPropsReturn.setValidationUri(repoProps.getValidationUri());
        reposPropsReturn.setTransformationID(repoProps.getTransformationID());
        reposPropsReturn.setRegistryIdentifierCatalog(repoProps.getRegistryIdentifierCatalog());
        reposPropsReturn.setRegistryIdentifierEntry(repoProps.getRegistryIdentifierEntry());
        try {
            reposPropsReturn.setGranularity(repository.getGranularity());
        } catch (OAIException ignored) {
            // Best effort: the granularity stays unset when the repository does not report one.
        }
        HarvesterUtils.saveDetails(internalId, reposPropsReturn);
        return reposPropsReturn;
    }

    /**
     * Removes every repository currently returned by {@link #getReposList()} by
     * calling {@link #removeRepository(String)} on each id.
     */
    public static void removeAllTargets() {
        // getReposList() hands back a fresh vector, so removing targets while
        // looping does not disturb the iteration.
        for (String reposId : HarvesterUtils.getReposList()) {
            removeRepository(reposId);
        }
    }
}