org.opencastproject.search.impl.solr.SolrIndexManager.java Source code

Java tutorial

Introduction

Here is the source code for org.opencastproject.search.impl.solr.SolrIndexManager.java

Source

/**
 *  Copyright 2009, 2010 The Regents of the University of California
 *  Licensed under the Educational Community License, Version 2.0
 *  (the "License"); you may not use this file except in compliance
 *  with the License. You may obtain a copy of the License at
 *
 *  http://www.osedu.org/licenses/ECL-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS IS"
 *  BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 *  or implied. See the License for the specific language governing
 *  permissions and limitations under the License.
 *
 */

package org.opencastproject.search.impl.solr;

import static org.opencastproject.search.api.SearchService.READ_PERMISSION;
import static org.opencastproject.search.api.SearchService.WRITE_PERMISSION;
import static org.opencastproject.util.RequireUtil.notNull;
import static org.opencastproject.util.data.Collections.flatMap;
import static org.opencastproject.util.data.Collections.head;
import static org.opencastproject.util.data.Collections.map;
import static org.opencastproject.util.data.Option.option;

import org.opencastproject.mediapackage.Attachment;
import org.opencastproject.mediapackage.Catalog;
import org.opencastproject.mediapackage.MediaPackage;
import org.opencastproject.mediapackage.MediaPackageElement;
import org.opencastproject.mediapackage.MediaPackageElements;
import org.opencastproject.mediapackage.MediaPackageException;
import org.opencastproject.mediapackage.MediaPackageParser;
import org.opencastproject.mediapackage.MediaPackageReference;
import org.opencastproject.metadata.api.MetadataValue;
import org.opencastproject.metadata.api.StaticMetadata;
import org.opencastproject.metadata.api.StaticMetadataService;
import org.opencastproject.metadata.api.util.Interval;
import org.opencastproject.metadata.dublincore.DCMIPeriod;
import org.opencastproject.metadata.dublincore.DublinCore;
import org.opencastproject.metadata.dublincore.DublinCoreCatalog;
import org.opencastproject.metadata.dublincore.DublinCoreValue;
import org.opencastproject.metadata.dublincore.EncodingSchemeUtils;
import org.opencastproject.metadata.dublincore.Temporal;
import org.opencastproject.metadata.mpeg7.AudioVisual;
import org.opencastproject.metadata.mpeg7.FreeTextAnnotation;
import org.opencastproject.metadata.mpeg7.KeywordAnnotation;
import org.opencastproject.metadata.mpeg7.MediaDuration;
import org.opencastproject.metadata.mpeg7.MediaTime;
import org.opencastproject.metadata.mpeg7.MediaTimePoint;
import org.opencastproject.metadata.mpeg7.Mpeg7Catalog;
import org.opencastproject.metadata.mpeg7.Mpeg7CatalogService;
import org.opencastproject.metadata.mpeg7.MultimediaContent;
import org.opencastproject.metadata.mpeg7.MultimediaContentType;
import org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition;
import org.opencastproject.metadata.mpeg7.TextAnnotation;
import org.opencastproject.metadata.mpeg7.Video;
import org.opencastproject.metadata.mpeg7.VideoSegment;
import org.opencastproject.metadata.mpeg7.VideoText;
import org.opencastproject.search.api.SearchResultItem.SearchResultItemType;
import org.opencastproject.search.impl.persistence.SearchServiceDatabaseException;
import org.opencastproject.security.api.AccessControlEntry;
import org.opencastproject.security.api.AccessControlList;
import org.opencastproject.security.api.SecurityService;
import org.opencastproject.security.api.UnauthorizedException;
import org.opencastproject.series.api.SeriesException;
import org.opencastproject.series.api.SeriesService;
import org.opencastproject.util.NotFoundException;
import org.opencastproject.util.SolrUtils;
import org.opencastproject.util.data.Function;
import org.opencastproject.util.data.Option;
import org.opencastproject.workspace.api.Workspace;

import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.servlet.SolrRequestParsers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedSet;
import java.util.TreeSet;

/**
 * Utility class used to manage the search index.
 */
public class SolrIndexManager {

    /** Logging facility */
    private static final Logger logger = LoggerFactory.getLogger(SolrIndexManager.class);

    /** Connection to the solr server that holds the search index. Set once in the constructor. */
    private SolrServer solrServer = null;

    /**
     * Factor multiplied to fine tune relevance and confidence impact on important keyword decision. importance =
     * RELEVANCE_BOOST * relevance + confidence
     */
    private static final double RELEVANCE_BOOST = 2.0;

    /**
     * Number of characters an important keyword should have at least.
     * NOTE(review): the name says "MAX" while the description says "at least" -- confirm against the usage, which is
     * not part of this section.
     */
    private static final int MAX_CHAR = 3;

    /** Maximum number of important keywords to detect. */
    private static final int MAX_IMPORTANT_COUNT = 10;

    /** Metadata services, kept sorted by descending priority (see {@link #setStaticMetadataServices(List)}). */
    private List<StaticMetadataService> mdServices;

    /** Used to look up the dublin core catalog of the series an episode belongs to. */
    private SeriesService seriesService;

    /** Used to parse mpeg-7 catalogs from an input stream. */
    private Mpeg7CatalogService mpeg7CatalogService;

    /** Used to resolve catalog URIs to local files. */
    private Workspace workspace;

    /** Provides the current organization (its id and admin role) for the indexed documents. */
    private SecurityService securityService;

    /**
     * Converts a temporal DublinCoreValue into a date: an instant is used as is, a period contributes its start
     * (wrapped with <code>option</code>) and a duration yields no date at all.
     */
    private static Function<DublinCoreValue, Option<Date>> toDateF = new Function<DublinCoreValue, Option<Date>>() {
        @Override
        public Option<Date> apply(DublinCoreValue value) {
            // One case per kind of temporal value
            Temporal.Match<Option<Date>> dateOfTemporal = new Temporal.Match<Option<Date>>() {
                @Override
                public Option<Date> instant(Date pointInTime) {
                    return Option.some(pointInTime);
                }

                @Override
                public Option<Date> period(DCMIPeriod timeSpan) {
                    return option(timeSpan.getStart());
                }

                @Override
                public Option<Date> duration(long millis) {
                    return Option.none();
                }
            };
            return EncodingSchemeUtils.decodeTemporal(value).fold(dateOfTemporal);
        }
    };

    /**
     * Converts a DublinCoreValue into a duration (long) by decoding it with
     * <code>EncodingSchemeUtils.decodeDuration</code> and wrapping the outcome with <code>option</code>.
     */
    private static Function<DublinCoreValue, Option<Long>> toDurationF = new Function<DublinCoreValue, Option<Long>>() {
        @Override
        public Option<Long> apply(DublinCoreValue value) {
            return option(EncodingSchemeUtils.decodeDuration(value));
        }
    };

    /**
     * Dynamic reference. Stores a private copy of the given metadata services, sorted by descending priority.
     * 
     * @param mdServices
     *          the metadata services to use; the passed list itself is left untouched
     */
    public void setStaticMetadataServices(List<StaticMetadataService> mdServices) {
        List<StaticMetadataService> sorted = new ArrayList<StaticMetadataService>(mdServices);
        Collections.sort(sorted, new Comparator<StaticMetadataService>() {
            @Override
            public int compare(StaticMetadataService a, StaticMetadataService b) {
                // Compare explicitly rather than returning "b - a": the subtraction overflows for priorities of
                // large magnitude and opposite sign, which would silently invert the order.
                int priorityA = a.getPriority();
                int priorityB = b.getPriority();
                if (priorityB > priorityA) {
                    return 1;
                }
                if (priorityB < priorityA) {
                    return -1;
                }
                return 0;
            }
        });
        // Publish the list only once it is fully sorted
        this.mdServices = sorted;
    }

    /**
     * Creates a new management instance for the search index. Every argument is passed through
     * <code>notNull</code> before being stored (presumably rejecting <code>null</code> -- see RequireUtil).
     * 
     * @param connection
     *          connection to the solr server holding the index
     * @param workspace
     *          the workspace used to resolve catalog URIs to files
     * @param mdServices
     *          the metadata services; a copy is stored, sorted by priority
     * @param seriesService
     *          the series service used to look up series metadata
     * @param mpeg7CatalogService
     *          the service used to load mpeg-7 catalogs
     * @param securityService
     *          the security service providing the current organization
     */
    public SolrIndexManager(SolrServer connection, Workspace workspace, List<StaticMetadataService> mdServices,
            SeriesService seriesService, Mpeg7CatalogService mpeg7CatalogService, SecurityService securityService) {

        this.solrServer = notNull(connection, "solr connection");
        this.workspace = notNull(workspace, "workspace");
        this.seriesService = notNull(seriesService, "series service");
        this.mpeg7CatalogService = notNull(mpeg7CatalogService, "mpeg7 service");
        this.securityService = notNull(securityService, "security service");
        // Stores a sorted copy of the list
        setStaticMetadataServices(notNull(mdServices, "metadata service"));
    }

    /**
     * Wipes the whole search index by deleting every document and committing right away. This cannot be undone, so
     * make sure you know what you are doing.
     * 
     * @throws SolrServerException
     *           if solr reports an error, or wrapping the <code>IOException</code> raised while talking to solr
     */
    public void clear() throws SolrServerException {
        try {
            // "*:*" matches every document in the index
            solrServer.deleteByQuery("*:*");
            solrServer.commit();
        } catch (IOException ioFailure) {
            throw new SolrServerException(ioFailure);
        }
    }

    /**
     * Marks the entry with the given <code>id</code> as deleted. The entry, which can either be a series or an
     * episode, is not physically removed: its document is re-posted with all existing fields plus the deletion date.
     * 
     * @param id
     *          identifier of the series or episode to delete
     * @param deletionDate
     *          the deletion date
     * @return <code>true</code> if the entry was found and flagged, <code>false</code> if no entry with that id exists
     *         that is not already flagged as deleted
     * @throws SolrServerException
     *           if an errors occurs while talking to solr
     */
    public boolean delete(String id, Date deletionDate) throws SolrServerException {
        try {
            // Look the entry up, skipping documents that already carry a deletion date
            QueryResponse response;
            try {
                String lookup = Schema.ID + ":" + ClientUtils.escapeQueryChars(id) + " AND -" + Schema.OC_DELETED
                        + ":[* TO *]";
                response = solrServer.query(new SolrQuery(lookup));
            } catch (Exception queryFailure) {
                throw new SolrServerException(queryFailure);
            }

            // Nothing to do if the entry is unknown
            if (response.getResults().isEmpty()) {
                logger.warn("Trying to delete non-existing media package {} from the search index", id);
                return false;
            }

            // Carry over every field of the stored document
            SolrDocument stored = response.getResults().get(0);
            SolrInputDocument updated = new SolrInputDocument();
            for (String fieldName : stored.getFieldNames()) {
                updated.setField(fieldName, stored.get(fieldName));
            }

            // Flag as deleted and write the document back
            Schema.setOcDeleted(updated, deletionDate);
            solrServer.add(updated);
            solrServer.commit();
            return true;
        } catch (IOException ioFailure) {
            throw new SolrServerException(ioFailure);
        }
    }

    /**
     * Posts the media package to solr. Depending on what is referenced in the media package, the method creates one
     * or two entries: one for the episode and, if a series document could be built, one for the series that the
     * episode belongs to.
     * 
     * This implementation of the search service removes all references to non "engage/download" media tracks
     * 
     * @param sourceMediaPackage
     *          the media package to post
     * @param acl
     *          the access control list for this mediapackage
     * @param now
     *          current date, stored as the modification date of the episode
     * @return <code>true</code> once the documents have been posted and committed
     * @throws SolrServerException
     *           if anything goes wrong while building or posting the documents; every exception raised on the way,
     *           including an <code>UnauthorizedException</code>, is wrapped into this type
     * @throws UnauthorizedException
     *           declared for callers, but currently always delivered wrapped in a <code>SolrServerException</code>
     */
    public boolean add(MediaPackage sourceMediaPackage, AccessControlList acl, Date now)
            throws SolrServerException, UnauthorizedException {
        try {
            // The episode document is always created, so no null handling is needed for it
            SolrInputDocument episodeDocument = createEpisodeInputDocument(sourceMediaPackage, acl);
            Schema.setOcModified(episodeDocument, now);

            // The series document is null if the media package has no series or the series cannot be loaded
            SolrInputDocument seriesDocument = createSeriesInputDocument(sourceMediaPackage.getSeries(), acl);
            if (seriesDocument != null) {
                Schema.enrich(episodeDocument, seriesDocument);
            }

            // Post everything to the search index
            solrServer.add(episodeDocument);
            if (seriesDocument != null) {
                solrServer.add(seriesDocument);
            }
            solrServer.commit();
            return true;
        } catch (Exception e) {
            throw new SolrServerException(e);
        }
    }

    /**
     * Posts the media package to solr using explicit modification and deletion dates. Depending on what is referenced
     * in the media package, the method creates one or two entries: one for the episode and, if a series document could
     * be built, one for the series that the episode belongs to. On failure the pending changes are rolled back.
     * 
     * This implementation of the search service removes all references to non "engage/download" media tracks
     * 
     * @param sourceMediaPackage
     *          the media package to post
     * @param acl
     *          the access control list for this mediapackage
     * @param deletionDate
     *          the deletion date, or <code>null</code> if the episode is not deleted
     * @param modificationDate
     *          the modification date
     * @return <code>true</code> if successfully added
     * @throws SolrServerException
     *           if an errors occurs while building or posting the documents, or while rolling back
     */
    public boolean add(MediaPackage sourceMediaPackage, AccessControlList acl, Date deletionDate,
            Date modificationDate) throws SolrServerException {
        try {
            SolrInputDocument episodeDocument = createEpisodeInputDocument(sourceMediaPackage, acl);

            // The series document is null if the media package has no series or the series cannot be loaded
            SolrInputDocument seriesDocument = createSeriesInputDocument(sourceMediaPackage.getSeries(), acl);
            if (seriesDocument != null) {
                Schema.enrich(episodeDocument, seriesDocument);
            }

            Schema.setOcModified(episodeDocument, modificationDate);
            if (deletionDate != null) {
                Schema.setOcDeleted(episodeDocument, deletionDate);
            }

            solrServer.add(episodeDocument);
            // Only post a series document if there is one; posting null would fail the whole update
            if (seriesDocument != null) {
                solrServer.add(seriesDocument);
            }
            solrServer.commit();
            return true;
        } catch (Exception e) {
            try {
                solrServer.rollback();
            } catch (IOException e1) {
                // The rollback failure is reported to the caller, so keep the original cause in the log
                logger.error("Unable to roll back the search index after a failed update", e);
                throw new SolrServerException(e1);
            }
            throw new SolrServerException(e);
        }
    }

    /**
     * Builds the solr input document describing the episode of the given media package: identifier and Opencast
     * specific fields, static (dublin core) metadata, mpeg-7 segment metadata and authorization fields.
     * 
     * @param mediaPackage
     *          the media package
     * @param acl
     *          the access control list for this mediapackage
     * @return an input document ready to be posted to solr, never <code>null</code>
     * @throws MediaPackageException
     *           if serialization of the media package fails
     * @throws IOException
     *           if the mpeg-7 catalog cannot be loaded
     */
    private SolrInputDocument createEpisodeInputDocument(MediaPackage mediaPackage, AccessControlList acl)
            throws MediaPackageException, IOException {

        SolrInputDocument episode = new SolrInputDocument();

        // Identifier
        Schema.setId(episode, mediaPackage.getIdentifier().toString());

        // Opencast specific fields
        Schema.setOcMediatype(episode, SearchResultItemType.AudioVisual.toString());
        Schema.setOrganization(episode, securityService.getOrganization().getId());
        Schema.setOcMediapackage(episode, MediaPackageParser.getAsXml(mediaPackage));
        Schema.setOcElementtags(episode, tags(mediaPackage));
        Schema.setOcElementflavors(episode, flavors(mediaPackage));

        // Cover: only the first attachment with the cover flavor is used
        Attachment[] covers = mediaPackage.getAttachments(MediaPackageElements.MEDIAPACKAGE_COVER_FLAVOR);
        if (covers != null && covers.length > 0) {
            Schema.setOcCover(episode, covers[0].getURI().toString());
        }

        // Standard dublin core fields
        // naive approach. works as long as only setters, not adders are available in the schema
        for (StaticMetadata metadata : getMetadata(mdServices, mediaPackage)) {
            addEpisodeMetadata(episode, metadata);
        }

        // Mpeg-7: prefer catalogs with segment texts, fall back to plain segment catalogs
        logger.debug("Looking for mpeg-7 catalogs containing segment texts");
        Catalog[] segmentCatalogs = mediaPackage.getCatalogs(MediaPackageElements.TEXTS);
        if (segmentCatalogs.length == 0) {
            logger.debug("No text catalogs found, trying segments only");
            segmentCatalogs = mediaPackage.getCatalogs(MediaPackageElements.SEGMENTS);
        }
        // TODO: merge the segments from each mpeg7 if there is more than one mpeg7 catalog
        if (segmentCatalogs.length == 0) {
            logger.debug("No segmentation catalog found");
        } else {
            addMpeg7Metadata(episode, mediaPackage, loadMpeg7Catalog(segmentCatalogs[0]));
        }

        // Authorization
        setAuthorization(episode, securityService, acl);

        return episode;
    }

    /**
     * Adds the fields of the given static metadata to an episode document by handing a field collector to
     * <code>Schema.fill</code>. Fields that are not backed by the static metadata (id, organization and all Opencast
     * specific fields) are reported as absent or empty here because they are set elsewhere.
     * 
     * @param doc
     *          the solr document to fill
     * @param md
     *          the static metadata to take the values from
     */
    static void addEpisodeMetadata(final SolrInputDocument doc, final StaticMetadata md) {
        Schema.fill(doc, new Schema.FieldCollector() {
            @Override
            public Option<String> getId() {
                return Option.none();
            }

            @Override
            public Option<String> getOrganization() {
                return Option.none();
            }

            @Override
            public Option<Date> getDcCreated() {
                return md.getCreated();
            }

            @Override
            public Option<Long> getDcExtent() {
                return md.getExtent();
            }

            @Override
            public Option<String> getDcLanguage() {
                return md.getLanguage();
            }

            @Override
            public Option<String> getDcIsPartOf() {
                return md.getIsPartOf();
            }

            @Override
            public Option<String> getDcReplaces() {
                return md.getReplaces();
            }

            @Override
            public Option<String> getDcType() {
                return md.getType();
            }

            /** The left bound of the availability interval, if the interval has one. */
            @Override
            public Option<Date> getDcAvailableFrom() {
                return md.getAvailable().flatMap(new Function<Interval, Option<Date>>() {
                    @Override
                    public Option<Date> apply(Interval interval) {
                        return interval.fold(new Interval.Match<Option<Date>>() {
                            @Override
                            public Option<Date> bounded(Date leftBound, Date rightBound) {
                                return Option.some(leftBound);
                            }

                            @Override
                            public Option<Date> leftInfinite(Date rightBound) {
                                return Option.none();
                            }

                            @Override
                            public Option<Date> rightInfinite(Date leftBound) {
                                return Option.some(leftBound);
                            }
                        });
                    }
                });
            }

            /** The right bound of the availability interval, if the interval has one. */
            @Override
            public Option<Date> getDcAvailableTo() {
                return md.getAvailable().flatMap(new Function<Interval, Option<Date>>() {
                    @Override
                    public Option<Date> apply(Interval interval) {
                        return interval.fold(new Interval.Match<Option<Date>>() {
                            @Override
                            public Option<Date> bounded(Date leftBound, Date rightBound) {
                                return Option.some(rightBound);
                            }

                            @Override
                            public Option<Date> leftInfinite(Date rightBound) {
                                return Option.some(rightBound);
                            }

                            @Override
                            public Option<Date> rightInfinite(Date leftBound) {
                                return Option.none();
                            }
                        });
                    }
                });
            }

            @Override
            public List<DField<String>> getDcTitle() {
                return fromMValue(md.getTitles());
            }

            @Override
            public List<DField<String>> getDcSubject() {
                return fromMValue(md.getSubjects());
            }

            @Override
            public List<DField<String>> getDcCreator() {
                return fromMValue(md.getCreators());
            }

            @Override
            public List<DField<String>> getDcPublisher() {
                return fromMValue(md.getPublishers());
            }

            @Override
            public List<DField<String>> getDcContributor() {
                return fromMValue(md.getContributors());
            }

            @Override
            public List<DField<String>> getDcDescription() {
                return fromMValue(md.getDescription());
            }

            @Override
            public List<DField<String>> getDcRightsHolder() {
                return fromMValue(md.getRightsHolders());
            }

            @Override
            public List<DField<String>> getDcSpatial() {
                return fromMValue(md.getSpatials());
            }

            @Override
            public List<DField<String>> getDcAccessRights() {
                return fromMValue(md.getAccessRights());
            }

            @Override
            public List<DField<String>> getDcLicense() {
                return fromMValue(md.getLicenses());
            }

            @Override
            public Option<String> getOcMediatype() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcMediapackage() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcKeywords() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcCover() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<Date> getOcModified() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<Date> getOcDeleted() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcElementtags() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcElementflavors() {
                return Option.none(); // set elsewhere
            }

            // The typed emptyList() replaces the raw EMPTY_LIST constant to avoid unchecked conversions

            @Override
            public List<DField<String>> getOcAcl() {
                return Collections.<DField<String>> emptyList(); // set elsewhere
            }

            @Override
            public List<DField<String>> getSegmentText() {
                return Collections.<DField<String>> emptyList(); // set elsewhere
            }

            @Override
            public List<DField<String>> getSegmentHint() {
                return Collections.<DField<String>> emptyList(); // set elsewhere
            }
        });
    }

    /**
     * Turns a list of metadata values into solr fields. Each resulting field carries the value and the language of
     * the metadata value it was created from.
     * 
     * @param as
     *          the metadata values to convert
     * @return the fields, collected into a new list
     */
    static List<DField<String>> fromMValue(List<MetadataValue<String>> as) {
        Function<MetadataValue<String>, DField<String>> toField = new Function<MetadataValue<String>, DField<String>>() {
            @Override
            public DField<String> apply(MetadataValue<String> value) {
                return new DField<String>(value.getValue(), value.getLanguage());
            }
        };
        return map(as, new ArrayList<DField<String>>(), toField);
    }

    /**
     * Turns a list of dublin core values into solr fields. Each resulting field carries the value and the language of
     * the dublin core value it was created from.
     * 
     * @param as
     *          the dublin core values to convert
     * @return the fields, collected into a new list
     */
    static List<DField<String>> fromDCValue(List<DublinCoreValue> as) {
        Function<DublinCoreValue, DField<String>> toField = new Function<DublinCoreValue, DField<String>>() {
            @Override
            public DField<String> apply(DublinCoreValue value) {
                return new DField<String>(value.getValue(), value.getLanguage());
            }
        };
        return map(as, new ArrayList<DField<String>>(), toField);
    }

    /**
     * Adds authorization fields to the solr document. Only the read and write actions are indexed: the admin role of
     * the current organization (if it has one) is granted both, followed by the roles of every allowing ACL entry.
     * Denying entries and entries for any other action are skipped with a warning.
     * 
     * @param doc
     *          the solr document
     * @param securityService
     *          the security service providing the current organization
     * @param acl
     *          the access control list, must not be <code>null</code>
     */
    static void setAuthorization(SolrInputDocument doc, SecurityService securityService, AccessControlList acl) {
        Map<String, List<String>> permissions = new HashMap<String, List<String>>();

        // Define containers for common permissions
        List<String> reads = new ArrayList<String>();
        permissions.put(READ_PERMISSION, reads);
        List<String> writes = new ArrayList<String>();
        permissions.put(WRITE_PERMISSION, writes);

        String adminRole = securityService.getOrganization().getAdminRole();

        // The admin user can read and write
        if (adminRole != null) {
            reads.add(adminRole);
            writes.add(adminRole);
        }

        for (AccessControlEntry entry : acl.getEntries()) {
            if (!entry.isAllow()) {
                logger.warn("Search service does not support denial via ACL, ignoring {}", entry);
                continue;
            }
            List<String> actionPermissions = permissions.get(entry.getAction());
            /*
             * MH-8353 a series could have a permission defined we don't know how to handle -DH
             */
            if (actionPermissions == null) {
                logger.warn("Search service doesn't know how to handle action: {}", entry.getAction());
                continue;
            }
            actionPermissions.add(entry.getRole());
        }

        // Write the permissions to the solr document: one field per action holding the comma separated roles, with
        // the action name passed as the second DField argument
        for (Map.Entry<String, List<String>> entry : permissions.entrySet()) {
            Schema.setOcAcl(doc, new DField<String>(mkString(entry.getValue(), ","), entry.getKey()));
        }
    }

    /**
     * Joins the string representations of the given elements, separated by <code>sep</code>.
     * 
     * @param as
     *          the elements to join; may be empty
     * @param sep
     *          the separator placed between two consecutive elements
     * @return the joined string, or the empty string if there are no elements
     */
    static String mkString(Collection<?> as, String sep) {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (Object a : as) {
            // Put the separator in front of every element but the first, so nothing has to be cut off afterwards
            // (cutting a trailing separator off an empty result used to fail with an index out of bounds error)
            if (!first) {
                joined.append(sep);
            }
            joined.append(a);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Loads the mpeg-7 catalog referenced by the given media package element. The catalog file is obtained from the
     * workspace and parsed by the mpeg-7 catalog service; the stream is always closed.
     * 
     * @param cat
     *          the catalog element pointing to the mpeg-7 file
     * @return the parsed mpeg-7 catalog
     * @throws IOException
     *           if the catalog cannot be found in the workspace or cannot be read
     */
    private Mpeg7Catalog loadMpeg7Catalog(Catalog cat) throws IOException {
        InputStream in = null;
        try {
            File f = workspace.get(cat.getURI());
            in = new FileInputStream(f);
            return mpeg7CatalogService.load(in);
        } catch (NotFoundException e) {
            // Keep the original exception as the cause so the reason for the failure is not lost
            throw new IOException("Unable to load metadata from mpeg7 catalog " + cat, e);
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Builds the solr input document for the series with the given id. The document starts out with the fields of the
     * series entry already stored in the index (if any, on a best-effort basis) and is then updated with identifier,
     * organization, media type, modification date, the series dublin core metadata and authorization fields.
     * 
     * @param seriesId
     *          the id of the series
     * @param acl
     *          the access control list for this mediapackage
     * @return an input document ready to be posted to solr or <code>null</code> if there is no series id or the
     *         series dublin core cannot be obtained
     */
    private SolrInputDocument createSeriesInputDocument(String seriesId, AccessControlList acl)
            throws IOException, UnauthorizedException {

        if (seriesId == null) {
            return null;
        }

        // Without the series dublin core there is nothing to index
        DublinCoreCatalog seriesCatalog;
        try {
            seriesCatalog = seriesService.getSeries(seriesId);
        } catch (SeriesException e) {
            logger.debug("No series dublincore found for series id " + seriesId);
            return null;
        } catch (NotFoundException e) {
            logger.debug("No series dublincore found for series id " + seriesId);
            return null;
        }

        SolrInputDocument series = new SolrInputDocument();

        // Start from what is already indexed for this series; a failure here is logged and otherwise ignored
        try {
            String queryString = new StringBuilder("q=").append(Schema.ID).append(":")
                    .append(SolrUtils.clean(seriesId)).toString();
            SolrParams params = SolrRequestParsers.parseQueryString(queryString);
            QueryResponse response = solrServer.query(params);
            if (!response.getResults().isEmpty()) {
                SolrDocument indexed = response.getResults().get(0);
                for (String name : indexed.getFieldNames()) {
                    series.addField(name, indexed.getFieldValue(name));
                }
            }
        } catch (Exception e) {
            logger.error("Error trying to load series " + seriesId, e);
        }

        // Identifier
        Schema.setId(series, seriesId);

        // Opencast specific fields
        Schema.setOrganization(series, securityService.getOrganization().getId());
        Schema.setOcMediatype(series, SearchResultItemType.Series.toString());
        Schema.setOcModified(series, new Date());

        // Dublin core fields
        addSeriesMetadata(series, seriesCatalog);

        // Authorization
        setAuthorization(series, securityService, acl);

        return series;
    }

    /**
     * Add the standard dublin core fields to a series document.
     * 
     * @param doc
     *          the solr document to fill
     * @param dc
     *          the dublin core catalog to get the data from
     */
    static void addSeriesMetadata(final SolrInputDocument doc, final DublinCoreCatalog dc) throws IOException {
        Schema.fill(doc, new Schema.FieldCollector() {
            @Override
            public Option<String> getId() {
                return Option.some(dc.getFirst(DublinCore.PROPERTY_IDENTIFIER));
            }

            @Override
            public Option<String> getOrganization() {
                return Option.none();
            }

            @Override
            public Option<Date> getDcCreated() {
                return head(dc.get(DublinCore.PROPERTY_CREATED)).flatMap(toDateF);
            }

            @Override
            public Option<Long> getDcExtent() {
                return head(dc.get(DublinCore.PROPERTY_EXTENT)).flatMap(toDurationF);
            }

            @Override
            public Option<String> getDcLanguage() {
                return option(dc.getFirst(DublinCore.PROPERTY_LANGUAGE));
            }

            @Override
            public Option<String> getDcIsPartOf() {
                return option(dc.getFirst(DublinCore.PROPERTY_IS_PART_OF));
            }

            @Override
            public Option<String> getDcReplaces() {
                return option(dc.getFirst(DublinCore.PROPERTY_REPLACES));
            }

            @Override
            public Option<String> getDcType() {
                return option(dc.getFirst(DublinCore.PROPERTY_TYPE));
            }

            @Override
            public Option<Date> getDcAvailableFrom() {
                return option(dc.getFirst(DublinCore.PROPERTY_AVAILABLE))
                        .flatMap(new Function<String, Option<Date>>() {
                            @Override
                            public Option<Date> apply(String s) {
                                return option(EncodingSchemeUtils.decodePeriod(s).getStart());
                            }
                        });
            }

            /**
             * Decodes the first {@code DublinCore.PROPERTY_AVAILABLE} value of {@code dc} as a period and
             * returns the period's end, wrapped into an option.
             */
            @Override
            public Option<Date> getDcAvailableTo() {
                final Option<String> available = option(dc.getFirst(DublinCore.PROPERTY_AVAILABLE));
                // Maps the encoded period string to its end date.
                final Function<String, Option<Date>> periodEnd = new Function<String, Option<Date>>() {
                    @Override
                    public Option<Date> apply(String encodedPeriod) {
                        final Date end = EncodingSchemeUtils.decodePeriod(encodedPeriod).getEnd();
                        return option(end);
                    }
                };
                return available.flatMap(periodEnd);
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_TITLE} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcTitle() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_TITLE));
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_SUBJECT} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcSubject() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_SUBJECT));
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_CREATOR} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcCreator() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_CREATOR));
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_PUBLISHER} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcPublisher() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_PUBLISHER));
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_CONTRIBUTOR} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcContributor() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_CONTRIBUTOR));

            }

            /**
             * Returns the {@code DublinCore.PROPERTY_DESCRIPTION} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcDescription() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_DESCRIPTION));
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_RIGHTS_HOLDER} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcRightsHolder() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_RIGHTS_HOLDER));
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_SPATIAL} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcSpatial() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_SPATIAL));
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_ACCESS_RIGHTS} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcAccessRights() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_ACCESS_RIGHTS));
            }

            /**
             * Returns the {@code DublinCore.PROPERTY_LICENSE} values of {@code dc}, converted by
             * {@code fromDCValue}.
             */
            @Override
            public List<DField<String>> getDcLicense() {
                return fromDCValue(dc.get(DublinCore.PROPERTY_LICENSE));
            }

            /**
             * Always reports an absent media type.
             */
            @Override
            public Option<String> getOcMediatype() {
                return Option.<String>none();
            }

            /**
             * Always reports an absent media package value.
             */
            @Override
            public Option<String> getOcMediapackage() {
                return Option.<String>none();
            }

            /**
             * Always reports absent keywords.
             */
            @Override
            public Option<String> getOcKeywords() {
                return Option.<String>none();
            }

            /**
             * Always reports an absent cover.
             */
            @Override
            public Option<String> getOcCover() {
                return Option.<String>none();
            }

            /**
             * Always reports an absent modification date.
             */
            @Override
            public Option<Date> getOcModified() {
                return Option.<Date>none();
            }

            /**
             * Always reports an absent deletion date.
             */
            @Override
            public Option<Date> getOcDeleted() {
                return Option.<Date>none();
            }

            /**
             * Always reports absent element tags.
             */
            @Override
            public Option<String> getOcElementtags() {
                return Option.<String>none();
            }

            /**
             * Always reports absent element flavors.
             */
            @Override
            public Option<String> getOcElementflavors() {
                return Option.<String>none();
            }

            /**
             * Always returns an empty list of ACL fields.
             */
            @Override
            public List<DField<String>> getOcAcl() {
                // Typed factory instead of the raw EMPTY_LIST constant, so no unchecked conversion is needed.
                return Collections.<DField<String>>emptyList();
            }

            /**
             * Always returns an empty list of segment text fields.
             */
            @Override
            public List<DField<String>> getSegmentText() {
                // Typed factory instead of the raw EMPTY_LIST constant, so no unchecked conversion is needed.
                return Collections.<DField<String>>emptyList();
            }

            /**
             * Always returns an empty list of segment hint fields.
             */
            @Override
            public List<DField<String>> getSegmentHint() {
                // Typed factory instead of the raw EMPTY_LIST constant, so no unchecked conversion is needed.
                return Collections.<DField<String>>emptyList();
            }
        });
    }

    /**
     * Add the mpeg 7 catalog data to the solr document.
     * <p>
     * Sets the document extent from the duration of the first element of the first multimedia content
     * and then writes one segment text field and one segment hint field per video segment. The hint
     * field carries the segment's start time and duration plus one line for each matching segment
     * preview attachment of the media package.
     * 
     * @param doc
     *          the input document to the solr index
     * @param mediaPackage
     *          the media package whose segment preview attachments are matched against the segments
     * @param mpeg7
     *          the mpeg7 catalog
     */
    @SuppressWarnings("unchecked")
    static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {

        // Check for multimedia content
        if (!mpeg7.multimediaContent().hasNext()) {
            logger.warn("Mpeg-7 doesn't contain  multimedia content");
            return;
        }

        // Get the content duration by looking at the first content track. This
        // of course assumes that all tracks are equally long.
        MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
        Iterator<?> firstTrackElements = mc.elements();
        if (firstTrackElements.hasNext()) {
            MultimediaContentType mct = (MultimediaContentType) firstTrackElements.next();
            MediaTime mediaTime = mct.getMediaTime();
            Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());
        } else {
            // An empty first track used to fail with a NoSuchElementException; skip the extent instead.
            logger.warn("First Mpeg-7 multimedia content has no elements, unable to determine the extent");
        }

        // Check if the keywords have been filled by (manually) added dublin
        // core data. If not, look for the most relevant fields in mpeg-7.
        // NOTE(review): the set is created whenever the current keywords value is NOT the empty string,
        // which looks inverted relative to the comment above - confirm the intended condition.
        // NOTE(review): nothing in this method adds annotations to the set, so the keyword string
        // written at the end is built from an empty set - confirm whether collecting was intended.
        SortedSet<TextAnnotation> sortedAnnotations = null;
        if (!"".equals(Schema.getOcKeywords(doc))) {
            sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {
                public int compare(TextAnnotation a1, TextAnnotation a2) {
                    // Higher boosted relevance plus confidence sorts first.
                    double score1 = RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence();
                    double score2 = RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence();
                    if (score1 > score2)
                        return -1;
                    else if (score1 < score2)
                        return 1;
                    return 0;
                }
            });
        }

        // Iterate over the tracks and extract keywords and hints
        Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
        int segmentCount = 0;

        while (mmIter.hasNext()) {
            MultimediaContent<?> multimediaContent = mmIter.next();

            // We need to process visual segments first, due to the way they are handled in the ui.
            for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext();) {

                MultimediaContentType type = (MultimediaContentType) iterator.next();
                if (!(type instanceof Video) && !(type instanceof AudioVisual))
                    continue;

                // for every segment in the current multimedia content track

                // NOTE(review): an AudioVisual that is not also a Video would fail this cast - confirm
                // against the type hierarchy.
                Video video = (Video) type;
                Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition()
                        .segments();
                while (vsegments.hasNext()) {
                    VideoSegment segment = vsegments.next();

                    // Method-local buffers, so the unsynchronized builder is sufficient
                    StringBuilder segmentText = new StringBuilder();
                    StringBuilder hintField = new StringBuilder();

                    // Collect the video text elements to a segment text
                    SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
                    if (spt != null) {
                        for (VideoText videoText : spt.getVideoText()) {
                            if (segmentText.length() > 0)
                                segmentText.append(" ");
                            segmentText.append(videoText.getText().getText());
                            // TODO: Add hint on bounding box
                        }
                    }

                    // Add keyword annotations
                    Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
                    while (textAnnotations.hasNext()) {
                        TextAnnotation textAnnotation = textAnnotations.next();
                        Iterator<?> kwIter = textAnnotation.keywordAnnotations();
                        while (kwIter.hasNext()) {
                            KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
                            if (segmentText.length() > 0)
                                segmentText.append(" ");
                            segmentText.append(keywordAnnotation.getKeyword());
                        }
                    }

                    // Add free text annotations
                    // NOTE(review): only the first text annotation of the segment is read here, unlike
                    // the keyword loop above - confirm that this is intended.
                    Iterator<TextAnnotation> freeIter = segment.textAnnotations();
                    if (freeIter.hasNext()) {
                        Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
                        while (freeTextIter.hasNext()) {
                            FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
                            if (segmentText.length() > 0)
                                segmentText.append(" ");
                            segmentText.append(freeTextAnnotation.getText());
                        }
                    }

                    // add segment text to solr document
                    Schema.setSegmentText(doc,
                            new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));

                    // get the segments time properties
                    MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
                    MediaDuration duration = segment.getMediaTime().getMediaDuration();

                    // TODO: define a class with hint field constants
                    hintField.append("time=").append(timepoint.getTimeInMilliseconds()).append("\n");
                    hintField.append("duration=").append(duration.getDurationInMilliseconds()).append("\n");

                    // Look for preview images. Their characteristics are that they are
                    // attached as attachments with a flavor of preview/<something>.
                    String time = timepoint.toString();
                    for (Attachment slide : mediaPackage
                            .getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
                        MediaPackageReference ref = slide.getReference();
                        // A preview belongs to this segment when its reference carries the same time string
                        if (ref != null && time.equals(ref.getProperty("time"))) {
                            hintField.append("preview");
                            hintField.append(".");
                            hintField.append(ref.getIdentifier());
                            hintField.append("=");
                            hintField.append(slide.getURI().toString());
                            hintField.append("\n");
                        }
                    }

                    logger.trace("Adding segment: " + time);
                    Schema.setSegmentHint(doc,
                            new DField<String>(hintField.toString(), Integer.toString(segmentCount)));

                    // increase segment counter
                    segmentCount++;
                }
            }
        }

        // Put the most important keywords into a special solr field
        if (sortedAnnotations != null) {
            Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
        }
    }

    /**
     * Generates a string with the most important keywords from the text annotations.
     * <p>
     * Only keywords longer than {@code MAX_CHAR} characters are considered. The importance of a keyword
     * is its boosted maximum relevance plus its maximum confidence, multiplied by the number of times it
     * occurs. At most {@code MAX_IMPORTANT_COUNT} keywords are returned, most important first.
     * 
     * @param sortedAnnotations
     *          the text annotations to take the keywords from
     * @return The keyword string, with keywords separated by a single space.
     */
    static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) {

        // important keyword:
        // - high relevance
        // - high confidence
        // - occur often
        // - more than MAX_CHAR chars

        // calculate keyword occurrences (histogram) and importance
        HashMap<String, Integer> histogram = new HashMap<String, Integer>();
        HashMap<String, Double> importance = new HashMap<String, Double>();
        for (TextAnnotation textAnnotation : sortedAnnotations) {
            Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
            while (keywordAnnotations.hasNext()) {
                String keyword = keywordAnnotations.next().getKeyword().toLowerCase();
                if (keyword.length() <= MAX_CHAR) {
                    continue;
                }
                Integer seen = histogram.get(keyword);
                int occurrences = (seen == null ? 0 : seen) + 1;
                histogram.put(keyword, occurrences);

                // here the importance value is calculated
                // from relevance, confidence and frequency of occurrence.
                double imp = (RELEVANCE_BOOST * getMaxRelevance(keyword, sortedAnnotations)
                        + getMaxConfidence(keyword, sortedAnnotations)) * occurrences;
                importance.put(keyword, imp);
            }
        }

        // get the MAX_IMPORTANT_COUNT most important keywords
        List<String> selected = new ArrayList<String>();
        while (selected.size() < MAX_IMPORTANT_COUNT && !importance.isEmpty()) {
            double max = 0.0;
            String maxKeyword = null;

            // get maximum from importance list; the first entry is always accepted so that a keyword
            // is selected even when no importance is positive and the loop is guaranteed to make progress
            for (Entry<String, Double> entry : importance.entrySet()) {
                if (maxKeyword == null || max < entry.getValue()) {
                    max = entry.getValue();
                    maxKeyword = entry.getKey();
                }
            }

            // pop maximum and remember it, which is what enforces the MAX_IMPORTANT_COUNT limit
            importance.remove(maxKeyword);
            selected.add(maxKeyword);
        }

        // join the selected keywords with single spaces
        StringBuffer buf = new StringBuffer();
        for (String keyword : selected) {
            if (buf.length() > 0)
                buf.append(" ");
            buf.append(keyword);
        }

        return buf;
    }

    /**
     * Finds the highest confidence among all text annotations that carry the given keyword.
     * 
     * @param keyword
     *          the keyword to look for; compared against the lower-cased annotation keywords
     * @param sortedAnnotations
     *          the text annotations to search
     * @return The maximum confidence value, or {@code 0.0} if no annotation exceeds it.
     */
    static double getMaxConfidence(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
        double highest = 0.0;
        for (TextAnnotation annotation : sortedAnnotations) {
            for (Iterator<KeywordAnnotation> it = annotation.keywordAnnotations(); it.hasNext();) {
                String candidate = it.next().getKeyword().toLowerCase();
                if (keyword.equals(candidate) && highest < annotation.getConfidence()) {
                    highest = annotation.getConfidence();
                }
            }
        }
        return highest;
    }

    /**
     * Finds the highest relevance among all text annotations that carry the given keyword.
     * 
     * @param keyword
     *          the keyword to look for; compared against the lower-cased annotation keywords
     * @param sortedAnnotations
     *          the text annotations to search
     * @return The maximum relevance value, or {@code 0.0} if no annotation exceeds it.
     */
    static double getMaxRelevance(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
        double highest = 0.0;
        for (TextAnnotation annotation : sortedAnnotations) {
            for (Iterator<KeywordAnnotation> it = annotation.keywordAnnotations(); it.hasNext();) {
                String candidate = it.next().getKeyword().toLowerCase();
                if (keyword.equals(candidate) && highest < annotation.getRelevance()) {
                    highest = annotation.getRelevance();
                }
            }
        }
        return highest;
    }

    /**
     * Collects the metadata that each of the given services provides for the media package. Services
     * that return {@code null} contribute nothing.
     */
    static List<StaticMetadata> getMetadata(final List<StaticMetadataService> mdServices, final MediaPackage mp) {
        // Turns a service into zero or one metadata items
        final Function<StaticMetadataService, Collection<StaticMetadata>> fetch =
                new Function<StaticMetadataService, Collection<StaticMetadata>>() {
                    @Override
                    public Collection<StaticMetadata> apply(StaticMetadataService service) {
                        final StaticMetadata metadata = service.getMetadata(mp);
                        if (metadata == null) {
                            return Collections.<StaticMetadata>emptyList();
                        }
                        return Arrays.asList(metadata);
                    }
                };
        return flatMap(mdServices, new ArrayList<StaticMetadata>(), fetch);
    }

    /**
     * Concatenates the tags of all media package elements, writing a single space after each tag.
     */
    static String tags(MediaPackage mp) {
        final StringBuilder joined = new StringBuilder();
        for (MediaPackageElement mpElement : mp.getElements()) {
            for (String elementTag : mpElement.getTags()) {
                joined.append(elementTag).append(" ");
            }
        }
        return joined.toString();
    }

    /**
     * Concatenates the flavors of all media package elements that have one, writing a single space
     * after each flavor.
     */
    static String flavors(MediaPackage mp) {
        final StringBuilder joined = new StringBuilder();
        for (MediaPackageElement mpElement : mp.getElements()) {
            // Elements without a flavor contribute nothing
            if (mpElement.getFlavor() == null) {
                continue;
            }
            joined.append(mpElement.getFlavor().toString()).append(" ");
        }
        return joined.toString();
    }

    /**
     * Returns number of entries in search index, across all organizations.
     * <p>
     * Runs a match-all query and reads the number of hits from the response.
     * 
     * @return number of entries in search index
     * @throws SearchServiceDatabaseException
     *           if count cannot be retrieved
     */
    public long count() throws SearchServiceDatabaseException {
        // Only the hit count is read, so ask for zero rows instead of the default page of documents.
        SolrQuery query = new SolrQuery("*:*");
        query.setRows(0);
        try {
            QueryResponse response = solrServer.query(query);
            return response.getResults().getNumFound();
        } catch (SolrServerException e) {
            throw new SearchServiceDatabaseException(e);
        }
    }
}