org.opencastproject.archive.opencast.solr.SolrIndexManager.java Source code

Introduction

Here is the source code for org.opencastproject.archive.opencast.solr.SolrIndexManager.java
Source

/**
 * Licensed to The Apereo Foundation under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 *
 * The Apereo Foundation licenses this file to you under the Educational
 * Community License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License
 * at:
 *
 *   http://opensource.org/licenses/ecl2.txt
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations under
 * the License.
 *
 */

package org.opencastproject.archive.opencast.solr;

import static org.opencastproject.util.data.Collections.flatMap;
import static org.opencastproject.util.data.Collections.list;
import static org.opencastproject.util.data.Collections.map;
import static org.opencastproject.util.data.Collections.nil;
import static org.opencastproject.util.data.Monadics.mlist;
import static org.opencastproject.util.data.Option.none;
import static org.opencastproject.util.data.Option.option;
import static org.opencastproject.util.data.Option.some;
import static org.opencastproject.util.data.functions.Misc.chuck;

import org.opencastproject.archive.api.Version;
import org.opencastproject.archive.base.persistence.ArchiveDbException;
import org.opencastproject.mediapackage.Attachment;
import org.opencastproject.mediapackage.Catalog;
import org.opencastproject.mediapackage.MediaPackage;
import org.opencastproject.mediapackage.MediaPackageElement;
import org.opencastproject.mediapackage.MediaPackageElements;
import org.opencastproject.mediapackage.MediaPackageException;
import org.opencastproject.mediapackage.MediaPackageParser;
import org.opencastproject.mediapackage.MediaPackageReference;
import org.opencastproject.metadata.api.MetadataValue;
import org.opencastproject.metadata.api.StaticMetadata;
import org.opencastproject.metadata.api.StaticMetadataService;
import org.opencastproject.metadata.api.util.Interval;
import org.opencastproject.metadata.dublincore.DCMIPeriod;
import org.opencastproject.metadata.dublincore.DublinCore;
import org.opencastproject.metadata.dublincore.DublinCoreCatalog;
import org.opencastproject.metadata.dublincore.DublinCoreUtil;
import org.opencastproject.metadata.dublincore.DublinCoreValue;
import org.opencastproject.metadata.dublincore.EncodingSchemeUtils;
import org.opencastproject.metadata.dublincore.Temporal;
import org.opencastproject.metadata.mpeg7.AudioVisual;
import org.opencastproject.metadata.mpeg7.FreeTextAnnotation;
import org.opencastproject.metadata.mpeg7.KeywordAnnotation;
import org.opencastproject.metadata.mpeg7.MediaDuration;
import org.opencastproject.metadata.mpeg7.MediaTime;
import org.opencastproject.metadata.mpeg7.MediaTimePoint;
import org.opencastproject.metadata.mpeg7.Mpeg7Catalog;
import org.opencastproject.metadata.mpeg7.Mpeg7CatalogService;
import org.opencastproject.metadata.mpeg7.MultimediaContent;
import org.opencastproject.metadata.mpeg7.MultimediaContentType;
import org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition;
import org.opencastproject.metadata.mpeg7.TextAnnotation;
import org.opencastproject.metadata.mpeg7.Video;
import org.opencastproject.metadata.mpeg7.VideoSegment;
import org.opencastproject.metadata.mpeg7.VideoText;
import org.opencastproject.security.api.AccessControlList;
import org.opencastproject.security.api.AccessControlParser;
import org.opencastproject.security.api.SecurityService;
import org.opencastproject.series.api.SeriesService;
import org.opencastproject.util.NotFoundException;
import org.opencastproject.util.data.Cell;
import org.opencastproject.util.data.Function;
import org.opencastproject.util.data.Function0;
import org.opencastproject.util.data.Option;
import org.opencastproject.workspace.api.Workspace;

import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.SortedSet;
import java.util.TreeSet;

/** Utility class used to manage the search index. */
// todo review/rewrite solr package
public class SolrIndexManager {

    /** Logging facility */
    private static final Logger logger = LoggerFactory.getLogger(SolrIndexManager.class);

    /** Connection to the solr server that holds the search index. */
    private SolrServer solrServer = null;

    /**
     * Factor multiplied to fine tune relevance and confidence impact on important keyword decision. importance =
     * RELEVANCE_BOOST * relevance + confidence
     */
    private static final double RELEVANCE_BOOST = 2.0;

    /**
     * Number of characters an important keyword should have at least.
     * NOTE(review): the constant is named MAX_CHAR although it is documented as a minimum - confirm against its usage.
     */
    private static final int MAX_CHAR = 3;

    /** Maximum number of important keywords to detect. */
    private static final int MAX_IMPORTANT_COUNT = 10;

    /** List of metadata services, sorted with {@link #priorityComparator} (highest priority first). */
    private Cell<List<StaticMetadataService>> metadataSvcs;

    /** Series service, used to look up the dublin core catalog of the series a media package belongs to. */
    private SeriesService seriesSvc;

    /** Mpeg-7 catalog service, used to parse mpeg-7 catalogs from an input stream. */
    private Mpeg7CatalogService mpeg7CatalogSvc;

    /** Workspace, used to resolve the URI of a media package element to a local file. */
    private Workspace workspace;

    /** Security service, used to determine the organization an index entry belongs to. */
    private SecurityService securitySvc;

    /**
     * Creates a new management instance for the search index.
     *
     * @param solrServer
     *          connection to the solr server holding the index
     * @param workspace
     *          the workspace used to access media package elements
     * @param metadataSvcs
     *          cell holding the static metadata services; the services are sorted with {@link #priorityComparator}
     *          before they are used
     * @param seriesSvc
     *          the series service
     * @param mpeg7CatalogSvc
     *          the mpeg-7 catalog service
     * @param securitySvc
     *          the security service
     */
    public SolrIndexManager(SolrServer solrServer, Workspace workspace,
            Cell<List<StaticMetadataService>> metadataSvcs, SeriesService seriesSvc,
            Mpeg7CatalogService mpeg7CatalogSvc, SecurityService securitySvc) {
        // Transformation applied to the content of the given cell: order the services by priority
        final Function<List<StaticMetadataService>, List<StaticMetadataService>> sortByPriority =
                new Function<List<StaticMetadataService>, List<StaticMetadataService>>() {
                    @Override
                    public List<StaticMetadataService> apply(List<StaticMetadataService> services) {
                        return mlist(services).sort(priorityComparator).value();
                    }
                };
        this.metadataSvcs = metadataSvcs.lift(sortByPriority);

        // Plain collaborators
        this.securitySvc = securitySvc;
        this.mpeg7CatalogSvc = mpeg7CatalogSvc;
        this.seriesSvc = seriesSvc;
        this.workspace = workspace;
        this.solrServer = solrServer;
    }

    /**
     * Orders metadata services by descending priority, i.e. the service with the highest priority comes first.
     * <p>
     * The priorities are compared explicitly instead of being subtracted from each other: a subtraction overflows for
     * operands of large magnitude and opposite sign and would then report the wrong order.
     */
    public static final Comparator<StaticMetadataService> priorityComparator = new Comparator<StaticMetadataService>() {
        @Override
        public int compare(StaticMetadataService a, StaticMetadataService b) {
            final int priorityA = a.getPriority();
            final int priorityB = b.getPriority();
            if (priorityB > priorityA) {
                return 1;
            }
            if (priorityB < priorityA) {
                return -1;
            }
            return 0;
        }
    };

    /**
     * Convert a DublinCoreValue into a date. An instant yields that date, a period yields its start date (wrapped
     * with <code>option()</code>) and a duration yields no date at all.
     */
    private static Function<DublinCoreValue, Option<Date>> toDateF = new Function<DublinCoreValue, Option<Date>>() {
        @Override
        public Option<Date> apply(DublinCoreValue value) {
            // One case per kind of temporal value
            final Temporal.Match<Option<Date>> dateOfTemporal = new Temporal.Match<Option<Date>>() {
                @Override
                public Option<Date> instant(Date instant) {
                    return some(instant);
                }

                @Override
                public Option<Date> period(DCMIPeriod period) {
                    return option(period.getStart());
                }

                @Override
                public Option<Date> duration(long duration) {
                    return none();
                }
            };
            return EncodingSchemeUtils.decodeTemporal(value).fold(dateOfTemporal);
        }
    };

    /**
     * Convert a DublinCoreValue into a duration (long). The decoded value is wrapped with <code>option()</code>.
     */
    private static Function<DublinCoreValue, Option<Long>> toDurationF = new Function<DublinCoreValue, Option<Long>>() {
        @Override
        public Option<Long> apply(DublinCoreValue value) {
            final Long decoded = EncodingSchemeUtils.decodeDuration(value);
            return option(decoded);
        }
    };

    /**
     * Removes every document from the search index and commits the removal. Make sure you know what you are doing.
     *
     * @throws SolrServerException
     *           if an error occurs while talking to solr; an {@link IOException} is wrapped into this type
     */
    public void clear() throws SolrServerException {
        final String everything = "*:*";
        try {
            solrServer.deleteByQuery(everything);
            solrServer.commit();
        } catch (IOException ioe) {
            throw new SolrServerException(ioe);
        }
    }

    /**
     * Returns the number of documents the index reports for a match-all query, i.e. the number of episodes in the
     * search index across all organizations.
     *
     * @return number of episodes in search index
     * @throws ArchiveDbException
     *           if count cannot be retrieved
     */
    public long count() throws ArchiveDbException {
        final SolrQuery matchAll = new SolrQuery("*:*");
        try {
            return solrServer.query(matchAll).getResults().getNumFound();
        } catch (SolrServerException sse) {
            throw new ArchiveDbException(sse);
        }
    }

    /**
     * Set the deleted flag of all versions of the media package with the given id that are not flagged as deleted yet.
     * Each matching document is copied, flagged as deleted, stamped with the deletion date and committed.
     *
     * @param id
     *          identifier of the media package to delete
     * @param deletionDate
     *          the deletion date, stored as the timestamp of the updated documents
     * @return <code>true</code> if at least one document has been updated, <code>false</code> if nothing matched
     * @throws SolrServerException
     *           if an error occurs while talking to solr
     */
    public boolean delete(String id, Date deletionDate) throws SolrServerException {
        // Look up all versions that are still marked as not deleted
        final QueryResponse found;
        try {
            final String queryString = Schema.DC_ID + ":" + ClientUtils.escapeQueryChars(id) + " AND "
                    + Schema.OC_DELETED + ":false";
            found = solrServer.query(new SolrQuery(queryString));
        } catch (Exception queryFailure) {
            throw new SolrServerException(queryFailure);
        }

        // Nothing to do if the episode is unknown or already deleted
        if (found.getResults().isEmpty()) {
            return false;
        }

        try {
            for (SolrDocument existing : found.getResults()) {
                // Carry over every field of the stored document
                final SolrInputDocument updated = new SolrInputDocument();
                for (String fieldName : existing.getFieldNames()) {
                    updated.setField(fieldName, existing.get(fieldName));
                }

                // Flag as deleted, stamp with the deletion date and write back
                Schema.setOcDeleted(updated, true);
                Schema.setOcTimestamp(updated, deletionDate);
                solrServer.add(updated);
                solrServer.commit();
            }
        } catch (IOException ioe) {
            throw new SolrServerException(ioe);
        }
        return true;
    }

    /**
     * Set the "locked" flag of an index entry.
     * <p>
     * All index documents whose dc id matches <code>id</code> are copied field by field, posted to the index again and
     * committed.
     * <p>
     * NOTE(review): the <code>locked</code> argument is currently not written to the re-posted documents. No setter
     * for such a field is visible from this class, so whether and how the flag is to be stored has to be confirmed
     * against the schema.
     *
     * @param id
     *          identifier of the media package whose index entries are to be updated
     * @param locked
     *          the requested lock state (currently unused, see note above)
     * @return <code>true</code> if at least one document has been re-posted, <code>false</code> if nothing matched
     * @throws SolrServerException
     *           if an error occurs while talking to solr
     */
    public boolean setLocked(String id, boolean locked) throws SolrServerException {
        try {
            // Load the existing episode
            QueryResponse solrResponse = null;
            try {
                // Escape the id like delete() does, so that query syntax characters contained in it can neither
                // break nor alter the query.
                SolrQuery query = new SolrQuery(Schema.DC_ID + ":" + ClientUtils.escapeQueryChars(id));
                // Disabled additional restrictions:
                // + " AND -" + Schema.OC_DELETED + ":[* TO *]"
                // + " AND " + Schema.OC_LOCKED + ":" + (!locked));
                solrResponse = solrServer.query(query);
            } catch (Exception e) {
                throw new SolrServerException(e);
            }

            // Did we find the episode?
            if (solrResponse.getResults().size() == 0) {
                return false;
            }

            // Use all existing fields
            for (SolrDocument doc : solrResponse.getResults()) {
                SolrInputDocument inputDocument = new SolrInputDocument();
                for (String field : doc.getFieldNames()) {
                    inputDocument.setField(field, doc.get(field));
                }

                solrServer.add(inputDocument);
                solrServer.commit();
            }
            return true;
        } catch (IOException e) {
            throw new SolrServerException(e);
        }
    }

    /**
     * Clears the "latestVersion" flag of the index entry that belongs to the given version of the media package.
     * <p>
     * The entry is looked up by its unique id, which is the media package identifier directly followed by the
     * version. If no such entry exists, nothing happens. The updated document is only added to the index, it is not
     * committed here.
     *
     * @param sourceMediaPackage
     *          the media package
     * @param version
     *          the version whose index entry shall no longer be flagged as the latest one
     * @throws SolrServerException
     *           if the query fails or more than one entry with the unique id is found
     * @throws IOException
     *           if communication with solr fails
     */
    private void resetFormerLatestVersion(MediaPackage sourceMediaPackage, Version version)
            throws SolrServerException, IOException {
        // Escape the id so that query syntax characters contained in it can neither break nor alter the query.
        final String documentId = String.valueOf(sourceMediaPackage.getIdentifier()) + version;
        final SolrQuery query = new SolrQuery(Schema.ID + ":" + ClientUtils.escapeQueryChars(documentId));
        QueryResponse response = solrServer.query(query);

        // Did we find the episode?
        if (response.getResults().size() == 0) {
            return;
        }

        if (response.getResults().size() > 1) {
            throw new SolrServerException("Multiple values with the same unique identifier found!");
        }

        // Copy the stored document and switch off the flag
        SolrDocument doc = response.getResults().get(0);
        SolrInputDocument inputDoc = new SolrInputDocument();
        for (String field : doc.getFieldNames()) {
            inputDoc.setField(field, doc.get(field));
        }
        Schema.setOcLatestVersion(inputDoc, false);
        solrServer.add(inputDoc);
    }

    /**
     * Posts the media package to solr as the latest, not deleted version of the episode. The "latestVersion" flag of
     * the index entry of the preceding version (<code>version - 1</code>) is cleared, then both changes are committed.
     * If anything fails, a rollback is attempted.
     *
     * Note: Media package element URIs need to be URLs pointing to existing locations.
     *
     * @param sourceMediaPackage
     *          the media package to post
     * @param acl
     *          the access control list for this mediapackage
     * @param now
     *          current date, stored as the timestamp of the index entry
     * @param version
     *          the archive version
     * @throws SolrServerException
     *           if an error occurs while talking to solr; the cause is the original failure
     */
    public void add(MediaPackage sourceMediaPackage, AccessControlList acl, Date now, Version version)
            throws SolrServerException {
        try {
            final SolrInputDocument episodeDocument = createEpisodeInputDocument(sourceMediaPackage, acl, version,
                    false);
            Schema.setOcTimestamp(episodeDocument, now);
            Schema.setOcLatestVersion(episodeDocument, true);
            Schema.setOcDeleted(episodeDocument, false);
            resetFormerLatestVersion(sourceMediaPackage, new Version(version.value() - 1L));
            // Post everything to the search index
            solrServer.add(episodeDocument);
            solrServer.commit();
        } catch (Exception e) {
            try {
                solrServer.rollback();
            } catch (Exception rollbackFailure) {
                // A failing rollback must not hide the failure that made the rollback necessary: log it and
                // report the original problem to the caller.
                logger.error("Unable to roll back the changes to the search index", rollbackFailure);
            }
            throw new SolrServerException(e);
        }
    }

    /**
     * Posts the media package to solr using the given modification date, deletion flag and latest version flag, then
     * commits. Index entries of other versions are not touched. If anything fails, a rollback is attempted.
     * <p>
     * Note: Media package element URIs need to be URLs pointing to existing locations.
     *
     * @param sourceMediaPackage
     *          the media package to post
     * @param acl
     *          the access control list for this mediapackage
     * @param version
     *          the archive version
     * @param deleted
     *          the deletion flag
     * @param modificationDate
     *          the modification date, stored as the timestamp of the index entry
     * @param isLatestVersion
     *          the latest version flag
     * @throws SolrServerException
     *           if an error occurs while talking to solr; the cause is the original failure
     */
    public void add(MediaPackage sourceMediaPackage, AccessControlList acl, Version version, boolean deleted,
            Date modificationDate, boolean isLatestVersion) throws SolrServerException {
        try {
            final SolrInputDocument episodeDocument = createEpisodeInputDocument(sourceMediaPackage, acl, version,
                    deleted);
            Schema.setOcTimestamp(episodeDocument, modificationDate);
            Schema.setOcLatestVersion(episodeDocument, isLatestVersion);
            Schema.setOcDeleted(episodeDocument, deleted);
            solrServer.add(episodeDocument);
            solrServer.commit();
        } catch (Exception e) {
            try {
                solrServer.rollback();
            } catch (Exception rollbackFailure) {
                // A failing rollback must not hide the failure that made the rollback necessary: log it and
                // report the original problem to the caller.
                logger.error("Unable to roll back the changes to the search index", rollbackFailure);
            }
            throw new SolrServerException(e);
        }
    }

    /**
     * Creates a solr input document for the episode metadata of the media package.
     * <p>
     * The document id is the media package identifier directly followed by the version. Identification, organization,
     * serialized media package, ACL, element tags/flavors, version, cover and series data are always set. Episode
     * metadata, the episode dublin core and mpeg-7 data are only added if <code>deleted</code> is <code>false</code>.
     * 
     * @param mediaPackage
     *          the media package
     * @param acl
     *          the access control list for this mediapackage
     * @param version
     *          the archive version
     * @param deleted
     *          whether the episode is deleted
     * @return an input document ready to be posted to solr
     * @throws MediaPackageException
     *           if serialization of the media package fails
     * @throws IOException
     *           declared by the signature; presumably raised while serializing the ACL or loading catalogs (TODO
     *           confirm against the called helpers)
     */
    private SolrInputDocument createEpisodeInputDocument(final MediaPackage mediaPackage, AccessControlList acl,
            final Version version, boolean deleted) throws MediaPackageException, IOException {
        final SolrInputDocument doc = new SolrInputDocument();
        final String mediaPackageId = mediaPackage.getIdentifier().toString();
        // todo fix id generation ambiguity. currently tests are broken
        // if (mediaPackageId.contains("#"))
        // throw new Error("Media package id must not contain '#' characters: " + mediaPackageId);
        // Schema.setId(doc, mediaPackageId + "#" + version);
        // Unique document id: media package id and version concatenated without a separator
        Schema.setId(doc, mediaPackageId + version);
        Schema.setDcId(doc, mediaPackageId);

        // OC specific fields
        Schema.setOrganization(doc, securitySvc.getOrganization().getId());
        Schema.setOcMediapackage(doc, MediaPackageParser.getAsXml(mediaPackage));
        Schema.setOcAcl(doc, AccessControlParser.toXml(acl));
        Schema.setOcElementtags(doc, tags(mediaPackage));
        Schema.setOcElementflavors(doc, flavors(mediaPackage));
        Schema.setOcVersion(doc, version);

        // Add cover (only the first cover attachment is used)
        Attachment[] cover = mediaPackage.getAttachments(MediaPackageElements.MEDIAPACKAGE_COVER_FLAVOR);
        if (cover != null && cover.length > 0) {
            Schema.setOcCover(doc, cover[0].getURI().toString());
        }

        // series fields; getSeriesDc() returns an Option, so the loop body only runs if a series catalog was found
        for (DublinCoreCatalog dc : getSeriesDc(mediaPackage)) {
            Schema.setSeriesDublinCore(doc, dc);
            for (DField<String> a : fromDCValue(dc.get(DublinCore.PROPERTY_TITLE))) {
                Schema.setSeriesDcTitle(doc, a);
            }
        }

        // Only if episode is not already deleted
        if (!deleted) {

            // episode fields
            for (StaticMetadata md : getMetadata(metadataSvcs.get(), mediaPackage)) {
                addEpisodeMetadata(doc, md);
            }

            // episode dublincore
            for (DublinCoreCatalog a : DublinCoreUtil.loadEpisodeDublinCore(workspace, mediaPackage)) {
                Schema.setDublinCore(doc, a);
            }

            // mpeg7 fields
            logger.debug("Looking for mpeg-7 catalogs containing segment texts");
            // TODO: merge the segments from each mpeg7 if there is more than one mpeg7 catalog
            // Take the first text catalog; if there is none, fall back to the first segments catalog
            mlist(mediaPackage.getCatalogs(MediaPackageElements.TEXTS)).headOpt()
                    .orElse(new Function0<Option<Catalog>>() {
                        @Override
                        public Option<Catalog> apply() {
                            logger.debug("No text catalogs found, trying segments only");
                            return mlist(mediaPackage.getCatalogs(MediaPackageElements.SEGMENTS)).headOpt();
                        }
                    }).fold(new Option.EMatch<Catalog>() {
                        @Override
                        public void esome(final Catalog mpeg7) {
                            // load catalog and add it to the solr input document
                            addMpeg7Metadata(doc, mediaPackage, loadMpeg7Catalog(mpeg7));
                        }

                        @Override
                        public void enone() {
                            logger.debug("No segmentation catalog found");
                        }
                    });

        }

        return doc;
    }

    /**
     * Looks up the dublin core catalog of the series the media package belongs to.
     *
     * @param mp
     *          the media package
     * @return the series catalog, or none if the media package has no series id or the series service fails to
     *         deliver the catalog (the failure is logged on debug level)
     */
    private Option<DublinCoreCatalog> getSeriesDc(MediaPackage mp) {
        final Option<String> seriesId = option(mp.getSeries());
        // Iterating the option only enters the loop if a series id is present
        for (String sid : seriesId) {
            try {
                final DublinCoreCatalog seriesDc = seriesSvc.getSeries(sid);
                return some(seriesDc);
            } catch (Exception lookupFailure) {
                logger.debug("No series dublincore found for series id " + sid, lookupFailure);
            }
        }
        return none();
    }

    /**
     * Adds the episode metadata to the solr input document by handing a field collector backed by <code>md</code> to
     * <code>Schema.fill()</code>.
     * <p>
     * Only the dublin core related getters deliver values taken from <code>md</code>. All other getters (ids,
     * organization, series title, oc_* fields, segment fields) return none or an empty list, since those fields are
     * set elsewhere.
     *
     * @param doc
     *          the input document to the solr index
     * @param md
     *          the static metadata of the episode
     */
    static void addEpisodeMetadata(final SolrInputDocument doc, final StaticMetadata md) {
        Schema.fill(doc, new Schema.FieldCollector() {
            // --- identification: not taken from the metadata

            @Override
            public Option<String> getId() {
                return Option.none();
            }

            @Override
            public Option<String> getOrganization() {
                return Option.none();
            }

            @Override
            public Option<String> getDcId() {
                return Option.none();
            }

            // --- single valued dublin core fields, passed through from the metadata

            @Override
            public Option<Date> getDcCreated() {
                return md.getCreated();
            }

            @Override
            public Option<Long> getDcExtent() {
                return md.getExtent();
            }

            @Override
            public Option<String> getDcLanguage() {
                return md.getLanguage();
            }

            @Override
            public Option<String> getDcIsPartOf() {
                return md.getIsPartOf();
            }

            @Override
            public Option<String> getDcReplaces() {
                return md.getReplaces();
            }

            @Override
            public Option<String> getDcType() {
                return md.getType();
            }

            // --- availability: the interval is split into its left ("from") and right ("to") bound

            @Override
            public Option<Date> getDcAvailableFrom() {
                // The left bound of the interval; none if the interval has no left bound
                return md.getAvailable().flatMap(new Function<Interval, Option<Date>>() {
                    @Override
                    public Option<Date> apply(Interval interval) {
                        return interval.fold(new Interval.Match<Option<Date>>() {
                            @Override
                            public Option<Date> bounded(Date leftBound, Date rightBound) {
                                return Option.some(leftBound);
                            }

                            @Override
                            public Option<Date> leftInfinite(Date rightBound) {
                                return Option.none();
                            }

                            @Override
                            public Option<Date> rightInfinite(Date leftBound) {
                                return Option.some(leftBound);
                            }
                        });
                    }
                });
            }

            @Override
            public Option<Date> getDcAvailableTo() {
                // The right bound of the interval; none if the interval has no right bound
                return md.getAvailable().flatMap(new Function<Interval, Option<Date>>() {
                    @Override
                    public Option<Date> apply(Interval interval) {
                        return interval.fold(new Interval.Match<Option<Date>>() {
                            @Override
                            public Option<Date> bounded(Date leftBound, Date rightBound) {
                                return Option.some(rightBound);
                            }

                            @Override
                            public Option<Date> leftInfinite(Date rightBound) {
                                return Option.some(rightBound);
                            }

                            @Override
                            public Option<Date> rightInfinite(Date leftBound) {
                                return Option.none();
                            }
                        });
                    }
                });
            }

            // --- multi valued dublin core fields: each metadata value becomes a DField of value and language

            @Override
            public List<DField<String>> getDcTitle() {
                return fromMValue(md.getTitles());
            }

            @Override
            public List<DField<String>> getDcSubject() {
                return fromMValue(md.getSubjects());
            }

            @Override
            public List<DField<String>> getDcCreator() {
                return fromMValue(md.getCreators());
            }

            @Override
            public List<DField<String>> getDcPublisher() {
                return fromMValue(md.getPublishers());
            }

            @Override
            public List<DField<String>> getDcContributor() {
                return fromMValue(md.getContributors());
            }

            @Override
            public List<DField<String>> getDcDescription() {
                return fromMValue(md.getDescription());
            }

            @Override
            public List<DField<String>> getDcRightsHolder() {
                return fromMValue(md.getRightsHolders());
            }

            @Override
            public List<DField<String>> getDcSpatial() {
                return fromMValue(md.getSpatials());
            }

            @Override
            public List<DField<String>> getDcAccessRights() {
                return fromMValue(md.getAccessRights());
            }

            @Override
            public List<DField<String>> getDcLicense() {
                return fromMValue(md.getLicenses());
            }

            // --- fields that are not derived from the static metadata

            @Override
            public List<DField<String>> getSeriesDcTitle() {
                return nil(); // set elsewhere
            }

            @Override
            public Option<String> getOcMediapackage() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcAcl() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcKeywords() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcCover() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<Date> getOcModified() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<Boolean> getOcDeleted() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcElementtags() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<String> getOcElementflavors() {
                return Option.none(); // set elsewhere
            }

            @Override
            public List<DField<String>> getSegmentText() {
                return nil(); // set elsewhere
            }

            @Override
            public List<DField<String>> getSegmentHint() {
                return nil(); // set elsewhere
            }

            @Override
            public Option<Version> getOcVersion() {
                return Option.none(); // set elsewhere
            }

            @Override
            public Option<Boolean> getOcLatestVersion() {
                return Option.none(); // set elsewhere
            }
        });
    }

    /**
     * Converts metadata values into fields consisting of the value and its language.
     *
     * @param as
     *          the metadata values
     * @return the list of fields produced by mapping each metadata value
     */
    static List<DField<String>> fromMValue(List<MetadataValue<String>> as) {
        final Function<MetadataValue<String>, DField<String>> toField = new Function<MetadataValue<String>, DField<String>>() {
            @Override
            public DField<String> apply(MetadataValue<String> value) {
                return new DField<String>(value.getValue(), value.getLanguage());
            }
        };
        final List<DField<String>> target = new ArrayList<DField<String>>();
        return map(as, target, toField);
    }

    /**
     * Converts dublin core values into fields consisting of the value and its language.
     *
     * @param as
     *          the dublin core values
     * @return the list of fields produced by mapping each dublin core value
     */
    static List<DField<String>> fromDCValue(List<DublinCoreValue> as) {
        final Function<DublinCoreValue, DField<String>> toField = new Function<DublinCoreValue, DField<String>>() {
            @Override
            public DField<String> apply(DublinCoreValue value) {
                return new DField<String>(value.getValue(), value.getLanguage());
            }
        };
        final List<DField<String>> target = new ArrayList<DField<String>>();
        return map(as, target, toField);
    }

    /**
     * Joins the string representations of the given elements, putting <code>sep</code> between two consecutive
     * elements.
     *
     * @param as
     *          the elements to join
     * @param sep
     *          the separator
     * @return the joined string; the empty string if <code>as</code> is empty
     */
    static String mkString(Collection<?> as, String sep) {
        final StringBuilder joined = new StringBuilder();
        final Iterator<?> elements = as.iterator();
        while (elements.hasNext()) {
            joined.append(elements.next());
            // Only separate, never terminate: this way nothing has to be cut off at the end, which used to fail
            // with a StringIndexOutOfBoundsException for an empty collection.
            if (elements.hasNext()) {
                joined.append(sep);
            }
        }
        return joined.toString();
    }

    /**
     * Loads the mpeg-7 catalog: the catalog URI is resolved to a file through the workspace and the file content is
     * parsed by the mpeg-7 catalog service. The input stream is always closed.
     * <p>
     * Failures are passed to <code>chuck()</code> as an {@link IOException}. If the catalog cannot be found, the
     * original {@link NotFoundException} is kept as the cause so that the reason is not lost.
     *
     * @param cat
     *          the catalog element of the media package
     * @return the loaded mpeg-7 catalog
     */
    private Mpeg7Catalog loadMpeg7Catalog(Catalog cat) {
        InputStream in = null;
        try {
            File f = workspace.get(cat.getURI());
            in = new FileInputStream(f);
            return mpeg7CatalogSvc.load(in);
        } catch (NotFoundException e) {
            return chuck(new IOException("Unable to load metadata from mpeg7 catalog " + cat, e));
        } catch (IOException e) {
            return chuck(e);
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Add the mpeg 7 catalog data to the solr document.
     * 
     * @param doc
     *          the input document to the solr index
     * @param mpeg7
     *          the mpeg7 catalog
     */
    @SuppressWarnings("unchecked")
    static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {
        // Copies data derived from the MPEG-7 catalog into the solr input document:
        // - the dc extent, taken from the media duration of the first content element
        // - one segment text field and one segment hint field per video segment, both
        // keyed by a running segment counter
        // - the oc keywords field (see the review notes further down)
        // If the catalog has no multimedia content at all, a warning is logged and the
        // document is left untouched.

        // Check for multimedia content
        if (!mpeg7.multimediaContent().hasNext()) {
            logger.warn("Mpeg-7 doesn't contain  multimedia content");
            return;
        }

        // Get the content duration by looking at the first content track. This
        // of course assumes that all tracks are equally long.
        MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
        // NOTE(review): elements().next() is not guarded by hasNext(); a first content entry
        // without elements would presumably raise NoSuchElementException here - confirm.
        MultimediaContentType mct = mc.elements().next();
        MediaTime mediaTime = mct.getMediaTime();
        Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());

        // Check if the keywords have been filled by (manually) added dublin
        // core data. If not, look for the most relevant fields in mpeg-7.
        // NOTE(review): the condition below creates the set when the keywords are NOT the
        // empty string, which reads like the opposite of the comment above - confirm intent.
        SortedSet<TextAnnotation> sortedAnnotations = null;
        if (!"".equals(Schema.getOcKeywords(doc))) {
            // Orders annotations by descending (RELEVANCE_BOOST * relevance + confidence).
            // Annotations with the same score compare as equal to each other.
            sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {
                public int compare(TextAnnotation a1, TextAnnotation a2) {
                    if ((RELEVANCE_BOOST * a1.getRelevance()
                            + a1.getConfidence()) > (RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence()))
                        return -1;
                    else if ((RELEVANCE_BOOST * a1.getRelevance()
                            + a1.getConfidence()) < (RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence()))
                        return 1;
                    return 0;
                }
            });
        }

        // Iterate over the tracks and extract keywords and hints
        Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
        // Running index over all segments of all tracks; used as the key of the
        // per-segment text and hint fields.
        int segmentCount = 0;

        while (mmIter.hasNext()) {
            MultimediaContent<?> multimediaContent = mmIter.next();

            // We need to process visual segments first, due to the way they are handled in the ui.
            for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext();) {

                // Only Video and AudioVisual elements are looked at, everything else is skipped.
                MultimediaContentType type = (MultimediaContentType) iterator.next();
                if (!(type instanceof Video) && !(type instanceof AudioVisual))
                    continue;

                // for every segment in the current multimedia content track

                // NOTE(review): an AudioVisual element passes the filter above but is cast to
                // Video here; unless every AudioVisual is also a Video this would fail with a
                // ClassCastException - confirm against the mpeg7 type hierarchy.
                Video video = (Video) type;
                // Unchecked cast of the segment iterator to VideoSegment.
                Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition()
                        .segments();
                while (vsegments.hasNext()) {
                    VideoSegment segment = vsegments.next();

                    // segmentText collects all searchable text of the segment, separated by blanks;
                    // hintField collects "key=value" lines describing the segment.
                    StringBuffer segmentText = new StringBuffer();
                    StringBuffer hintField = new StringBuffer();

                    // Collect the video text elements to a segment text
                    SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
                    if (spt != null) {
                        for (VideoText videoText : spt.getVideoText()) {
                            if (segmentText.length() > 0)
                                segmentText.append(" ");
                            segmentText.append(videoText.getText().getText());
                            // TODO: Add hint on bounding box
                        }
                    }

                    // Add keyword annotations
                    // NOTE(review): the annotations are only appended to the segment text; nothing
                    // in this method adds them to sortedAnnotations.
                    Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
                    while (textAnnotations.hasNext()) {
                        TextAnnotation textAnnotation = textAnnotations.next();
                        Iterator<?> kwIter = textAnnotation.keywordAnnotations();
                        while (kwIter.hasNext()) {
                            KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
                            if (segmentText.length() > 0)
                                segmentText.append(" ");
                            segmentText.append(keywordAnnotation.getKeyword());
                        }
                    }

                    // Add free text annotations
                    // NOTE(review): "if" instead of "while" - only the free text of the first text
                    // annotation of the segment is read; confirm that this is intended.
                    Iterator<TextAnnotation> freeIter = segment.textAnnotations();
                    if (freeIter.hasNext()) {
                        Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
                        while (freeTextIter.hasNext()) {
                            FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
                            if (segmentText.length() > 0)
                                segmentText.append(" ");
                            segmentText.append(freeTextAnnotation.getText());
                        }
                    }

                    // add segment text to solr document
                    Schema.setSegmentText(doc,
                            new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));

                    // get the segments time properties
                    MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
                    MediaDuration duration = segment.getMediaTime().getMediaDuration();

                    // TODO: define a class with hint field constants
                    hintField.append("time=" + timepoint.getTimeInMilliseconds() + "\n");
                    hintField.append("duration=" + duration.getDurationInMilliseconds() + "\n");

                    // Look for preview images. Their characteristics are that they are
                    // attached as attachments with a flavor of preview/<something>.
                    // A preview belongs to this segment when the "time" property of its reference
                    // equals the string form of the segment's time point; each match adds a
                    // "preview.<reference identifier>=<attachment uri>" line to the hint.
                    String time = timepoint.toString();
                    for (Attachment slide : mediaPackage
                            .getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
                        MediaPackageReference ref = slide.getReference();
                        if (ref != null && time.equals(ref.getProperty("time"))) {
                            hintField.append("preview");
                            hintField.append(".");
                            hintField.append(ref.getIdentifier());
                            hintField.append("=");
                            hintField.append(slide.getURI().toString());
                            hintField.append("\n");
                        }
                    }

                    logger.trace("Adding segment: " + timepoint.toString());
                    Schema.setSegmentHint(doc,
                            new DField<String>(hintField.toString(), Integer.toString(segmentCount)));

                    // increase segment counter
                    segmentCount++;
                }
            }
        }

        // Put the most important keywords into a special solr field
        // NOTE(review): sortedAnnotations is still empty at this point (see above), so the
        // value handed to setOcKeywords is the empty string whenever the set was created.
        // Whether that replaces or extends existing keywords depends on Schema - confirm.
        if (sortedAnnotations != null) {
            Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
        }
    }

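    /**
     * Generates a string with the most important keywords from the text annotations. Only keywords that are longer
     * than MAX_CHAR characters are taken into account.
     *
     * @param sortedAnnotations
     *            the text annotations whose keyword annotations are evaluated
     * @return The keywords, lower-cased and separated by single spaces, most important first.
     */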
    static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) {
        // Builds a blank separated string of at most MAX_IMPORTANT_COUNT lower-cased keywords,
        // ordered by decreasing importance. An empty annotation set yields an empty buffer.

        // A keyword is considered important when it
        // - has a high relevance
        // - has a high confidence
        // - occurs often
        // - is longer than MAX_CHAR characters

        // Count the occurrences of every keyword (histogram) and derive its importance.
        HashMap<String, Integer> histogram = new HashMap<String, Integer>();
        HashMap<String, Double> importance = new HashMap<String, Double>();
        for (TextAnnotation textAnnotation : sortedAnnotations) {
            Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
            while (keywordAnnotations.hasNext()) {
                String keyword = keywordAnnotations.next().getKeyword().toLowerCase();
                if (keyword.length() <= MAX_CHAR) {
                    // too short to be of interest
                    continue;
                }

                Integer seen = histogram.get(keyword);
                int occurrences = (seen == null) ? 1 : seen + 1;
                histogram.put(keyword, occurrences);

                // The importance is calculated from relevance, confidence and frequency of
                // occurrence. It is recomputed on every occurrence, so the value that is left
                // in the map reflects the final count.
                double imp = (RELEVANCE_BOOST * getMaxRelevance(keyword, sortedAnnotations)
                        + getMaxConfidence(keyword, sortedAnnotations)) * occurrences;
                importance.put(keyword, imp);
            }
        }

        // Pick the MAX_IMPORTANT_COUNT most important keywords, best first.
        StringBuffer buf = new StringBuffer();
        int selected = 0;
        while (selected < MAX_IMPORTANT_COUNT && !importance.isEmpty()) {
            String maxKeyword = null;
            double max = 0.0;

            // Find the entry with the highest importance. The first entry is always accepted,
            // so a candidate is found even if no importance is greater than zero; otherwise
            // nothing would be removed below and the loop would never end.
            for (Entry<String, Double> entry : importance.entrySet()) {
                if (maxKeyword == null || max < entry.getValue()) {
                    max = entry.getValue();
                    maxKeyword = entry.getKey();
                }
            }

            // pop maximum
            importance.remove(maxKeyword);

            // append keyword to string
            if (buf.length() > 0) {
                buf.append(" ");
            }
            buf.append(maxKeyword);

            // count the keyword so that the MAX_IMPORTANT_COUNT limit is actually enforced
            selected++;
        }

        return buf;
    }

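    /**
     * Gets the maximum confidence of all text annotations that contain the given keyword.
     *
     * @param keyword
     *            the keyword to look for; it is compared against the lower-cased annotation keywords
     * @param sortedAnnotations
     *            the text annotations to search
     * @return The maximum confidence value; 0.0 if no annotation contains the keyword.
     */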
    static double getMaxConfidence(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
        // Highest confidence seen so far; stays at 0.0 when the keyword never occurs.
        double best = 0.0;
        for (TextAnnotation annotation : sortedAnnotations) {
            for (Iterator<KeywordAnnotation> it = annotation.keywordAnnotations(); it.hasNext();) {
                String candidate = it.next().getKeyword().toLowerCase();
                if (!keyword.equals(candidate)) {
                    continue;
                }
                // The confidence is a property of the enclosing text annotation.
                double confidence = annotation.getConfidence();
                if (confidence > best) {
                    best = confidence;
                }
            }
        }
        return best;
    }

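    /**
     * Gets the maximum relevance of all text annotations that contain the given keyword.
     *
     * @param keyword
     *            the keyword to look for; it is compared against the lower-cased annotation keywords
     * @param sortedAnnotations
     *            the text annotations to search
     * @return The maximum relevance value; 0.0 if no annotation contains the keyword.
     */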
    static double getMaxRelevance(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
        // Highest relevance seen so far; stays at 0.0 when the keyword never occurs.
        double best = 0.0;
        for (TextAnnotation annotation : sortedAnnotations) {
            for (Iterator<KeywordAnnotation> it = annotation.keywordAnnotations(); it.hasNext();) {
                String candidate = it.next().getKeyword().toLowerCase();
                if (!keyword.equals(candidate)) {
                    continue;
                }
                // The relevance is a property of the enclosing text annotation.
                double relevance = annotation.getRelevance();
                if (relevance > best) {
                    best = relevance;
                }
            }
        }
        return best;
    }

    /** Get metadata from all registered metadata services. */
    static List<StaticMetadata> getMetadata(final List<StaticMetadataService> mdServices, final MediaPackage mp) {
        // Ask every service for the metadata of the media package and flatten the answers
        // into a single list.
        return flatMap(mdServices, new ArrayList<StaticMetadata>(),
                new Function<StaticMetadataService, Collection<StaticMetadata>>() {
                    @Override
                    public Collection<StaticMetadata> apply(StaticMetadataService s) {
                        StaticMetadata md = s.getMetadata(mp);
                        if (md == null) {
                            // A service without metadata for this media package contributes nothing.
                            // The typed factory avoids the unchecked conversion of the raw EMPTY_LIST.
                            return Collections.<StaticMetadata> emptyList();
                        }
                        return list(md);
                    }
                });
    }

    /** Return all media package tags as a space separated string. */
    static String tags(MediaPackage mp) {
        // Every tag is followed by a single blank, so a non-empty result ends with a blank.
        final StringBuilder joined = new StringBuilder();
        for (MediaPackageElement mpe : mp.getElements()) {
            for (String t : mpe.getTags()) {
                joined.append(t).append(" ");
            }
        }
        return joined.toString();
    }

    /** Return all media package flavors as a space separated string. */
    static String flavors(MediaPackage mp) {
        // Every flavor is followed by a single blank, so a non-empty result ends with a blank.
        final StringBuilder joined = new StringBuilder();
        for (MediaPackageElement mpe : mp.getElements()) {
            // elements without a flavor are left out
            if (element(mpe) == null) {
                continue;
            }
            joined.append(element(mpe).toString()).append(" ");
        }
        return joined.toString();
    }

    /** Returns the flavor of the given element as an opaque object, or <code>null</code> if it has none. */
    private static Object element(MediaPackageElement mpe) {
        return mpe.getFlavor();
    }
}