net.di2e.ecdr.search.transform.atom.response.AtomResponseTransformer.java Source code

Java tutorial

Introduction

Here is the source code for net.di2e.ecdr.search.transform.atom.response.AtomResponseTransformer.java

Source

/**
 * Copyright (C) 2014 Cohesive Integrations, LLC (info@cohesiveintegrations.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.di2e.ecdr.search.transform.atom.response;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.io.StringWriter;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;

import javax.activation.MimeType;
import javax.xml.namespace.QName;

import net.di2e.ecdr.api.queryresponse.SearchResponseTransformer;
import net.di2e.ecdr.commons.CDRMetacard;
import net.di2e.ecdr.commons.CDRMetacardType;
import net.di2e.ecdr.commons.filter.config.AtomSearchResponseTransformerConfig;
import net.di2e.ecdr.commons.filter.config.AtomSearchResponseTransformerConfig.AtomContentXmlWrapOption;
import net.di2e.ecdr.search.transform.atom.constants.AtomResponseConstants;
import net.di2e.ecdr.search.transform.atom.geo.AbderaConverter;
import net.di2e.ecdr.search.transform.atom.response.security.SecurityMarkingParser;

import org.apache.abdera.Abdera;
import org.apache.abdera.ext.geo.Position;
import org.apache.abdera.ext.opensearch.OpenSearchConstants;
import org.apache.abdera.i18n.iri.IRI;
import org.apache.abdera.model.Category;
import org.apache.abdera.model.Content;
import org.apache.abdera.model.Document;
import org.apache.abdera.model.Element;
import org.apache.abdera.model.Entry;
import org.apache.abdera.model.Feed;
import org.apache.abdera.model.Link;
import org.apache.abdera.parser.Parser;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ddf.catalog.data.Metacard;
import ddf.catalog.data.Result;
import ddf.catalog.data.impl.ResultImpl;
import ddf.catalog.operation.QueryRequest;
import ddf.catalog.operation.SourceResponse;
import ddf.catalog.operation.impl.SourceResponseImpl;

public class AtomResponseTransformer implements SearchResponseTransformer {

    private static final Logger LOGGER = LoggerFactory.getLogger(AtomResponseTransformer.class);

    /** Local name of the entry extension element whose serialized XML is stored as metacard metadata. */
    private static final String METADATA_ELEMENT_NAME = "Resource";

    /** ISO date-time parser applied to the created and expiration date extension elements. */
    private static final DateTimeFormatter DATE_FORMATTER = ISODateTimeFormat.dateTimeParser();

    /** Abdera instance from which the Atom parser is obtained. */
    private static final Abdera ABDERA = Abdera.getInstance();

    /**
     * Configuration that supplies the product, thumbnail and metadata link relations and the
     * content wrapping option used while converting entries.
     */
    private final AtomSearchResponseTransformerConfig filterConfig;

    /**
     * Creates a transformer that converts Atom entries according to the given configuration.
     *
     * @param config
     *            configuration consulted for link relations and the Atom content wrapping option
     */
    public AtomResponseTransformer(AtomSearchResponseTransformerConfig config) {
        this.filterConfig = config;
    }

    /**
     * Parses an Atom feed from the given stream and converts it into a DDF {@link SourceResponse}.
     * <p>
     * Every entry that has the mandatory id, title and updated elements becomes one {@link Result};
     * other entries are skipped and logged at debug level. The reported total hit count is the value
     * of the OpenSearch {@code totalResults} feed extension when it is present and numeric, otherwise
     * the number of converted entries.
     *
     * @param inputStream
     *            stream containing the Atom feed
     * @param request
     *            query request that is passed through to the returned response
     * @param siteName
     *            source id assigned to every metacard that is created
     * @return response holding the converted results and the total hit count
     */
    @Override
    public SourceResponse processSearchResponse(InputStream inputStream, QueryRequest request, String siteName) {
        List<Result> resultList = new ArrayList<Result>();

        ClassLoader tccl = Thread.currentThread().getContextClassLoader();
        Document<Feed> atomDoc;
        try {
            // Abdera is looked up and run with this class's loader as the thread context class loader;
            // the caller's loader is restored in the finally block.
            Thread.currentThread().setContextClassLoader(AtomResponseTransformer.class.getClassLoader());
            Parser parser = ABDERA.getParser();
            if (LOGGER.isTraceEnabled()) {
                try {
                    // Buffer the raw bytes so the feed can be logged and then parsed from the start
                    // without re-encoding it (a String round trip could corrupt non-default encodings).
                    byte[] feedBytes = IOUtils.toByteArray(inputStream);
                    inputStream = new ByteArrayInputStream(feedBytes);
                    // UTF-8 is used for the log rendering only; parsing below works on the raw bytes.
                    LOGGER.trace("Transforming the following atom feed into a DDF SourceResponse:{}{}",
                            System.lineSeparator(), new String(feedBytes, StandardCharsets.UTF_8));
                } catch (IOException e) {
                    LOGGER.trace("Could not print out atom stream for log: {}", e.getMessage());
                }
            }
            // Give the parser the byte stream instead of a Reader built with the platform default
            // charset, so the encoding is determined from the XML document itself.
            atomDoc = parser.parse(inputStream);
        } finally {
            Thread.currentThread().setContextClassLoader(tccl);
        }

        Feed feed = atomDoc.getRoot();

        for (Entry entry : feed.getEntries()) {
            if (isValidEntry(entry)) {
                Metacard metacard = entryToMetacard(entry, siteName);
                resultList.add(metacardToResult(entry, metacard));
            } else {
                LOGGER.debug("Skipping invalid entry: {}", entry);
            }
        }

        // Default to the number of entries actually converted; overridden by the feed's own count below.
        long totalResults = resultList.size();
        Element totalResultsElement = feed.getExtension(OpenSearchConstants.TOTAL_RESULTS);

        if (totalResultsElement != null) {
            try {
                // Trim so surrounding whitespace in the element text does not cause a parse failure;
                // a null text still ends up in the NumberFormatException handler.
                totalResults = Long.parseLong(StringUtils.trim(totalResultsElement.getText()));
            } catch (NumberFormatException e) {
                LOGGER.warn("Received invalid number of results from Atom response ["
                        + totalResultsElement.getText() + "]", e);
            }
        }

        Map<String, Serializable> responseProperties = null;

        return new SourceResponseImpl(request, responseProperties, resultList, totalResults);
    }

    /**
     * Converts a single Atom entry into a metacard.
     * <p>
     * The following entry data is copied: id (with a {@code urn:uuid:} or {@code urn:catalog:id:}
     * prefix removed), first category (content type name and version), updated and published dates,
     * created and expiration date extensions, content (as metadata), geometry (as WKT), product
     * link, thumbnail link, title and metadata. Security markings are added last through
     * {@link SecurityMarkingParser}.
     *
     * @param entry
     *            Atom entry to convert; its id element must be present
     * @param siteName
     *            source id to set on the metacard
     * @return a new {@link CDRMetacard} wrapping the populated, security-marked metacard
     */
    private Metacard entryToMetacard(Entry entry, String siteName) {
        CDRMetacard metacard = new CDRMetacard(CDRMetacardType.CDR_METACARD);

        String id = entry.getIdElement().getText();
        // The id may be a URN (urn:uuid:<id> or urn:catalog:id:<id>); keep only the trailing <id>.
        if (StringUtils.isNotBlank(id) && (id.startsWith("urn:uuid:") || id.startsWith("urn:catalog:id:"))) {
            id = id.substring(id.lastIndexOf(':') + 1);
        }
        metacard.setId(id);

        metacard.setSourceId(siteName);

        // Only the first category is used: its term is the content type, its scheme the version.
        List<Category> categories = entry.getCategories();
        if (categories != null && !categories.isEmpty()) {
            Category category = categories.get(0);
            metacard.setContentTypeName(category.getTerm());
            IRI scheme = category.getScheme();
            if (scheme != null) {
                metacard.setContentTypeVersion(scheme.toString());
            }
        }

        try {
            metacard.setModifiedDate(entry.getUpdated());
        } catch (IllegalArgumentException e) {
            LOGGER.warn("InvalidDate found in atom reponse, setting Metacard modified time to now ");
            // Fall back on the modified date itself (previously the effective date was set here).
            metacard.setModifiedDate(new Date());
        }
        try {
            metacard.setEffectiveDate(entry.getPublished());
        } catch (IllegalArgumentException e) {
            LOGGER.warn("InvalidDate found in atom reponse, setting Metacard Effective time to now ");
            metacard.setEffectiveDate(new Date());
        }

        // Created/expiration dates are optional extensions; unparseable values are skipped so that a
        // single malformed entry does not abort the conversion of the whole feed.
        Date createdDate = parseExtensionDate(
                entry.getSimpleExtension(new QName(AtomResponseConstants.METACARD_ATOM_NAMESPACE,
                        AtomResponseConstants.METACARD_CREATED_DATE_ELEMENT)),
                AtomResponseConstants.METACARD_CREATED_DATE_ELEMENT);
        if (createdDate != null) {
            metacard.setCreatedDate(createdDate);
        }

        Date expirationDate = parseExtensionDate(
                entry.getSimpleExtension(new QName(AtomResponseConstants.METACARD_ATOM_NAMESPACE,
                        AtomResponseConstants.METADATA_EXPIRATION_DATE_ELEMENT)),
                AtomResponseConstants.METADATA_EXPIRATION_DATE_ELEMENT);
        if (expirationDate != null) {
            metacard.setExpirationDate(expirationDate);
        }

        // The Atom content is the first candidate for the metadata; a matching extension element
        // found further below replaces it.
        AtomContentXmlWrapOption wrap = filterConfig.getAtomContentXmlWrapOption();
        String metadata = entry.getContent();
        populateMetadata(entry, metacard, wrap, metadata);

        metacard.setLocation(getWKT(entry));

        Link productLink = entry.getLink(filterConfig.getProductLinkRelation());
        if (productLink != null) {

            metacard.setResourceURI(URI.create(productLink.getHref().toASCIIString()));
            long resourceSize = productLink.getLength();
            if (resourceSize > 0) {
                metacard.setResourceSize(String.valueOf(resourceSize));
            }
            String productTitle = productLink.getTitle();
            if (productTitle != null) {
                metacard.setAttribute(CDRMetacard.RESOURCE_TITLE, productTitle);
            }
            // ECDR-41 figure out MIMEType
            MimeType productType = productLink.getMimeType();
            if (productType != null) {
                metacard.setAttribute(CDRMetacard.RESOURCE_MIME_TYPE, productType.toString());
            }
        }

        String thumbnailLinkRel = filterConfig.getThumbnailLinkRelation();
        if (thumbnailLinkRel != null) {
            List<Link> links = entry.getLinks(thumbnailLinkRel);
            if (links != null && !links.isEmpty()) {
                // Use the first link that either declares no type or declares an image type.
                for (Link link : links) {
                    MimeType mimeType = link.getMimeType();
                    if (mimeType == null || "image".equals(mimeType.getPrimaryType())) {

                        metacard.setThumbnailLinkURI(URI.create(link.getHref().toASCIIString()));
                        long thumbnailSize = link.getLength();
                        if (thumbnailSize > 0) {
                            metacard.setAttribute(CDRMetacard.THUMBNAIL_LENGTH, Long.valueOf(thumbnailSize));
                        }
                        // ECDR-41 figure out MIMEType
                        // NOTE(review): the MimeType object itself (possibly null) is stored here, while
                        // the product link stores its string form - confirm what consumers expect.
                        metacard.setAttribute(CDRMetacard.THUMBNAIL_MIMETYPE, mimeType);
                        metacard.setAttribute(CDRMetacard.THUMBNAIL_LINK_TITLE, link.getTitle());
                        break;
                    }
                }
            }
        }
        metacard.setTitle(entry.getTitle());

        // An extension element named "Resource" (case-insensitive local name) takes precedence as the
        // metadata; it is serialized with this class's loader as the thread context class loader.
        boolean isMetadataSet = false;
        ClassLoader tccl = Thread.currentThread().getContextClassLoader();
        try {
            Thread.currentThread().setContextClassLoader(AtomResponseTransformer.class.getClassLoader());
            List<Element> extensions = entry.getExtensions();
            for (Element element : extensions) {
                if (METADATA_ELEMENT_NAME.equalsIgnoreCase(element.getQName().getLocalPart())) {
                    StringWriter writer = new StringWriter();
                    try {
                        element.writeTo(writer);
                        metacard.setMetadata(writer.toString());
                        isMetadataSet = true;
                        break;
                    } catch (IOException e) {
                        LOGGER.error(
                                "Could not convert Metadata String value from Atom to Metacard.METADATA attribute",
                                e);
                    }

                }
            }
        } finally {
            Thread.currentThread().setContextClassLoader(tccl);
        }
        if (!isMetadataSet) {
            // No metadata extension was found, so record a link to the metadata instead.
            String metadataLinkRel = filterConfig.getMetadataLinkRelation();
            if (metadataLinkRel != null) {
                List<Link> metadataLinks = entry.getLinks(metadataLinkRel);
                String metadataLink = null;
                for (Link link : metadataLinks) {
                    MimeType mimeType = link.getMimeType();
                    if (mimeType != null) {
                        if (mimeType.getSubType().contains("xml")) {
                            // An XML link is the preferred choice: take it and stop looking.
                            metadataLink = link.getHref().toASCIIString();
                            metacard.setMetadataLinkURI(URI.create(metadataLink));
                            metacard.setAttribute(CDRMetacard.WRAP_METADATA, null);
                            break;
                        } else if (mimeType.getBaseType().contains("text")) {
                            // A text link is accepted provisionally (flagged for wrapping); the loop
                            // continues so that a later XML link can still replace it.
                            metadataLink = link.getHref().toASCIIString();
                            metacard.setMetadataLinkURI(URI.create(metadataLink));
                            metacard.setAttribute(CDRMetacard.WRAP_METADATA, Boolean.TRUE);
                        }
                    }
                }
            }
        }
        Metacard returnMetacard = SecurityMarkingParser.addSecurityToMetacard(metacard, entry);
        return new CDRMetacard(returnMetacard);
    }

    /**
     * Parses the text of an optional date extension element with {@link #DATE_FORMATTER}.
     *
     * @param dateText
     *            text of the extension element, may be null or blank
     * @param elementName
     *            name of the extension element, used only in the log message
     * @return the parsed date, or null when the text is null, blank or cannot be parsed
     */
    private static Date parseExtensionDate(String dateText, String elementName) {
        if (StringUtils.isBlank(dateText)) {
            return null;
        }
        try {
            return new Date(DATE_FORMATTER.parseMillis(dateText.trim()));
        } catch (IllegalArgumentException e) {
            LOGGER.warn("Invalid {} value [{}] found in atom response, ignoring it", elementName, dateText);
            return null;
        }
    }

    /**
     * Stores the given Atom content as the metacard's metadata, enclosing it in an
     * {@code <xml-fragment>} root element when the wrap option asks for it.
     * <p>
     * Nothing is stored when {@code metadata} is null. A null option or {@code NEVER_WRAP} stores
     * the content unchanged; {@code WRAP_HTML_AND_TEXT} wraps only TEXT and HTML content; any other
     * option always wraps.
     *
     * @param entry
     *            entry whose content type is consulted for {@code WRAP_HTML_AND_TEXT}
     * @param metacard
     *            metacard that receives the metadata
     * @param wrap
     *            wrapping option, may be null
     * @param metadata
     *            content of the entry, may be null
     */
    protected void populateMetadata(Entry entry, CDRMetacard metacard, AtomContentXmlWrapOption wrap,
            String metadata) {
        if (metadata == null) {
            return;
        }

        boolean needsWrapper;
        if (wrap == null || wrap == AtomContentXmlWrapOption.NEVER_WRAP) {
            needsWrapper = false;
        } else if (wrap != AtomContentXmlWrapOption.WRAP_HTML_AND_TEXT) {
            needsWrapper = true;
        } else {
            // TEXT and HTML content may not be well-formed XML with a single root; everything else
            // is XML-based and is left as it is.
            switch (entry.getContentType()) {
            case TEXT:
            case HTML:
                needsWrapper = true;
                break;
            default:
                needsWrapper = false;
                break;
            }
        }

        String stored = needsWrapper ? "<xml-fragment>" + metadata + "</xml-fragment>" : metadata;
        metacard.setMetadata(stored);
    }

    /**
     * Wraps a metacard in a {@link Result} and copies the relevance score and distance extensions
     * of the Atom entry onto it.
     * <p>
     * Each value is set only when its extension element is present and numeric; a non-numeric value
     * is logged as a warning and left unset.
     *
     * @param entry
     *            Atom entry that may carry the relevance and distance extensions
     * @param metacard
     *            metacard created from the entry
     * @return result wrapping the metacard
     */
    protected Result metacardToResult(Entry entry, Metacard metacard) {
        ResultImpl atomResult = new ResultImpl(metacard);

        String relevanceText = entry.getSimpleExtension(AtomResponseConstants.RELEVANCE_NAMESPACE,
                AtomResponseConstants.RELEVANCE_ELEMENT, AtomResponseConstants.RELEVANCE_NAMESPACE_PREFIX);
        if (relevanceText != null) {
            try {
                double score = Double.parseDouble(relevanceText);
                atomResult.setRelevanceScore(score);
            } catch (NumberFormatException nfe) {
                LOGGER.warn("Received invalid number for relevance from Atom response [" + relevanceText + "]",
                        nfe);
            }
        }

        String distanceText = entry.getSimpleExtension(AtomResponseConstants.CDRS_EXT_NAMESPACE,
                AtomResponseConstants.DISTANCE_ELEMENT, AtomResponseConstants.CDRS_EXT_NAMESPACE_PREFIX);
        if (distanceText != null) {
            try {
                double meters = Double.parseDouble(distanceText);
                atomResult.setDistanceInMeters(meters);
            } catch (NumberFormatException nfe) {
                LOGGER.warn("Received invalid number for distance from Atom response [" + distanceText + "]",
                        nfe);
            }
        }

        return atomResult;
    }

    /**
     * Converts the geometries of an Atom entry to WKT.
     *
     * @param entry
     *            Atom entry to read the positions from
     * @return WKT for a single geometry, the converter's multi-geometry WKT for several geometries,
     *         or null when the entry has no geometry
     */
    protected String getWKT(Entry entry) {
        Position[] geometries = net.di2e.ecdr.search.transform.atom.geo.GeoHelper.getPositions(entry);

        if (geometries.length == 0) {
            return null;
        }
        if (geometries.length == 1) {
            LOGGER.debug(
                    "Found one geometry in the current Atom entry, converting to WKT for inclusion in metacard");
            return AbderaConverter.convertToWKT(geometries[0]);
        }
        LOGGER.debug(
                "Found multiple geometries in the current Atom entry, converting to MULTI-WKT for inclusion in metacard");
        return AbderaConverter.convertToWKT(geometries);
    }

    /**
     * Quick check that an Atom entry carries the elements RFC 4287 makes mandatory.
     * <p>
     * RFC 4287 section 4.1.2 requires exactly one atom:id, one atom:title and one atom:updated
     * element per entry. Only the presence of each element is checked here.
     *
     * @param entry
     *            entry to check, may be null
     * @return true if the entry is non-null and has id, title and updated elements, false otherwise
     */
    private boolean isValidEntry(Entry entry) {
        if (entry == null) {
            return false;
        }
        if (entry.getIdElement() == null) {
            return false;
        }
        if (entry.getTitleElement() == null) {
            return false;
        }
        return entry.getUpdatedElement() != null;
    }

}